From 3f5e149b7af0b9ec65c1f86272138f510516646c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 5 Feb 2024 11:51:36 +0100 Subject: [PATCH 01/21] Make subsampling use less memory --- .../raft/neighbors/detail/ivf_flat_build.cuh | 5 +-- .../raft/neighbors/detail/ivf_pq_build.cuh | 11 ++++-- cpp/include/raft/random/detail/rng_impl.cuh | 11 ++++++ .../raft/spatial/knn/detail/ann_utils.cuh | 38 ++++++++++++++++++- 4 files changed, 57 insertions(+), 8 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh index ab30b4009d..8bf9842466 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh @@ -365,9 +365,8 @@ inline auto build(raft::resources const& handle, auto trainset_ratio = std::max( 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); auto n_rows_train = n_rows / trainset_ratio; - auto trainset = make_device_matrix(handle, n_rows_train, index.dim()); - raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset.view(), random_seed); + auto trainset = raft::spatial::knn::detail::utils::subsample( + handle, dataset, n_rows, n_rows_train, dim, random_seed); auto centers_view = raft::make_device_matrix_view( index.centers().data_handle(), index.n_lists(), index.dim()); raft::cluster::kmeans_balanced_params kmeans_params; diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 0ef6cb13fb..f4486c6188 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -1730,12 +1730,13 @@ auto build(raft::resources const& handle, // Besides just sampling, we transform the input dataset into floats to make it easier // to use gemm operations from cublas. - auto trainset = - make_device_mdarray(handle, device_mr, make_extents(n_rows_train, dim)); + auto trainset = make_device_mdarray(handle, device_mr, make_extents(0, 0)); if constexpr (std::is_same_v) { - raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset.view(), random_seed); + // raft::spatial::knn::detail::utils::subsample( + // handle, dataset, n_rows, trainset.view(), random_seed); + trainset = raft::spatial::knn::detail::utils::subsample( + handle, dataset, n_rows, n_rows_train, dim, random_seed); } else { // TODO(tfeher): Enable codebook generation with any type T, and then remove // trainset tmp. @@ -1744,6 +1745,8 @@ auto build(raft::resources const& handle, raft::spatial::knn::detail::utils::subsample( handle, dataset, n_rows, trainset_tmp.view(), random_seed); cudaDeviceSynchronize(); + trainset = + make_device_mdarray(handle, device_mr, make_extents(n_rows_train, dim)); raft::linalg::unaryOp(trainset.data_handle(), trainset_tmp.data_handle(), trainset.size(), diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index 57f4c8d33d..ace98e6d3f 100644 --- a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -290,10 +290,21 @@ void sampleWithoutReplacement(RngState& rng_state, { ASSERT(sampledLen <= len, "sampleWithoutReplacement: 'sampledLen' cant be more than 'len'."); + // size_t free, total; + // float GiB = 1073741824.0f; + // cudaMemGetInfo(&free, &total); + // RAFT_LOG_INFO("sampleWithoutReplacement::start free mem %6.1f, used mem %6.1f", + // free / GiB, + // (total - free) / GiB); rmm::device_uvector expWts(len, stream); rmm::device_uvector sortedWts(len, stream); rmm::device_uvector inIdx(len, stream); rmm::device_uvector outIdxBuff(len, stream); + + // cudaMemGetInfo(&free, &total); + // RAFT_LOG_INFO("sampleWithoutReplacement::buffers free mem %6.1f, used mem %6.1f", + // free / GiB, + // (total - free) / GiB); auto* inIdxPtr = inIdx.data(); // generate modified weights SamplingParams params; diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index e55dc82f5d..294c3097ad 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -588,11 +588,22 @@ auto get_subsample_indices(raft::resources const& res, IdxT n_samples, IdxT n_su -> raft::device_vector { RAFT_EXPECTS(n_subsamples <= n_samples, "Cannot have more training samples than dataset vectors"); + // size_t free, total; + // float GiB = 1073741824.0f; + // cudaMemGetInfo(&free, &total); + // RAFT_LOG_INFO( + // "get_subsample_indices::data free mem %6.1f, used mem %6.1f", free / GiB, (total - free) / + // GiB); auto data_indices = raft::make_device_vector(res, n_samples); + // cudaMemGetInfo(&free, &total); + // RAFT_LOG_INFO("get_subsample_indices::train free mem %6.1f, used mem %6.1f", + // free / GiB, + // (total - free) / GiB); + + auto train_indices = raft::make_device_vector(res, n_subsamples); raft::linalg::map_offset(res, data_indices.view(), identity_op()); raft::random::RngState rng(seed); - auto train_indices = raft::make_device_vector(res, n_subsamples); raft::random::sample_without_replacement(res, rng, raft::make_const_mdspan(data_indices.view()), @@ -629,4 +640,29 @@ void subsample(raft::resources const& res, raft::matrix::detail::gather(res, dataset, make_const_mdspan(train_indices.view()), output); } } + +/** Subsample the dataset to create a training set*/ +template +raft::device_matrix subsample( + raft::resources const& res, const T* input, IdxT n_samples, IdxT n_train, IdxT n_dim, int seed) +{ + raft::device_vector train_indices = + get_subsample_indices(res, n_samples, n_train, seed); + + auto output = raft::make_device_matrix(res, n_train, n_dim); + cudaPointerAttributes attr; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, input)); + T* ptr = reinterpret_cast(attr.devicePointer); + if (ptr != nullptr) { + raft::matrix::gather(res, + raft::make_device_matrix_view(ptr, n_samples, n_dim), + raft::make_const_mdspan(train_indices.view()), + output.view()); + } else { + auto dataset = raft::make_host_matrix_view(input, n_samples, n_dim); + raft::matrix::detail::gather( + res, dataset, make_const_mdspan(train_indices.view()), output.view()); + } + return output; +} } // namespace raft::spatial::knn::detail::utils From 1d2a68140e98cf31186e5cb70be856490e0716cb Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 5 Feb 2024 12:01:55 +0100 Subject: [PATCH 02/21] Add subsample benchmark --- cpp/bench/prims/random/subsample.cu | 197 ++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) create mode 100644 cpp/bench/prims/random/subsample.cu diff --git a/cpp/bench/prims/random/subsample.cu b/cpp/bench/prims/random/subsample.cu new file mode 100644 index 0000000000..a89b1b1650 --- /dev/null +++ b/cpp/bench/prims/random/subsample.cu @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace raft::bench::random { + +struct sample_inputs { + int n_samples; + int n_train; + int method; +}; // struct sample_inputs + +template +auto excess_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamples, int seed) + -> raft::device_vector +{ + RAFT_EXPECTS(n_subsamples <= n_samples, "Cannot have more training samples than dataset vectors"); + auto stream = resource::get_cuda_stream(res); + + auto rnd_idx = + raft::make_device_vector(res, std::min(1.5 * n_subsamples, n_samples)); + auto linear_idx = raft::make_device_vector(res, rnd_idx.size()); + raft::linalg::map_offset(res, linear_idx.view(), identity_op()); + + raft::random::RngState state(137ULL); + raft::random::uniformInt( + res, state, rnd_idx.data_handle(), rnd_idx.size(), IdxT(0), IdxT(n_samples)); + + // Sort indices according to rnd keys + size_t workspace_size = 0; + cub::DeviceMergeSort::SortPairs(nullptr, + workspace_size, + rnd_idx.data_handle(), + linear_idx.data_handle(), + rnd_idx.size(), + raft::less_op{}); + float GiB = 1073741824.0f; + RAFT_LOG_INFO("worksize sort %6.1f GiB", workspace_size / GiB); + auto workspace = raft::make_device_vector(res, workspace_size); + cub::DeviceMergeSort::SortPairs(nullptr, + workspace_size, + rnd_idx.data_handle(), + linear_idx.data_handle(), + rnd_idx.size(), + raft::less_op{}); + + if (rnd_idx.size() == static_cast(n_samples)) { + // We shuffled the linear_idx array by sorting it according to rnd_idx. + // We return the first n_subsamples elements. + if (n_subsamples == n_samples) { return linear_idx; } + rnd_idx = raft::make_device_vector(res, n_subsamples); + raft::copy(rnd_idx.data_handle(), linear_idx.data_handle(), n_subsamples, stream); + return rnd_idx; + } + // Else we do a rejection sampling (or excess sampling): we generated more random indices than + // needed and reject the duplicates. + auto keys_out = raft::make_device_vector(res, rnd_idx.size()); + auto values_out = raft::make_device_vector(res, rnd_idx.size()); + rmm::device_scalar num_selected(stream); + size_t worksize2 = 0; + cub::DeviceSelect::UniqueByKey(nullptr, + worksize2, + rnd_idx.data_handle(), + linear_idx.data_handle(), + keys_out.data_handle(), + values_out.data_handle(), + num_selected.data(), + rnd_idx.size(), + stream); + + RAFT_LOG_INFO("worksize unique %6.1f GiB", worksize2 / GiB); + + if (worksize2 > workspace.size()) { + workspace = raft::make_device_vector(res, worksize2); + } + + cub::DeviceSelect::UniqueByKey(workspace.data_handle(), + worksize2, + rnd_idx.data_handle(), + linear_idx.data_handle(), + keys_out.data_handle(), + values_out.data_handle(), + num_selected.data(), + rnd_idx.size(), + stream); + + IdxT selected = num_selected.value(stream); + + if (selected < n_subsamples) { + RAFT_LOG_WARN("Subsampling returned with less unique indices (%zu) than requested (%zu)", + (size_t)selected, + (size_t)n_subsamples); + + } else { + RAFT_LOG_INFO( + "Subsampling unique indices (%zu) requested (%zu)", (size_t)selected, (size_t)n_subsamples); + } + + // need to shuffle again + cub::DeviceMergeSort::SortPairs(workspace.data_handle(), + worksize2, + linear_idx.data_handle(), + rnd_idx.data_handle(), + n_samples, + raft::less_op{}); + + if (n_subsamples == n_samples) { return linear_idx; } + values_out = raft::make_device_vector(res, n_subsamples); + raft::copy(values_out.data_handle(), rnd_idx.data_handle(), n_subsamples, stream); + return values_out; +} + +template +auto bernoulli_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamples, int seed) + -> raft::device_vector +{ + RAFT_EXPECTS(n_subsamples <= n_samples, "Cannot have more training samples than dataset vectors"); + + auto indices = raft::make_device_vector(res, n_subsamples); + raft::random::RngState state(123456ULL); + raft::random::uniformInt( + res, state, indices.data_handle(), n_subsamples, IdxT(0), IdxT(n_samples)); + return indices; +} + +template +struct sample : public fixture { + sample(const sample_inputs& p) + : params(p), + in(make_device_vector(res, p.n_samples)), + out(make_device_vector(res, p.n_train)) + { + raft::random::RngState r(123456ULL); + } + + void run_benchmark(::benchmark::State& state) override + { + raft::random::RngState r(123456ULL); + loop_on_state(state, [this, &r]() { + if (params.method == 0) { + this->out = raft::spatial::knn::detail::utils::get_subsample_indices( + this->res, this->params.n_samples, this->params.n_train, 137); + } else if (params.method == 1) { + this->out = + bernoulli_subsample(this->res, this->params.n_samples, this->params.n_train, 137); + } else if (params.method == 2) { + this->out = + excess_subsample(this->res, this->params.n_samples, this->params.n_train, 137); + } + // raft::random::permute( + // perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, + // stream); + }); + } + + private: + raft::device_resources res; + sample_inputs params; + raft::device_vector out, in; +}; // struct sample + +const std::vector input_vecs = {{10000000, 1000000, 0}, + {10000000, 10000000, 0}, + {100000000, 10000000, 1}, + {100000000, 100000000, 1}, + {100000000, 10000000, 2}, + {100000000, 50000000, 2}, + {100000000, 100000000, 2}}; + +RAFT_BENCH_REGISTER(sample, "", input_vecs); + +} // namespace raft::bench::random From 4040a9623134b88bfeff4e6507760f8145928f55 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 12 Mar 2024 01:14:32 +0100 Subject: [PATCH 03/21] debug --- cpp/bench/prims/CMakeLists.txt | 172 +++++++++--------- cpp/bench/prims/random/subsample.cu | 65 +++++-- .../raft/spatial/knn/detail/ann_utils.cuh | 4 + 3 files changed, 142 insertions(+), 99 deletions(-) diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index 3a2431cd34..18936317f6 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -74,94 +74,96 @@ function(ConfigureBench) endfunction() if(BUILD_PRIMS_BENCH) - ConfigureBench( - NAME CORE_BENCH PATH bench/prims/core/bitset.cu bench/prims/core/copy.cu bench/prims/main.cpp - ) - - ConfigureBench( - NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu - bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu - bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp - ) - - ConfigureBench( - NAME - DISTANCE_BENCH - PATH - bench/prims/distance/distance_cosine.cu - bench/prims/distance/distance_exp_l2.cu - bench/prims/distance/distance_l1.cu - bench/prims/distance/distance_unexp_l2.cu - bench/prims/distance/fused_l2_nn.cu - bench/prims/distance/masked_nn.cu - bench/prims/distance/kernels.cu - bench/prims/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME - LINALG_BENCH - PATH - bench/prims/linalg/add.cu - bench/prims/linalg/map_then_reduce.cu - bench/prims/linalg/matrix_vector_op.cu - bench/prims/linalg/norm.cu - bench/prims/linalg/normalize.cu - bench/prims/linalg/reduce_cols_by_key.cu - bench/prims/linalg/reduce_rows_by_key.cu - bench/prims/linalg/reduce.cu - bench/prims/linalg/sddmm.cu - bench/prims/main.cpp - ) - - ConfigureBench( - NAME - MATRIX_BENCH - PATH - bench/prims/matrix/argmin.cu - bench/prims/matrix/gather.cu - bench/prims/matrix/select_k.cu - bench/prims/matrix/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) + # ConfigureBench( + # NAME CORE_BENCH PATH bench/prims/core/bitset.cu bench/prims/core/copy.cu bench/prims/main.cpp + # ) + + # ConfigureBench( + # NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu + # bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureBench( + # NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu + # bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp + # ) + + # ConfigureBench( + # NAME + # DISTANCE_BENCH + # PATH + # bench/prims/distance/distance_cosine.cu + # bench/prims/distance/distance_exp_l2.cu + # bench/prims/distance/distance_l1.cu + # bench/prims/distance/distance_unexp_l2.cu + # bench/prims/distance/fused_l2_nn.cu + # bench/prims/distance/masked_nn.cu + # bench/prims/distance/kernels.cu + # bench/prims/main.cpp + # OPTIONAL + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureBench( + # NAME + # LINALG_BENCH + # PATH + # bench/prims/linalg/add.cu + # bench/prims/linalg/map_then_reduce.cu + # bench/prims/linalg/matrix_vector_op.cu + # bench/prims/linalg/norm.cu + # bench/prims/linalg/normalize.cu + # bench/prims/linalg/reduce_cols_by_key.cu + # bench/prims/linalg/reduce_rows_by_key.cu + # bench/prims/linalg/reduce.cu + # bench/prims/linalg/sddmm.cu + # bench/prims/main.cpp + # ) + + # ConfigureBench( + # NAME + # MATRIX_BENCH + # PATH + # bench/prims/matrix/argmin.cu + # bench/prims/matrix/gather.cu + # bench/prims/matrix/select_k.cu + # bench/prims/matrix/main.cpp + # OPTIONAL + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) ConfigureBench( - NAME RANDOM_BENCH PATH bench/prims/random/make_blobs.cu bench/prims/random/permute.cu - bench/prims/random/rng.cu bench/prims/main.cpp + NAME RANDOM_BENCH PATH + # bench/prims/random/make_blobs.cu bench/prims/random/permute.cu + # bench/prims/random/rng.cu + bench/prims/random/subsample.cu bench/prims/main.cpp ) - ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) - - ConfigureBench( - NAME - NEIGHBORS_BENCH - PATH - bench/prims/neighbors/knn/brute_force_float_int64_t.cu - bench/prims/neighbors/knn/brute_force_float_uint32_t.cu - bench/prims/neighbors/knn/cagra_float_uint32_t.cu - bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu - bench/prims/neighbors/refine_float_int64_t.cu - bench/prims/neighbors/refine_uint8_t_int64_t.cu - bench/prims/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) + # ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) + + # ConfigureBench( + # NAME + # NEIGHBORS_BENCH + # PATH + # bench/prims/neighbors/knn/brute_force_float_int64_t.cu + # bench/prims/neighbors/knn/brute_force_float_uint32_t.cu + # bench/prims/neighbors/knn/cagra_float_uint32_t.cu + # bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu + # bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu + # bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu + # bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu + # bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu + # bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu + # bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu + # bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu + # bench/prims/neighbors/refine_float_int64_t.cu + # bench/prims/neighbors/refine_uint8_t_int64_t.cu + # bench/prims/main.cpp + # OPTIONAL + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) endif() diff --git a/cpp/bench/prims/random/subsample.cu b/cpp/bench/prims/random/subsample.cu index a89b1b1650..03b22db95a 100644 --- a/cpp/bench/prims/random/subsample.cu +++ b/cpp/bench/prims/random/subsample.cu @@ -15,7 +15,7 @@ */ #include -#include + #include #include #include @@ -25,8 +25,11 @@ #include #include #include + #include +#include + namespace raft::bench::random { struct sample_inputs { @@ -42,8 +45,15 @@ auto excess_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamp RAFT_EXPECTS(n_subsamples <= n_samples, "Cannot have more training samples than dataset vectors"); auto stream = resource::get_cuda_stream(res); + // number of samples we'll need to sample (with replacement), to expect 'k' + // unique samples from 'n' is given by the following equation: log(1 - k/n)/log(1 - 1/n) ref: + // https://stats.stackexchange.com/questions/296005/the-expected-number-of-unique-elements-drawn-with-replacement + IdxT n_excess_samples = std::ceil(raft::log(1 - double(n_subsamples) / double(n_samples)) / + (raft::log(1 - 1 / double(n_samples)))); auto rnd_idx = - raft::make_device_vector(res, std::min(1.5 * n_subsamples, n_samples)); + raft::make_device_vector(res, std::min(n_excess_samples, n_samples)); + + RAFT_LOG_INFO("We will draw %zu random samples", (size_t)rnd_idx.size()); auto linear_idx = raft::make_device_vector(res, rnd_idx.size()); raft::linalg::map_offset(res, linear_idx.view(), identity_op()); @@ -51,6 +61,9 @@ auto excess_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamp raft::random::uniformInt( res, state, rnd_idx.data_handle(), rnd_idx.size(), IdxT(0), IdxT(n_samples)); + if (rnd_idx.size() <= 100) { + print_vector("rnd_idx", rnd_idx.data_handle(), rnd_idx.size(), std::cout); + } // Sort indices according to rnd keys size_t workspace_size = 0; cub::DeviceMergeSort::SortPairs(nullptr, @@ -62,13 +75,19 @@ auto excess_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamp float GiB = 1073741824.0f; RAFT_LOG_INFO("worksize sort %6.1f GiB", workspace_size / GiB); auto workspace = raft::make_device_vector(res, workspace_size); - cub::DeviceMergeSort::SortPairs(nullptr, + cub::DeviceMergeSort::SortPairs(workspace.data_handle(), workspace_size, rnd_idx.data_handle(), linear_idx.data_handle(), rnd_idx.size(), raft::less_op{}); + if (rnd_idx.size() <= 100) { + print_vector("rnd _idx sorted", rnd_idx.data_handle(), rnd_idx.size(), std::cout); + } + if (rnd_idx.size() <= 100) { + print_vector("linear_idx sorted", linear_idx.data_handle(), linear_idx.size(), std::cout); + } if (rnd_idx.size() == static_cast(n_samples)) { // We shuffled the linear_idx array by sorting it according to rnd_idx. // We return the first n_subsamples elements. @@ -111,12 +130,18 @@ auto excess_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamp IdxT selected = num_selected.value(stream); + if (rnd_idx.size() <= 100) { + print_vector("unique keys (rnd_idx)", keys_out.data_handle(), selected, std::cout); + print_vector("unique vals (linear idx)", values_out.data_handle(), selected, std::cout); + } if (selected < n_subsamples) { RAFT_LOG_WARN("Subsampling returned with less unique indices (%zu) than requested (%zu)", (size_t)selected, (size_t)n_subsamples); } else { + RAFT_LOG_INFO( + "We have %zu unique idices out of %zu samples", (size_t)selected, (size_t)rnd_idx.size()); RAFT_LOG_INFO( "Subsampling unique indices (%zu) requested (%zu)", (size_t)selected, (size_t)n_subsamples); } @@ -124,14 +149,18 @@ auto excess_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamp // need to shuffle again cub::DeviceMergeSort::SortPairs(workspace.data_handle(), worksize2, - linear_idx.data_handle(), - rnd_idx.data_handle(), - n_samples, + values_out.data_handle(), + keys_out.data_handle(), + n_subsamples, raft::less_op{}); + if (rnd_idx.size() <= 100) { + print_vector("re sorted keys ", keys_out.data_handle(), selected, std::cout); + print_vector("re sorted vals ", values_out.data_handle(), selected, std::cout); + } if (n_subsamples == n_samples) { return linear_idx; } values_out = raft::make_device_vector(res, n_subsamples); - raft::copy(values_out.data_handle(), rnd_idx.data_handle(), n_subsamples, stream); + raft::copy(values_out.data_handle(), keys_out.data_handle(), n_subsamples, stream); return values_out; } @@ -176,6 +205,9 @@ struct sample : public fixture { // perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, // stream); }); + if (this->params.n_train <= 100) { + print_vector("samples", this->out.data_handle(), this->params.n_train, std::cout); + } } private: @@ -184,13 +216,18 @@ struct sample : public fixture { raft::device_vector out, in; }; // struct sample -const std::vector input_vecs = {{10000000, 1000000, 0}, - {10000000, 10000000, 0}, - {100000000, 10000000, 1}, - {100000000, 100000000, 1}, - {100000000, 10000000, 2}, - {100000000, 50000000, 2}, - {100000000, 100000000, 2}}; +const std::vector input_vecs = { + {100, 20, 2}, {10, 5, 2}, + //{100, 50, 2}, + // {10000000, 1000000, 0}, + // {10000000, 10000000, 0}, + // {100000000, 10000000, 1}, + // {100000000, 100000000, 1}, + // {100000000, 10000000, 2}, + // {100000000, 50000000, 2}, + // {1000, 900, 2} + //{100000000, 100000000, 2} +}; RAFT_BENCH_REGISTER(sample, "", input_vecs); diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index e7e9ec6c08..d7f4651b56 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -18,6 +18,10 @@ #include #include +#include +#include +#include +#include #include #include #include From e09c9f7b5381ba159135fe37e2a50bbb5add2ebf Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 12 Mar 2024 23:39:02 +0100 Subject: [PATCH 04/21] Fix bug --- cpp/bench/prims/random/subsample.cu | 159 +--- cpp/include/raft/random/detail/rng_device.cuh | 1 + cpp/include/raft/random/detail/rng_impl.cuh | 159 ++++ cpp/include/raft/random/rng.cuh | 24 + cpp/test/CMakeLists.txt | 753 +++++++++--------- 5 files changed, 577 insertions(+), 519 deletions(-) diff --git a/cpp/bench/prims/random/subsample.cu b/cpp/bench/prims/random/subsample.cu index 03b22db95a..64a5e32669 100644 --- a/cpp/bench/prims/random/subsample.cu +++ b/cpp/bench/prims/random/subsample.cu @@ -38,132 +38,6 @@ struct sample_inputs { int method; }; // struct sample_inputs -template -auto excess_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamples, int seed) - -> raft::device_vector -{ - RAFT_EXPECTS(n_subsamples <= n_samples, "Cannot have more training samples than dataset vectors"); - auto stream = resource::get_cuda_stream(res); - - // number of samples we'll need to sample (with replacement), to expect 'k' - // unique samples from 'n' is given by the following equation: log(1 - k/n)/log(1 - 1/n) ref: - // https://stats.stackexchange.com/questions/296005/the-expected-number-of-unique-elements-drawn-with-replacement - IdxT n_excess_samples = std::ceil(raft::log(1 - double(n_subsamples) / double(n_samples)) / - (raft::log(1 - 1 / double(n_samples)))); - auto rnd_idx = - raft::make_device_vector(res, std::min(n_excess_samples, n_samples)); - - RAFT_LOG_INFO("We will draw %zu random samples", (size_t)rnd_idx.size()); - auto linear_idx = raft::make_device_vector(res, rnd_idx.size()); - raft::linalg::map_offset(res, linear_idx.view(), identity_op()); - - raft::random::RngState state(137ULL); - raft::random::uniformInt( - res, state, rnd_idx.data_handle(), rnd_idx.size(), IdxT(0), IdxT(n_samples)); - - if (rnd_idx.size() <= 100) { - print_vector("rnd_idx", rnd_idx.data_handle(), rnd_idx.size(), std::cout); - } - // Sort indices according to rnd keys - size_t workspace_size = 0; - cub::DeviceMergeSort::SortPairs(nullptr, - workspace_size, - rnd_idx.data_handle(), - linear_idx.data_handle(), - rnd_idx.size(), - raft::less_op{}); - float GiB = 1073741824.0f; - RAFT_LOG_INFO("worksize sort %6.1f GiB", workspace_size / GiB); - auto workspace = raft::make_device_vector(res, workspace_size); - cub::DeviceMergeSort::SortPairs(workspace.data_handle(), - workspace_size, - rnd_idx.data_handle(), - linear_idx.data_handle(), - rnd_idx.size(), - raft::less_op{}); - - if (rnd_idx.size() <= 100) { - print_vector("rnd _idx sorted", rnd_idx.data_handle(), rnd_idx.size(), std::cout); - } - if (rnd_idx.size() <= 100) { - print_vector("linear_idx sorted", linear_idx.data_handle(), linear_idx.size(), std::cout); - } - if (rnd_idx.size() == static_cast(n_samples)) { - // We shuffled the linear_idx array by sorting it according to rnd_idx. - // We return the first n_subsamples elements. - if (n_subsamples == n_samples) { return linear_idx; } - rnd_idx = raft::make_device_vector(res, n_subsamples); - raft::copy(rnd_idx.data_handle(), linear_idx.data_handle(), n_subsamples, stream); - return rnd_idx; - } - // Else we do a rejection sampling (or excess sampling): we generated more random indices than - // needed and reject the duplicates. - auto keys_out = raft::make_device_vector(res, rnd_idx.size()); - auto values_out = raft::make_device_vector(res, rnd_idx.size()); - rmm::device_scalar num_selected(stream); - size_t worksize2 = 0; - cub::DeviceSelect::UniqueByKey(nullptr, - worksize2, - rnd_idx.data_handle(), - linear_idx.data_handle(), - keys_out.data_handle(), - values_out.data_handle(), - num_selected.data(), - rnd_idx.size(), - stream); - - RAFT_LOG_INFO("worksize unique %6.1f GiB", worksize2 / GiB); - - if (worksize2 > workspace.size()) { - workspace = raft::make_device_vector(res, worksize2); - } - - cub::DeviceSelect::UniqueByKey(workspace.data_handle(), - worksize2, - rnd_idx.data_handle(), - linear_idx.data_handle(), - keys_out.data_handle(), - values_out.data_handle(), - num_selected.data(), - rnd_idx.size(), - stream); - - IdxT selected = num_selected.value(stream); - - if (rnd_idx.size() <= 100) { - print_vector("unique keys (rnd_idx)", keys_out.data_handle(), selected, std::cout); - print_vector("unique vals (linear idx)", values_out.data_handle(), selected, std::cout); - } - if (selected < n_subsamples) { - RAFT_LOG_WARN("Subsampling returned with less unique indices (%zu) than requested (%zu)", - (size_t)selected, - (size_t)n_subsamples); - - } else { - RAFT_LOG_INFO( - "We have %zu unique idices out of %zu samples", (size_t)selected, (size_t)rnd_idx.size()); - RAFT_LOG_INFO( - "Subsampling unique indices (%zu) requested (%zu)", (size_t)selected, (size_t)n_subsamples); - } - - // need to shuffle again - cub::DeviceMergeSort::SortPairs(workspace.data_handle(), - worksize2, - values_out.data_handle(), - keys_out.data_handle(), - n_subsamples, - raft::less_op{}); - - if (rnd_idx.size() <= 100) { - print_vector("re sorted keys ", keys_out.data_handle(), selected, std::cout); - print_vector("re sorted vals ", values_out.data_handle(), selected, std::cout); - } - if (n_subsamples == n_samples) { return linear_idx; } - values_out = raft::make_device_vector(res, n_subsamples); - raft::copy(values_out.data_handle(), keys_out.data_handle(), n_subsamples, stream); - return values_out; -} - template auto bernoulli_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamples, int seed) -> raft::device_vector @@ -198,12 +72,9 @@ struct sample : public fixture { this->out = bernoulli_subsample(this->res, this->params.n_samples, this->params.n_train, 137); } else if (params.method == 2) { - this->out = - excess_subsample(this->res, this->params.n_samples, this->params.n_train, 137); + this->out = raft::random::excess_subsample( + this->res, r, this->params.n_samples, this->params.n_train); } - // raft::random::permute( - // perms.data(), out.data(), in.data(), params.cols, params.rows, params.rowMajor, - // stream); }); if (this->params.n_train <= 100) { print_vector("samples", this->out.data_handle(), this->params.n_train, std::cout); @@ -216,18 +87,20 @@ struct sample : public fixture { raft::device_vector out, in; }; // struct sample -const std::vector input_vecs = { - {100, 20, 2}, {10, 5, 2}, - //{100, 50, 2}, - // {10000000, 1000000, 0}, - // {10000000, 10000000, 0}, - // {100000000, 10000000, 1}, - // {100000000, 100000000, 1}, - // {100000000, 10000000, 2}, - // {100000000, 50000000, 2}, - // {1000, 900, 2} - //{100000000, 100000000, 2} -}; +const std::vector input_vecs = {{100, 20, 2}, + {10, 5, 2}, + {20, 10, 2}, + {20, 15, 2}, + {100, 50, 2}, + {1000, 500, 2}, + {1000, 600, 2}, + {1000, 700, 2}, + {10000, 5000, 2}, + {100000, 50000, 2}, + {100000000, 10000000, 2}, + {100000000, 50000000, 2}, + {1000, 900, 2}, + {100000000, 100000000, 2}}; RAFT_BENCH_REGISTER(sample, "", input_vecs); diff --git a/cpp/include/raft/random/detail/rng_device.cuh b/cpp/include/raft/random/detail/rng_device.cuh index 12c67679ba..5e962fc982 100644 --- a/cpp/include/raft/random/detail/rng_device.cuh +++ b/cpp/include/raft/random/detail/rng_device.cuh @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index ace98e6d3f..08be3f6a98 100644 --- a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -17,12 +17,18 @@ #pragma once #include +#include +#include #include #include #include #include #include +#include + +#include + namespace raft { namespace random { namespace detail { @@ -278,6 +284,7 @@ std::enable_if_t> discrete(RngState& rng_state, len); } +/** Note the memory space requirements are O(4*len) */ template void sampleWithoutReplacement(RngState& rng_state, DataT* out, @@ -339,6 +346,158 @@ void affine_transform_params(RngState const& rng_state, IdxT n, IdxT& a, IdxT& b b = mt_rng() % n; } +/** @brief Sample without replacement from range 0..N-1. + * + * Elements are sampled uniformly. + * The algorithm will allocate a workspace of size O(4*n_samples) internally. + * + * We use max N random numbers. Depending on how large n_samples is w.r.t to N, we + * either use rejection sampling, sort the [0..N-1] values using random keys. + * + * @tparam IdxT type of indices that we sample + * @tparam MatIdxT extent type of the returned mdarray + * + * @param res RAFT resource handle + * @param RngState state random number generator state + * @param N number of elements to sample from. We will sample values in range 0..N-1 + * @param n_samples number of samples to return + * + * @return device mdarray with the random samples + */ +template +auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT n_samples) + -> raft::device_vector +{ + RAFT_EXPECTS(n_samples <= N, "Cannot have more training samples than dataset vectors"); + + // Number of samples we'll need to sample (with replacement), to expect 'k' + // unique samples from 'n' is given by the following equation: log(1 - k/n)/log(1 - 1/n) ref: + // https://stats.stackexchange.com/questions/296005/the-expected-number-of-unique-elements-drawn-with-replacement + IdxT n_excess_samples = + n_samples < N + ? std::ceil(raft::log(1 - double(n_samples) / double(N)) / (raft::log(1 - 1 / double(N)))) + : N; + + // There is a variance of n_excess_samples, we take 10% more elements. + n_excess_samples += std::max(0.1 * n_samples, 100); + + // n_excess_sampless will be larger than N around k = 0.64*N. When we reach N, then instead of + // doing rejection sampling, we simply shuffle the range [0..N-1] using N random numbers. + n_excess_samples = std::min(n_excess_samples, N); + auto rnd_idx = raft::make_device_vector(res, n_excess_samples); + + RAFT_LOG_INFO("We will draw %zu random samples", (size_t)rnd_idx.size()); + auto linear_idx = raft::make_device_vector(res, rnd_idx.size()); + raft::linalg::map_offset(res, linear_idx.view(), identity_op()); + + uniformInt(res, state, rnd_idx.data_handle(), rnd_idx.size(), IdxT(0), IdxT(N)); + + if (rnd_idx.size() <= 100) { + print_vector("rnd_idx", rnd_idx.data_handle(), rnd_idx.size(), std::cout); + } + // Sort indices according to rnd keys + size_t workspace_size = 0; + auto stream = resource::get_cuda_stream(res); + cub::DeviceMergeSort::SortPairs(nullptr, + workspace_size, + rnd_idx.data_handle(), + linear_idx.data_handle(), + rnd_idx.size(), + raft::less_op{}, + stream); + float GiB = 1073741824.0f; + RAFT_LOG_INFO("worksize sort %6.1f GiB", workspace_size / GiB); + auto workspace = raft::make_device_vector(res, workspace_size); + cub::DeviceMergeSort::SortPairs(workspace.data_handle(), + workspace_size, + rnd_idx.data_handle(), + linear_idx.data_handle(), + rnd_idx.size(), + raft::less_op{}, + stream); + + if (rnd_idx.size() <= 100) { + print_vector("rnd _idx sorted", rnd_idx.data_handle(), rnd_idx.size(), std::cout); + } + if (rnd_idx.size() <= 100) { + print_vector("linear_idx sorted", linear_idx.data_handle(), linear_idx.size(), std::cout); + } + if (rnd_idx.size() == static_cast(N)) { + // We shuffled the linear_idx array by sorting it according to rnd_idx. + // We return the first n_samples elements. + if (n_samples == N) { return linear_idx; } + rnd_idx = raft::make_device_vector(res, n_samples); + raft::copy(rnd_idx.data_handle(), linear_idx.data_handle(), n_samples, stream); + return rnd_idx; + } + // Else we do a rejection sampling (or excess sampling): we generated more random indices than + // needed and reject the duplicates. + auto keys_out = raft::make_device_vector(res, rnd_idx.size()); + auto values_out = raft::make_device_vector(res, rnd_idx.size()); + rmm::device_scalar num_selected(stream); + size_t worksize2 = 0; + cub::DeviceSelect::UniqueByKey(nullptr, + worksize2, + rnd_idx.data_handle(), + linear_idx.data_handle(), + keys_out.data_handle(), + values_out.data_handle(), + num_selected.data(), + rnd_idx.size(), + stream); + + RAFT_LOG_INFO("worksize unique %6.1f GiB", worksize2 / GiB); + + if (worksize2 > workspace.size()) { + workspace = raft::make_device_vector(res, worksize2); + workspace_size = workspace.size(); + } + + cub::DeviceSelect::UniqueByKey(workspace.data_handle(), + workspace_size, + rnd_idx.data_handle(), + linear_idx.data_handle(), + keys_out.data_handle(), + values_out.data_handle(), + num_selected.data(), + rnd_idx.size(), + stream); + + IdxT selected = num_selected.value(stream); + + if (rnd_idx.size() <= 100) { + print_vector("unique keys (rnd_idx)", keys_out.data_handle(), selected, std::cout); + print_vector("unique vals (linear idx)", values_out.data_handle(), selected, std::cout); + } + if (selected < n_samples) { + RAFT_LOG_WARN("Subsampling returned with less unique indices (%zu) than requested (%zu)", + (size_t)selected, + (size_t)n_samples); + } + RAFT_LOG_INFO( + "We have %zu unique idices out of %zu samples", (size_t)selected, (size_t)rnd_idx.size()); + RAFT_LOG_INFO( + "Subsampling unique indices (%zu) requested (%zu)", (size_t)selected, (size_t)n_samples); + + // After duplicates are removed, we need to shuffle back to random order + + cub::DeviceMergeSort::SortPairs(workspace.data_handle(), + workspace_size, + values_out.data_handle(), + keys_out.data_handle(), + n_samples, + raft::less_op{}, + stream); + if (rnd_idx.size() <= 100) { + print_vector("re sorted keys ", keys_out.data_handle(), selected, std::cout); + print_vector("re sorted vals ", values_out.data_handle(), selected, std::cout); + } + + values_out = raft::make_device_vector(res, n_samples); + raft::copy(values_out.data_handle(), keys_out.data_handle(), n_samples, stream); + return values_out; +} + }; // end namespace detail }; // end namespace random }; // end namespace raft diff --git a/cpp/include/raft/random/rng.cuh b/cpp/include/raft/random/rng.cuh index 4e63669f98..10d809d3b8 100644 --- a/cpp/include/raft/random/rng.cuh +++ b/cpp/include/raft/random/rng.cuh @@ -813,6 +813,30 @@ void sampleWithoutReplacement(raft::resources const& handle, rng_state, out, outIdx, in, wts, sampledLen, len, resource::get_cuda_stream(handle)); } +/** @brief Sample without replacement from range 0..N-1. + * + * Elements are sampled uniformly. + * The algorithm will allocate a workspace of size O(4*n_samples) internally. + * + * We use max N random numbers. Depending on how large n_samples is w.r.t to N, we + * either use rejection sampling, sort the [0..N-1] values using random keys. + * + * @tparam IdxT type of indices that we sample + * @tparam MatIdxT extent type of the returned mdarray + * + * @param res RAFT resource handle + * @param RngState state random number generator state + * @param N number of elements to sample from. We will sample values in range 0..N-1 + * @param n_samples number of samples to return + * + * @return device mdarray with the random samples + */ +template +auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT n_samples) +{ + return detail::excess_subsample(res, state, N, n_samples); +} + /** * @brief Generates the 'a' and 'b' parameters for a modulo affine * transformation equation: `(ax + b) % n` diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index dd7eb839ab..28ef83af34 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -95,389 +95,390 @@ endfunction() # * distance tests ------------------------------------------------------------------------- if(BUILD_TESTS) - ConfigureTest( - NAME - CLUSTER_TEST - PATH - test/cluster/kmeans.cu - test/cluster/kmeans_balanced.cu - test/cluster/kmeans_find_k.cu - test/cluster/cluster_solvers.cu - test/cluster/linkage.cu - test/cluster/spectral.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - CORE_TEST - PATH - test/core/bitset.cu - test/core/device_resources_manager.cpp - test/core/device_setter.cpp - test/core/logger.cpp - test/core/math_device.cu - test/core/math_host.cpp - test/core/operators_device.cu - test/core/operators_host.cpp - test/core/handle.cpp - test/core/interruptible.cu - test/core/nvtx.cpp - test/core/mdarray.cu - test/core/mdbuffer.cu - test/core/mdspan_copy.cpp - test/core/mdspan_copy.cu - test/core/mdspan_utils.cu - test/core/numpy_serializer.cu - test/core/memory_type.cpp - test/core/sparse_matrix.cu - test/core/sparse_matrix.cpp - test/core/span.cpp - test/core/span.cu - test/core/stream_view.cpp - test/core/temporary_device_buffer.cu - test/test.cpp - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB - EXPLICIT_INSTANTIATE_ONLY NOCUDA - ) - - ConfigureTest( - NAME - DISTANCE_TEST - PATH - test/distance/dist_adj.cu - test/distance/dist_adj_distance_instance.cu - test/distance/dist_canberra.cu - test/distance/dist_correlation.cu - test/distance/dist_cos.cu - test/distance/dist_hamming.cu - test/distance/dist_hellinger.cu - test/distance/dist_inner_product.cu - test/distance/dist_jensen_shannon.cu - test/distance/dist_kl_divergence.cu - test/distance/dist_l1.cu - test/distance/dist_l2_exp.cu - test/distance/dist_l2_unexp.cu - test/distance/dist_l2_sqrt_exp.cu - test/distance/dist_l_inf.cu - test/distance/dist_lp_unexp.cu - test/distance/dist_russell_rao.cu - test/distance/masked_nn.cu - test/distance/masked_nn_compress_to_bits.cu - test/distance/fused_l2_nn.cu - test/distance/gram.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - list( - APPEND - EXT_HEADER_TEST_SOURCES - test/ext_headers/raft_neighbors_brute_force.cu - test/ext_headers/raft_distance_distance.cu - test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - test/ext_headers/raft_matrix_detail_select_k.cu - test/ext_headers/raft_neighbors_ball_cover.cu - test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - test/ext_headers/raft_distance_fused_l2_nn.cu - test/ext_headers/raft_neighbors_ivf_pq.cu - test/ext_headers/raft_util_memory_pool.cpp - test/ext_headers/raft_neighbors_ivf_flat.cu - test/ext_headers/raft_core_logger.cpp - test/ext_headers/raft_neighbors_refine.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu - test/ext_headers/raft_linalg_detail_coalesced_reduction.cu - test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu - ) - - # Test that the split headers compile in isolation with: - # - # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined - # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined - # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. - ConfigureTest( - NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB - EXPLICIT_INSTANTIATE_ONLY - ) - ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) - ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) - - ConfigureTest( - NAME - LINALG_TEST - PATH - test/linalg/add.cu - test/linalg/axpy.cu - test/linalg/binary_op.cu - test/linalg/cholesky_r1.cu - test/linalg/coalesced_reduction.cu - test/linalg/divide.cu - test/linalg/dot.cu - test/linalg/eig.cu - test/linalg/eig_sel.cu - test/linalg/gemm_layout.cu - test/linalg/gemv.cu - test/linalg/map.cu - test/linalg/map_then_reduce.cu - test/linalg/matrix_vector.cu - test/linalg/matrix_vector_op.cu - test/linalg/mean_squared_error.cu - test/linalg/multiply.cu - test/linalg/norm.cu - test/linalg/normalize.cu - test/linalg/power.cu - test/linalg/randomized_svd.cu - test/linalg/reduce.cu - test/linalg/reduce_cols_by_key.cu - test/linalg/reduce_rows_by_key.cu - test/linalg/rsvd.cu - test/linalg/sqrt.cu - test/linalg/strided_reduction.cu - test/linalg/subtract.cu - test/linalg/svd.cu - test/linalg/ternary_op.cu - test/linalg/transpose.cu - test/linalg/unary_op.cu - ) - - ConfigureTest( - NAME - MATRIX_TEST - PATH - test/matrix/argmax.cu - test/matrix/argmin.cu - test/matrix/columnSort.cu - test/matrix/diagonal.cu - test/matrix/gather.cu - test/matrix/scatter.cu - test/matrix/eye.cu - test/matrix/linewise_op.cu - test/matrix/math.cu - test/matrix/matrix.cu - test/matrix/norm.cu - test/matrix/reverse.cu - test/matrix/slice.cu - test/matrix/triangular.cu - test/sparse/spectral_matrix.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) - - ConfigureTest( - NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) + # ConfigureTest( + # NAME + # CLUSTER_TEST + # PATH + # test/cluster/kmeans.cu + # test/cluster/kmeans_balanced.cu + # test/cluster/kmeans_find_k.cu + # test/cluster/cluster_solvers.cu + # test/cluster/linkage.cu + # test/cluster/spectral.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # CORE_TEST + # PATH + # test/core/bitset.cu + # test/core/device_resources_manager.cpp + # test/core/device_setter.cpp + # test/core/logger.cpp + # test/core/math_device.cu + # test/core/math_host.cpp + # test/core/operators_device.cu + # test/core/operators_host.cpp + # test/core/handle.cpp + # test/core/interruptible.cu + # test/core/nvtx.cpp + # test/core/mdarray.cu + # test/core/mdbuffer.cu + # test/core/mdspan_copy.cpp + # test/core/mdspan_copy.cu + # test/core/mdspan_utils.cu + # test/core/numpy_serializer.cu + # test/core/memory_type.cpp + # test/core/sparse_matrix.cu + # test/core/sparse_matrix.cpp + # test/core/span.cpp + # test/core/span.cu + # test/core/stream_view.cpp + # test/core/temporary_device_buffer.cu + # test/test.cpp + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB + # EXPLICIT_INSTANTIATE_ONLY NOCUDA + # ) + + # ConfigureTest( + # NAME + # DISTANCE_TEST + # PATH + # test/distance/dist_adj.cu + # test/distance/dist_adj_distance_instance.cu + # test/distance/dist_canberra.cu + # test/distance/dist_correlation.cu + # test/distance/dist_cos.cu + # test/distance/dist_hamming.cu + # test/distance/dist_hellinger.cu + # test/distance/dist_inner_product.cu + # test/distance/dist_jensen_shannon.cu + # test/distance/dist_kl_divergence.cu + # test/distance/dist_l1.cu + # test/distance/dist_l2_exp.cu + # test/distance/dist_l2_unexp.cu + # test/distance/dist_l2_sqrt_exp.cu + # test/distance/dist_l_inf.cu + # test/distance/dist_lp_unexp.cu + # test/distance/dist_russell_rao.cu + # test/distance/masked_nn.cu + # test/distance/masked_nn_compress_to_bits.cu + # test/distance/fused_l2_nn.cu + # test/distance/gram.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # list( + # APPEND + # EXT_HEADER_TEST_SOURCES + # test/ext_headers/raft_neighbors_brute_force.cu + # test/ext_headers/raft_distance_distance.cu + # test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu + # test/ext_headers/raft_matrix_detail_select_k.cu + # test/ext_headers/raft_neighbors_ball_cover.cu + # test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu + # test/ext_headers/raft_distance_fused_l2_nn.cu + # test/ext_headers/raft_neighbors_ivf_pq.cu + # test/ext_headers/raft_util_memory_pool.cpp + # test/ext_headers/raft_neighbors_ivf_flat.cu + # test/ext_headers/raft_core_logger.cpp + # test/ext_headers/raft_neighbors_refine.cu + # test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu + # test/ext_headers/raft_linalg_detail_coalesced_reduction.cu + # test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu + # test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu + # test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu + # ) + + # # Test that the split headers compile in isolation with: + # # + # # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined + # # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined + # # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. + # ConfigureTest( + # NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + # ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) + # ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) + + # ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) + + # ConfigureTest( + # NAME + # LINALG_TEST + # PATH + # test/linalg/add.cu + # test/linalg/axpy.cu + # test/linalg/binary_op.cu + # test/linalg/cholesky_r1.cu + # test/linalg/coalesced_reduction.cu + # test/linalg/divide.cu + # test/linalg/dot.cu + # test/linalg/eig.cu + # test/linalg/eig_sel.cu + # test/linalg/gemm_layout.cu + # test/linalg/gemv.cu + # test/linalg/map.cu + # test/linalg/map_then_reduce.cu + # test/linalg/matrix_vector.cu + # test/linalg/matrix_vector_op.cu + # test/linalg/mean_squared_error.cu + # test/linalg/multiply.cu + # test/linalg/norm.cu + # test/linalg/normalize.cu + # test/linalg/power.cu + # test/linalg/randomized_svd.cu + # test/linalg/reduce.cu + # test/linalg/reduce_cols_by_key.cu + # test/linalg/reduce_rows_by_key.cu + # test/linalg/rsvd.cu + # test/linalg/sqrt.cu + # test/linalg/strided_reduction.cu + # test/linalg/subtract.cu + # test/linalg/svd.cu + # test/linalg/ternary_op.cu + # test/linalg/transpose.cu + # test/linalg/unary_op.cu + # ) + + # ConfigureTest( + # NAME + # MATRIX_TEST + # PATH + # test/matrix/argmax.cu + # test/matrix/argmin.cu + # test/matrix/columnSort.cu + # test/matrix/diagonal.cu + # test/matrix/gather.cu + # test/matrix/scatter.cu + # test/matrix/eye.cu + # test/matrix/linewise_op.cu + # test/matrix/math.cu + # test/matrix/matrix.cu + # test/matrix/norm.cu + # test/matrix/reverse.cu + # test/matrix/slice.cu + # test/matrix/triangular.cu + # test/sparse/spectral_matrix.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) + + # ConfigureTest( + # NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) ConfigureTest( NAME RANDOM_TEST PATH - test/random/make_blobs.cu - test/random/make_regression.cu - test/random/multi_variable_gaussian.cu - test/random/rng_pcg_host_api.cu - test/random/permute.cu - test/random/rng.cu - test/random/rng_discrete.cu - test/random/rng_int.cu - test/random/rmat_rectangular_generator.cu - test/random/sample_without_replacement.cu - ) - - ConfigureTest( - NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu - test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY + # test/random/make_blobs.cu + # test/random/make_regression.cu + # test/random/multi_variable_gaussian.cu + # test/random/rng_pcg_host_api.cu + # test/random/permute.cu + # test/random/rng.cu + # test/random/rng_discrete.cu + # test/random/rng_int.cu + # test/random/rmat_rectangular_generator.cu + # test/random/sample_without_replacement.cu + test/random/excess_sampling.cu ) - ConfigureTest( - NAME - SPARSE_TEST - PATH - test/sparse/add.cu - test/sparse/convert_coo.cu - test/sparse/convert_csr.cu - test/sparse/csr_row_slice.cu - test/sparse/csr_to_dense.cu - test/sparse/csr_transpose.cu - test/sparse/degree.cu - test/sparse/filter.cu - test/sparse/norm.cu - test/sparse/normalize.cu - test/sparse/reduce.cu - test/sparse/row_op.cu - test/sparse/sddmm.cu - test/sparse/sort.cu - test/sparse/spgemmi.cu - test/sparse/spmm.cu - test/sparse/symmetrize.cu - ) - - ConfigureTest( - NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_NEIGHBORS_TEST - PATH - test/sparse/neighbors/cross_component_nn.cu - test/sparse/neighbors/brute_force.cu - test/sparse/neighbors/knn_graph.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - NEIGHBORS_TEST - PATH - test/neighbors/knn.cu - test/neighbors/fused_l2_knn.cu - test/neighbors/tiled_knn.cu - test/neighbors/haversine.cu - test/neighbors/ball_cover.cu - test/neighbors/epsilon_neighborhood.cu - test/neighbors/refine.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB - EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_CAGRA_TEST - PATH - test/neighbors/ann_cagra/test_float_uint32_t.cu - test/neighbors/ann_cagra/test_half_uint32_t.cu - test/neighbors/ann_cagra/test_int8_t_uint32_t.cu - test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu - test/neighbors/ann_cagra/test_float_int64_t.cu - test/neighbors/ann_cagra/test_half_int64_t.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_IVF_TEST - PATH - test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu - test/neighbors/ann_ivf_flat/test_float_int64_t.cu - test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_float_uint32_t.cu - test/neighbors/ann_ivf_pq/test_float_int64_t.cu - test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu - test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_NN_DESCENT_TEST - PATH - test/neighbors/ann_nn_descent/test_float_uint32_t.cu - test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu - test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - STATS_TEST - PATH - test/stats/accuracy.cu - test/stats/adjusted_rand_index.cu - test/stats/completeness_score.cu - test/stats/contingencyMatrix.cu - test/stats/cov.cu - test/stats/dispersion.cu - test/stats/entropy.cu - test/stats/histogram.cu - test/stats/homogeneity_score.cu - test/stats/information_criterion.cu - test/stats/kl_divergence.cu - test/stats/mean.cu - test/stats/meanvar.cu - test/stats/mean_center.cu - test/stats/minmax.cu - test/stats/mutual_info_score.cu - test/stats/neighborhood_recall.cu - test/stats/r2_score.cu - test/stats/rand_index.cu - test/stats/regression_metrics.cu - test/stats/silhouette_score.cu - test/stats/stddev.cu - test/stats/sum.cu - test/stats/trustworthiness.cu - test/stats/weighted_mean.cu - test/stats/v_measure.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - UTILS_TEST - PATH - test/core/seive.cu - test/util/bitonic_sort.cu - test/util/cudart_utils.cpp - test/util/device_atomics.cu - test/util/integer_utils.cpp - test/util/integer_utils.cu - test/util/memory_type_dispatcher.cu - test/util/pow2_utils.cu - test/util/reduction.cu - ) + # ConfigureTest( + # NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu + # test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # SPARSE_TEST + # PATH + # test/sparse/add.cu + # test/sparse/convert_coo.cu + # test/sparse/convert_csr.cu + # test/sparse/csr_row_slice.cu + # test/sparse/csr_to_dense.cu + # test/sparse/csr_transpose.cu + # test/sparse/degree.cu + # test/sparse/filter.cu + # test/sparse/norm.cu + # test/sparse/normalize.cu + # test/sparse/reduce.cu + # test/sparse/row_op.cu + # test/sparse/sddmm.cu + # test/sparse/sort.cu + # test/sparse/spgemmi.cu + # test/sparse/spmm.cu + # test/sparse/symmetrize.cu + # ) + + # ConfigureTest( + # NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu + # test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # SPARSE_NEIGHBORS_TEST + # PATH + # test/sparse/neighbors/cross_component_nn.cu + # test/sparse/neighbors/brute_force.cu + # test/sparse/neighbors/knn_graph.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_TEST + # PATH + # test/neighbors/knn.cu + # test/neighbors/fused_l2_knn.cu + # test/neighbors/tiled_knn.cu + # test/neighbors/haversine.cu + # test/neighbors/ball_cover.cu + # test/neighbors/epsilon_neighborhood.cu + # test/neighbors/refine.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB + # EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_ANN_CAGRA_TEST + # PATH + # test/neighbors/ann_cagra/test_float_uint32_t.cu + # test/neighbors/ann_cagra/test_half_uint32_t.cu + # test/neighbors/ann_cagra/test_int8_t_uint32_t.cu + # test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu + # test/neighbors/ann_cagra/test_float_int64_t.cu + # test/neighbors/ann_cagra/test_half_int64_t.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # GPUS + # 1 + # PERCENT + # 100 + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_ANN_IVF_TEST + # PATH + # test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu + # test/neighbors/ann_ivf_flat/test_float_int64_t.cu + # test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu + # test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_float_uint32_t.cu + # test/neighbors/ann_ivf_pq/test_float_int64_t.cu + # test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu + # test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # GPUS + # 1 + # PERCENT + # 100 + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_ANN_NN_DESCENT_TEST + # PATH + # test/neighbors/ann_nn_descent/test_float_uint32_t.cu + # test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu + # test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # GPUS + # 1 + # PERCENT + # 100 + # ) + + # ConfigureTest( + # NAME + # STATS_TEST + # PATH + # test/stats/accuracy.cu + # test/stats/adjusted_rand_index.cu + # test/stats/completeness_score.cu + # test/stats/contingencyMatrix.cu + # test/stats/cov.cu + # test/stats/dispersion.cu + # test/stats/entropy.cu + # test/stats/histogram.cu + # test/stats/homogeneity_score.cu + # test/stats/information_criterion.cu + # test/stats/kl_divergence.cu + # test/stats/mean.cu + # test/stats/meanvar.cu + # test/stats/mean_center.cu + # test/stats/minmax.cu + # test/stats/mutual_info_score.cu + # test/stats/neighborhood_recall.cu + # test/stats/r2_score.cu + # test/stats/rand_index.cu + # test/stats/regression_metrics.cu + # test/stats/silhouette_score.cu + # test/stats/stddev.cu + # test/stats/sum.cu + # test/stats/trustworthiness.cu + # test/stats/weighted_mean.cu + # test/stats/v_measure.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # UTILS_TEST + # PATH + # test/core/seive.cu + # test/util/bitonic_sort.cu + # test/util/cudart_utils.cpp + # test/util/device_atomics.cu + # test/util/integer_utils.cpp + # test/util/integer_utils.cu + # test/util/memory_type_dispatcher.cu + # test/util/pow2_utils.cu + # test/util/reduction.cu + # ) endif() # ################################################################################################## From a6f9083abf1bed20e1f38b20dbfe65b7dc395bcd Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 12 Mar 2024 23:39:29 +0100 Subject: [PATCH 05/21] add tests --- cpp/test/random/excess_sampling.cu | 113 +++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 cpp/test/random/excess_sampling.cu diff --git a/cpp/test/random/excess_sampling.cu b/cpp/test/random/excess_sampling.cu new file mode 100644 index 0000000000..fec515900e --- /dev/null +++ b/cpp/test/random/excess_sampling.cu @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace raft { +namespace random { + +using namespace raft::random; + +struct inputs { + int N; + int n_samples; +}; + +template +::std::ostream& operator<<(::std::ostream& os, const inputs p) +{ + os << p.N << "/" << p.n_samples; + return os; +} + +template +class ExcessSamplingTest : public ::testing::TestWithParam { + public: + ExcessSamplingTest() + : params(::testing::TestWithParam::GetParam()), + state{137ULL}, + in(make_device_vector(res, params.n_samples)), + out(make_device_vector(res, 0)), + h_out(make_host_vector(res, params.n_samples)) + + { + } + + void check() + { + out = raft::random::excess_subsample(res, state, params.N, params.n_samples); + ASSERT_TRUE(out.extent(0) == params.n_samples); + raft::copy(h_out.data_handle(), out.data_handle(), out.size(), stream); + + resource::sync_stream(res, stream); + std::unordered_set occurrence; + size_t sum = 0; + for (int i = 0; i < params.n_samples; ++i) { + T val = h_out(i); + sum += val; + ASSERT_TRUE(0 <= val && val < params.N) + << "out-of-range index @i=" << i << " val=" << val << " n_samples=" << params.n_samples; + ASSERT_TRUE(occurrence.find(val) == occurrence.end()) + << "repeated index @i=" << i << " idx=" << val; + occurrence.insert(val); + } + float avg = sum / (float)params.n_samples; + std::cout << "samples " << params.n_samples << ", average" << avg << std::endl; + if (params.n_samples >= 100) { + ASSERT_TRUE(raft::match(avg, params.N / 2.0, raft::CompareApprox(0.1))); + } + } + + protected: + inputs params; + raft::resources res; + cudaStream_t stream; + RngState state; + device_vector out, in; + host_vector h_out; +}; + +const std::vector input1 = {{1, 0}, + {1, 1}, + {10, 0}, + {10, 1}, + {10, 2}, + {10, 10}, + {200, 0}, + {200, 1}, + {200, 100}, + {200, 130}, + {200, 200}}; + +using ExcessSamplingTestInt64 = ExcessSamplingTest; +TEST_P(ExcessSamplingTestInt64, SamplingTest) { check(); } +INSTANTIATE_TEST_SUITE_P(ExcessSamplingTests, ExcessSamplingTestInt64, ::testing::ValuesIn(input1)); + +} // namespace random +} // namespace raft From 941e165be4f0992b7c0e82ae4aecc3489dd89228 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 13 Mar 2024 01:16:39 +0100 Subject: [PATCH 06/21] cleanup --- cpp/bench/prims/CMakeLists.txt | 172 +++-- cpp/bench/prims/random/subsample.cu | 25 +- cpp/include/raft/random/detail/rng_impl.cuh | 46 +- cpp/test/CMakeLists.txt | 752 ++++++++++---------- cpp/test/random/excess_sampling.cu | 19 +- 5 files changed, 482 insertions(+), 532 deletions(-) diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index 18936317f6..02b94cc4ab 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -74,96 +74,94 @@ function(ConfigureBench) endfunction() if(BUILD_PRIMS_BENCH) - # ConfigureBench( - # NAME CORE_BENCH PATH bench/prims/core/bitset.cu bench/prims/core/copy.cu bench/prims/main.cpp - # ) - - # ConfigureBench( - # NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu - # bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureBench( - # NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu - # bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp - # ) - - # ConfigureBench( - # NAME - # DISTANCE_BENCH - # PATH - # bench/prims/distance/distance_cosine.cu - # bench/prims/distance/distance_exp_l2.cu - # bench/prims/distance/distance_l1.cu - # bench/prims/distance/distance_unexp_l2.cu - # bench/prims/distance/fused_l2_nn.cu - # bench/prims/distance/masked_nn.cu - # bench/prims/distance/kernels.cu - # bench/prims/main.cpp - # OPTIONAL - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureBench( - # NAME - # LINALG_BENCH - # PATH - # bench/prims/linalg/add.cu - # bench/prims/linalg/map_then_reduce.cu - # bench/prims/linalg/matrix_vector_op.cu - # bench/prims/linalg/norm.cu - # bench/prims/linalg/normalize.cu - # bench/prims/linalg/reduce_cols_by_key.cu - # bench/prims/linalg/reduce_rows_by_key.cu - # bench/prims/linalg/reduce.cu - # bench/prims/linalg/sddmm.cu - # bench/prims/main.cpp - # ) - - # ConfigureBench( - # NAME - # MATRIX_BENCH - # PATH - # bench/prims/matrix/argmin.cu - # bench/prims/matrix/gather.cu - # bench/prims/matrix/select_k.cu - # bench/prims/matrix/main.cpp - # OPTIONAL - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) + ConfigureBench( + NAME CORE_BENCH PATH bench/prims/core/bitset.cu bench/prims/core/copy.cu bench/prims/main.cpp + ) + + ConfigureBench( + NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu + bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureBench( + NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu + bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp + ) + + ConfigureBench( + NAME + DISTANCE_BENCH + PATH + bench/prims/distance/distance_cosine.cu + bench/prims/distance/distance_exp_l2.cu + bench/prims/distance/distance_l1.cu + bench/prims/distance/distance_unexp_l2.cu + bench/prims/distance/fused_l2_nn.cu + bench/prims/distance/masked_nn.cu + bench/prims/distance/kernels.cu + bench/prims/main.cpp + OPTIONAL + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureBench( + NAME + LINALG_BENCH + PATH + bench/prims/linalg/add.cu + bench/prims/linalg/map_then_reduce.cu + bench/prims/linalg/matrix_vector_op.cu + bench/prims/linalg/norm.cu + bench/prims/linalg/normalize.cu + bench/prims/linalg/reduce_cols_by_key.cu + bench/prims/linalg/reduce_rows_by_key.cu + bench/prims/linalg/reduce.cu + bench/prims/linalg/sddmm.cu + bench/prims/main.cpp + ) + + ConfigureBench( + NAME + MATRIX_BENCH + PATH + bench/prims/matrix/argmin.cu + bench/prims/matrix/gather.cu + bench/prims/matrix/select_k.cu + bench/prims/matrix/main.cpp + OPTIONAL + LIB + EXPLICIT_INSTANTIATE_ONLY + ) ConfigureBench( - NAME RANDOM_BENCH PATH - # bench/prims/random/make_blobs.cu bench/prims/random/permute.cu - # bench/prims/random/rng.cu - bench/prims/random/subsample.cu bench/prims/main.cpp + NAME RANDOM_BENCH PATH bench/prims/random/make_blobs.cu bench/prims/random/permute.cu + bench/prims/random/rng.cu bench/prims/random/subsample.cu bench/prims/main.cpp ) - # ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) - - # ConfigureBench( - # NAME - # NEIGHBORS_BENCH - # PATH - # bench/prims/neighbors/knn/brute_force_float_int64_t.cu - # bench/prims/neighbors/knn/brute_force_float_uint32_t.cu - # bench/prims/neighbors/knn/cagra_float_uint32_t.cu - # bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu - # bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu - # bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu - # bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu - # bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu - # bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu - # bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu - # bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu - # bench/prims/neighbors/refine_float_int64_t.cu - # bench/prims/neighbors/refine_uint8_t_int64_t.cu - # bench/prims/main.cpp - # OPTIONAL - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) + ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) + + ConfigureBench( + NAME + NEIGHBORS_BENCH + PATH + bench/prims/neighbors/knn/brute_force_float_int64_t.cu + bench/prims/neighbors/knn/brute_force_float_uint32_t.cu + bench/prims/neighbors/knn/cagra_float_uint32_t.cu + bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu + bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu + bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu + bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu + bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu + bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu + bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu + bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu + bench/prims/neighbors/refine_float_int64_t.cu + bench/prims/neighbors/refine_uint8_t_int64_t.cu + bench/prims/main.cpp + OPTIONAL + LIB + EXPLICIT_INSTANTIATE_ONLY + ) endif() diff --git a/cpp/bench/prims/random/subsample.cu b/cpp/bench/prims/random/subsample.cu index 64a5e32669..1c384f9a03 100644 --- a/cpp/bench/prims/random/subsample.cu +++ b/cpp/bench/prims/random/subsample.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,7 @@ struct sample_inputs { int method; }; // struct sample_inputs +// Sample with replacement. We use this as a baseline. template auto bernoulli_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamples, int seed) -> raft::device_vector @@ -65,10 +66,7 @@ struct sample : public fixture { { raft::random::RngState r(123456ULL); loop_on_state(state, [this, &r]() { - if (params.method == 0) { - this->out = raft::spatial::knn::detail::utils::get_subsample_indices( - this->res, this->params.n_samples, this->params.n_train, 137); - } else if (params.method == 1) { + if (params.method == 1) { this->out = bernoulli_subsample(this->res, this->params.n_samples, this->params.n_train, 137); } else if (params.method == 2) { @@ -76,9 +74,6 @@ struct sample : public fixture { this->res, r, this->params.n_samples, this->params.n_train); } }); - if (this->params.n_train <= 100) { - print_vector("samples", this->out.data_handle(), this->params.n_train, std::cout); - } } private: @@ -87,19 +82,11 @@ struct sample : public fixture { raft::device_vector out, in; }; // struct sample -const std::vector input_vecs = {{100, 20, 2}, - {10, 5, 2}, - {20, 10, 2}, - {20, 15, 2}, - {100, 50, 2}, - {1000, 500, 2}, - {1000, 600, 2}, - {1000, 700, 2}, - {10000, 5000, 2}, - {100000, 50000, 2}, +const std::vector input_vecs = {{100000000, 10000000, 1}, + {100000000, 50000000, 1}, + {100000000, 100000000, 1}, {100000000, 10000000, 2}, {100000000, 50000000, 2}, - {1000, 900, 2}, {100000000, 100000000, 2}}; RAFT_BENCH_REGISTER(sample, "", input_vecs); diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index 08be3f6a98..08a57e17c0 100644 --- a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -297,21 +297,10 @@ void sampleWithoutReplacement(RngState& rng_state, { ASSERT(sampledLen <= len, "sampleWithoutReplacement: 'sampledLen' cant be more than 'len'."); - // size_t free, total; - // float GiB = 1073741824.0f; - // cudaMemGetInfo(&free, &total); - // RAFT_LOG_INFO("sampleWithoutReplacement::start free mem %6.1f, used mem %6.1f", - // free / GiB, - // (total - free) / GiB); rmm::device_uvector expWts(len, stream); rmm::device_uvector sortedWts(len, stream); rmm::device_uvector inIdx(len, stream); rmm::device_uvector outIdxBuff(len, stream); - - // cudaMemGetInfo(&free, &total); - // RAFT_LOG_INFO("sampleWithoutReplacement::buffers free mem %6.1f, used mem %6.1f", - // free / GiB, - // (total - free) / GiB); auto* inIdxPtr = inIdx.data(); // generate modified weights SamplingParams params; @@ -386,15 +375,11 @@ auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT n_excess_samples = std::min(n_excess_samples, N); auto rnd_idx = raft::make_device_vector(res, n_excess_samples); - RAFT_LOG_INFO("We will draw %zu random samples", (size_t)rnd_idx.size()); auto linear_idx = raft::make_device_vector(res, rnd_idx.size()); raft::linalg::map_offset(res, linear_idx.view(), identity_op()); uniformInt(res, state, rnd_idx.data_handle(), rnd_idx.size(), IdxT(0), IdxT(N)); - if (rnd_idx.size() <= 100) { - print_vector("rnd_idx", rnd_idx.data_handle(), rnd_idx.size(), std::cout); - } // Sort indices according to rnd keys size_t workspace_size = 0; auto stream = resource::get_cuda_stream(res); @@ -405,8 +390,6 @@ auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT rnd_idx.size(), raft::less_op{}, stream); - float GiB = 1073741824.0f; - RAFT_LOG_INFO("worksize sort %6.1f GiB", workspace_size / GiB); auto workspace = raft::make_device_vector(res, workspace_size); cub::DeviceMergeSort::SortPairs(workspace.data_handle(), workspace_size, @@ -416,12 +399,6 @@ auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT raft::less_op{}, stream); - if (rnd_idx.size() <= 100) { - print_vector("rnd _idx sorted", rnd_idx.data_handle(), rnd_idx.size(), std::cout); - } - if (rnd_idx.size() <= 100) { - print_vector("linear_idx sorted", linear_idx.data_handle(), linear_idx.size(), std::cout); - } if (rnd_idx.size() == static_cast(N)) { // We shuffled the linear_idx array by sorting it according to rnd_idx. // We return the first n_samples elements. @@ -446,8 +423,6 @@ auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT rnd_idx.size(), stream); - RAFT_LOG_INFO("worksize unique %6.1f GiB", worksize2 / GiB); - if (worksize2 > workspace.size()) { workspace = raft::make_device_vector(res, worksize2); workspace_size = workspace.size(); @@ -465,22 +440,13 @@ auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT IdxT selected = num_selected.value(stream); - if (rnd_idx.size() <= 100) { - print_vector("unique keys (rnd_idx)", keys_out.data_handle(), selected, std::cout); - print_vector("unique vals (linear idx)", values_out.data_handle(), selected, std::cout); - } if (selected < n_samples) { - RAFT_LOG_WARN("Subsampling returned with less unique indices (%zu) than requested (%zu)", - (size_t)selected, - (size_t)n_samples); + RAFT_LOG_DEBUG("Subsampling returned with less unique indices (%zu) than requested (%zu)", + (size_t)selected, + (size_t)n_samples); } - RAFT_LOG_INFO( - "We have %zu unique idices out of %zu samples", (size_t)selected, (size_t)rnd_idx.size()); - RAFT_LOG_INFO( - "Subsampling unique indices (%zu) requested (%zu)", (size_t)selected, (size_t)n_samples); // After duplicates are removed, we need to shuffle back to random order - cub::DeviceMergeSort::SortPairs(workspace.data_handle(), workspace_size, values_out.data_handle(), @@ -488,10 +454,6 @@ auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT n_samples, raft::less_op{}, stream); - if (rnd_idx.size() <= 100) { - print_vector("re sorted keys ", keys_out.data_handle(), selected, std::cout); - print_vector("re sorted vals ", values_out.data_handle(), selected, std::cout); - } values_out = raft::make_device_vector(res, n_samples); raft::copy(values_out.data_handle(), keys_out.data_handle(), n_samples, stream); diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 28ef83af34..037f85698c 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -95,390 +95,390 @@ endfunction() # * distance tests ------------------------------------------------------------------------- if(BUILD_TESTS) - # ConfigureTest( - # NAME - # CLUSTER_TEST - # PATH - # test/cluster/kmeans.cu - # test/cluster/kmeans_balanced.cu - # test/cluster/kmeans_find_k.cu - # test/cluster/cluster_solvers.cu - # test/cluster/linkage.cu - # test/cluster/spectral.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # CORE_TEST - # PATH - # test/core/bitset.cu - # test/core/device_resources_manager.cpp - # test/core/device_setter.cpp - # test/core/logger.cpp - # test/core/math_device.cu - # test/core/math_host.cpp - # test/core/operators_device.cu - # test/core/operators_host.cpp - # test/core/handle.cpp - # test/core/interruptible.cu - # test/core/nvtx.cpp - # test/core/mdarray.cu - # test/core/mdbuffer.cu - # test/core/mdspan_copy.cpp - # test/core/mdspan_copy.cu - # test/core/mdspan_utils.cu - # test/core/numpy_serializer.cu - # test/core/memory_type.cpp - # test/core/sparse_matrix.cu - # test/core/sparse_matrix.cpp - # test/core/span.cpp - # test/core/span.cu - # test/core/stream_view.cpp - # test/core/temporary_device_buffer.cu - # test/test.cpp - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB - # EXPLICIT_INSTANTIATE_ONLY NOCUDA - # ) - - # ConfigureTest( - # NAME - # DISTANCE_TEST - # PATH - # test/distance/dist_adj.cu - # test/distance/dist_adj_distance_instance.cu - # test/distance/dist_canberra.cu - # test/distance/dist_correlation.cu - # test/distance/dist_cos.cu - # test/distance/dist_hamming.cu - # test/distance/dist_hellinger.cu - # test/distance/dist_inner_product.cu - # test/distance/dist_jensen_shannon.cu - # test/distance/dist_kl_divergence.cu - # test/distance/dist_l1.cu - # test/distance/dist_l2_exp.cu - # test/distance/dist_l2_unexp.cu - # test/distance/dist_l2_sqrt_exp.cu - # test/distance/dist_l_inf.cu - # test/distance/dist_lp_unexp.cu - # test/distance/dist_russell_rao.cu - # test/distance/masked_nn.cu - # test/distance/masked_nn_compress_to_bits.cu - # test/distance/fused_l2_nn.cu - # test/distance/gram.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # list( - # APPEND - # EXT_HEADER_TEST_SOURCES - # test/ext_headers/raft_neighbors_brute_force.cu - # test/ext_headers/raft_distance_distance.cu - # test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - # test/ext_headers/raft_matrix_detail_select_k.cu - # test/ext_headers/raft_neighbors_ball_cover.cu - # test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - # test/ext_headers/raft_distance_fused_l2_nn.cu - # test/ext_headers/raft_neighbors_ivf_pq.cu - # test/ext_headers/raft_util_memory_pool.cpp - # test/ext_headers/raft_neighbors_ivf_flat.cu - # test/ext_headers/raft_core_logger.cpp - # test/ext_headers/raft_neighbors_refine.cu - # test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu - # test/ext_headers/raft_linalg_detail_coalesced_reduction.cu - # test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - # test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - # test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu - # ) - - # # Test that the split headers compile in isolation with: - # # - # # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined - # # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined - # # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. - # ConfigureTest( - # NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - # ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) - # ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - # ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) - - # ConfigureTest( - # NAME - # LINALG_TEST - # PATH - # test/linalg/add.cu - # test/linalg/axpy.cu - # test/linalg/binary_op.cu - # test/linalg/cholesky_r1.cu - # test/linalg/coalesced_reduction.cu - # test/linalg/divide.cu - # test/linalg/dot.cu - # test/linalg/eig.cu - # test/linalg/eig_sel.cu - # test/linalg/gemm_layout.cu - # test/linalg/gemv.cu - # test/linalg/map.cu - # test/linalg/map_then_reduce.cu - # test/linalg/matrix_vector.cu - # test/linalg/matrix_vector_op.cu - # test/linalg/mean_squared_error.cu - # test/linalg/multiply.cu - # test/linalg/norm.cu - # test/linalg/normalize.cu - # test/linalg/power.cu - # test/linalg/randomized_svd.cu - # test/linalg/reduce.cu - # test/linalg/reduce_cols_by_key.cu - # test/linalg/reduce_rows_by_key.cu - # test/linalg/rsvd.cu - # test/linalg/sqrt.cu - # test/linalg/strided_reduction.cu - # test/linalg/subtract.cu - # test/linalg/svd.cu - # test/linalg/ternary_op.cu - # test/linalg/transpose.cu - # test/linalg/unary_op.cu - # ) - - # ConfigureTest( - # NAME - # MATRIX_TEST - # PATH - # test/matrix/argmax.cu - # test/matrix/argmin.cu - # test/matrix/columnSort.cu - # test/matrix/diagonal.cu - # test/matrix/gather.cu - # test/matrix/scatter.cu - # test/matrix/eye.cu - # test/matrix/linewise_op.cu - # test/matrix/math.cu - # test/matrix/matrix.cu - # test/matrix/norm.cu - # test/matrix/reverse.cu - # test/matrix/slice.cu - # test/matrix/triangular.cu - # test/sparse/spectral_matrix.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) - - # ConfigureTest( - # NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY - # ) + ConfigureTest( + NAME + CLUSTER_TEST + PATH + test/cluster/kmeans.cu + test/cluster/kmeans_balanced.cu + test/cluster/kmeans_find_k.cu + test/cluster/cluster_solvers.cu + test/cluster/linkage.cu + test/cluster/spectral.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + CORE_TEST + PATH + test/core/bitset.cu + test/core/device_resources_manager.cpp + test/core/device_setter.cpp + test/core/logger.cpp + test/core/math_device.cu + test/core/math_host.cpp + test/core/operators_device.cu + test/core/operators_host.cpp + test/core/handle.cpp + test/core/interruptible.cu + test/core/nvtx.cpp + test/core/mdarray.cu + test/core/mdbuffer.cu + test/core/mdspan_copy.cpp + test/core/mdspan_copy.cu + test/core/mdspan_utils.cu + test/core/numpy_serializer.cu + test/core/memory_type.cpp + test/core/sparse_matrix.cu + test/core/sparse_matrix.cpp + test/core/span.cpp + test/core/span.cu + test/core/stream_view.cpp + test/core/temporary_device_buffer.cu + test/test.cpp + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB + EXPLICIT_INSTANTIATE_ONLY NOCUDA + ) + + ConfigureTest( + NAME + DISTANCE_TEST + PATH + test/distance/dist_adj.cu + test/distance/dist_adj_distance_instance.cu + test/distance/dist_canberra.cu + test/distance/dist_correlation.cu + test/distance/dist_cos.cu + test/distance/dist_hamming.cu + test/distance/dist_hellinger.cu + test/distance/dist_inner_product.cu + test/distance/dist_jensen_shannon.cu + test/distance/dist_kl_divergence.cu + test/distance/dist_l1.cu + test/distance/dist_l2_exp.cu + test/distance/dist_l2_unexp.cu + test/distance/dist_l2_sqrt_exp.cu + test/distance/dist_l_inf.cu + test/distance/dist_lp_unexp.cu + test/distance/dist_russell_rao.cu + test/distance/masked_nn.cu + test/distance/masked_nn_compress_to_bits.cu + test/distance/fused_l2_nn.cu + test/distance/gram.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + list( + APPEND + EXT_HEADER_TEST_SOURCES + test/ext_headers/raft_neighbors_brute_force.cu + test/ext_headers/raft_distance_distance.cu + test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu + test/ext_headers/raft_matrix_detail_select_k.cu + test/ext_headers/raft_neighbors_ball_cover.cu + test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu + test/ext_headers/raft_distance_fused_l2_nn.cu + test/ext_headers/raft_neighbors_ivf_pq.cu + test/ext_headers/raft_util_memory_pool.cpp + test/ext_headers/raft_neighbors_ivf_flat.cu + test/ext_headers/raft_core_logger.cpp + test/ext_headers/raft_neighbors_refine.cu + test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu + test/ext_headers/raft_linalg_detail_coalesced_reduction.cu + test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu + test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu + test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu + ) + + # Test that the split headers compile in isolation with: + # + # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined + # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined + # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. + ConfigureTest( + NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB + EXPLICIT_INSTANTIATE_ONLY + ) + ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) + ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) + + ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) + + ConfigureTest( + NAME + LINALG_TEST + PATH + test/linalg/add.cu + test/linalg/axpy.cu + test/linalg/binary_op.cu + test/linalg/cholesky_r1.cu + test/linalg/coalesced_reduction.cu + test/linalg/divide.cu + test/linalg/dot.cu + test/linalg/eig.cu + test/linalg/eig_sel.cu + test/linalg/gemm_layout.cu + test/linalg/gemv.cu + test/linalg/map.cu + test/linalg/map_then_reduce.cu + test/linalg/matrix_vector.cu + test/linalg/matrix_vector_op.cu + test/linalg/mean_squared_error.cu + test/linalg/multiply.cu + test/linalg/norm.cu + test/linalg/normalize.cu + test/linalg/power.cu + test/linalg/randomized_svd.cu + test/linalg/reduce.cu + test/linalg/reduce_cols_by_key.cu + test/linalg/reduce_rows_by_key.cu + test/linalg/rsvd.cu + test/linalg/sqrt.cu + test/linalg/strided_reduction.cu + test/linalg/subtract.cu + test/linalg/svd.cu + test/linalg/ternary_op.cu + test/linalg/transpose.cu + test/linalg/unary_op.cu + ) + + ConfigureTest( + NAME + MATRIX_TEST + PATH + test/matrix/argmax.cu + test/matrix/argmin.cu + test/matrix/columnSort.cu + test/matrix/diagonal.cu + test/matrix/gather.cu + test/matrix/scatter.cu + test/matrix/eye.cu + test/matrix/linewise_op.cu + test/matrix/math.cu + test/matrix/matrix.cu + test/matrix/norm.cu + test/matrix/reverse.cu + test/matrix/slice.cu + test/matrix/triangular.cu + test/sparse/spectral_matrix.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) + + ConfigureTest( + NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) ConfigureTest( NAME RANDOM_TEST PATH - # test/random/make_blobs.cu - # test/random/make_regression.cu - # test/random/multi_variable_gaussian.cu - # test/random/rng_pcg_host_api.cu - # test/random/permute.cu - # test/random/rng.cu - # test/random/rng_discrete.cu - # test/random/rng_int.cu - # test/random/rmat_rectangular_generator.cu - # test/random/sample_without_replacement.cu + test/random/make_blobs.cu + test/random/make_regression.cu + test/random/multi_variable_gaussian.cu + test/random/rng_pcg_host_api.cu + test/random/permute.cu + test/random/rng.cu + test/random/rng_discrete.cu + test/random/rng_int.cu + test/random/rmat_rectangular_generator.cu + test/random/sample_without_replacement.cu test/random/excess_sampling.cu ) - # ConfigureTest( - # NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu - # test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # SPARSE_TEST - # PATH - # test/sparse/add.cu - # test/sparse/convert_coo.cu - # test/sparse/convert_csr.cu - # test/sparse/csr_row_slice.cu - # test/sparse/csr_to_dense.cu - # test/sparse/csr_transpose.cu - # test/sparse/degree.cu - # test/sparse/filter.cu - # test/sparse/norm.cu - # test/sparse/normalize.cu - # test/sparse/reduce.cu - # test/sparse/row_op.cu - # test/sparse/sddmm.cu - # test/sparse/sort.cu - # test/sparse/spgemmi.cu - # test/sparse/spmm.cu - # test/sparse/symmetrize.cu - # ) - - # ConfigureTest( - # NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - # test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # SPARSE_NEIGHBORS_TEST - # PATH - # test/sparse/neighbors/cross_component_nn.cu - # test/sparse/neighbors/brute_force.cu - # test/sparse/neighbors/knn_graph.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_TEST - # PATH - # test/neighbors/knn.cu - # test/neighbors/fused_l2_knn.cu - # test/neighbors/tiled_knn.cu - # test/neighbors/haversine.cu - # test/neighbors/ball_cover.cu - # test/neighbors/epsilon_neighborhood.cu - # test/neighbors/refine.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB - # EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_ANN_CAGRA_TEST - # PATH - # test/neighbors/ann_cagra/test_float_uint32_t.cu - # test/neighbors/ann_cagra/test_half_uint32_t.cu - # test/neighbors/ann_cagra/test_int8_t_uint32_t.cu - # test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu - # test/neighbors/ann_cagra/test_float_int64_t.cu - # test/neighbors/ann_cagra/test_half_int64_t.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # GPUS - # 1 - # PERCENT - # 100 - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_ANN_IVF_TEST - # PATH - # test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu - # test/neighbors/ann_ivf_flat/test_float_int64_t.cu - # test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu - # test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu - # test/neighbors/ann_ivf_pq/test_float_uint32_t.cu - # test/neighbors/ann_ivf_pq/test_float_int64_t.cu - # test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu - # test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu - # test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu - # test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # GPUS - # 1 - # PERCENT - # 100 - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_ANN_NN_DESCENT_TEST - # PATH - # test/neighbors/ann_nn_descent/test_float_uint32_t.cu - # test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu - # test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # GPUS - # 1 - # PERCENT - # 100 - # ) - - # ConfigureTest( - # NAME - # STATS_TEST - # PATH - # test/stats/accuracy.cu - # test/stats/adjusted_rand_index.cu - # test/stats/completeness_score.cu - # test/stats/contingencyMatrix.cu - # test/stats/cov.cu - # test/stats/dispersion.cu - # test/stats/entropy.cu - # test/stats/histogram.cu - # test/stats/homogeneity_score.cu - # test/stats/information_criterion.cu - # test/stats/kl_divergence.cu - # test/stats/mean.cu - # test/stats/meanvar.cu - # test/stats/mean_center.cu - # test/stats/minmax.cu - # test/stats/mutual_info_score.cu - # test/stats/neighborhood_recall.cu - # test/stats/r2_score.cu - # test/stats/rand_index.cu - # test/stats/regression_metrics.cu - # test/stats/silhouette_score.cu - # test/stats/stddev.cu - # test/stats/sum.cu - # test/stats/trustworthiness.cu - # test/stats/weighted_mean.cu - # test/stats/v_measure.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # UTILS_TEST - # PATH - # test/core/seive.cu - # test/util/bitonic_sort.cu - # test/util/cudart_utils.cpp - # test/util/device_atomics.cu - # test/util/integer_utils.cpp - # test/util/integer_utils.cu - # test/util/memory_type_dispatcher.cu - # test/util/pow2_utils.cu - # test/util/reduction.cu - # ) + ConfigureTest( + NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu + test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + SPARSE_TEST + PATH + test/sparse/add.cu + test/sparse/convert_coo.cu + test/sparse/convert_csr.cu + test/sparse/csr_row_slice.cu + test/sparse/csr_to_dense.cu + test/sparse/csr_transpose.cu + test/sparse/degree.cu + test/sparse/filter.cu + test/sparse/norm.cu + test/sparse/normalize.cu + test/sparse/reduce.cu + test/sparse/row_op.cu + test/sparse/sddmm.cu + test/sparse/sort.cu + test/sparse/spgemmi.cu + test/sparse/spmm.cu + test/sparse/symmetrize.cu + ) + + ConfigureTest( + NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu + test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + SPARSE_NEIGHBORS_TEST + PATH + test/sparse/neighbors/cross_component_nn.cu + test/sparse/neighbors/brute_force.cu + test/sparse/neighbors/knn_graph.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + NEIGHBORS_TEST + PATH + test/neighbors/knn.cu + test/neighbors/fused_l2_knn.cu + test/neighbors/tiled_knn.cu + test/neighbors/haversine.cu + test/neighbors/ball_cover.cu + test/neighbors/epsilon_neighborhood.cu + test/neighbors/refine.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB + EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_CAGRA_TEST + PATH + test/neighbors/ann_cagra/test_float_uint32_t.cu + test/neighbors/ann_cagra/test_half_uint32_t.cu + test/neighbors/ann_cagra/test_int8_t_uint32_t.cu + test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu + test/neighbors/ann_cagra/test_float_int64_t.cu + test/neighbors/ann_cagra/test_half_int64_t.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS + 1 + PERCENT + 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_IVF_TEST + PATH + test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu + test/neighbors/ann_ivf_flat/test_float_int64_t.cu + test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu + test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu + test/neighbors/ann_ivf_pq/test_float_uint32_t.cu + test/neighbors/ann_ivf_pq/test_float_int64_t.cu + test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu + test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu + test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu + test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS + 1 + PERCENT + 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_NN_DESCENT_TEST + PATH + test/neighbors/ann_nn_descent/test_float_uint32_t.cu + test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu + test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS + 1 + PERCENT + 100 + ) + + ConfigureTest( + NAME + STATS_TEST + PATH + test/stats/accuracy.cu + test/stats/adjusted_rand_index.cu + test/stats/completeness_score.cu + test/stats/contingencyMatrix.cu + test/stats/cov.cu + test/stats/dispersion.cu + test/stats/entropy.cu + test/stats/histogram.cu + test/stats/homogeneity_score.cu + test/stats/information_criterion.cu + test/stats/kl_divergence.cu + test/stats/mean.cu + test/stats/meanvar.cu + test/stats/mean_center.cu + test/stats/minmax.cu + test/stats/mutual_info_score.cu + test/stats/neighborhood_recall.cu + test/stats/r2_score.cu + test/stats/rand_index.cu + test/stats/regression_metrics.cu + test/stats/silhouette_score.cu + test/stats/stddev.cu + test/stats/sum.cu + test/stats/trustworthiness.cu + test/stats/weighted_mean.cu + test/stats/v_measure.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + UTILS_TEST + PATH + test/core/seive.cu + test/util/bitonic_sort.cu + test/util/cudart_utils.cpp + test/util/device_atomics.cu + test/util/integer_utils.cpp + test/util/integer_utils.cu + test/util/memory_type_dispatcher.cu + test/util/pow2_utils.cu + test/util/reduction.cu + ) endif() # ################################################################################################## diff --git a/cpp/test/random/excess_sampling.cu b/cpp/test/random/excess_sampling.cu index fec515900e..8c788c491b 100644 --- a/cpp/test/random/excess_sampling.cu +++ b/cpp/test/random/excess_sampling.cu @@ -35,8 +35,8 @@ namespace random { using namespace raft::random; struct inputs { - int N; - int n_samples; + int64_t N; + int64_t n_samples; }; template @@ -67,8 +67,8 @@ class ExcessSamplingTest : public ::testing::TestWithParam { resource::sync_stream(res, stream); std::unordered_set occurrence; - size_t sum = 0; - for (int i = 0; i < params.n_samples; ++i) { + int64_t sum = 0; + for (int64_t i = 0; i < params.n_samples; ++i) { T val = h_out(i); sum += val; ASSERT_TRUE(0 <= val && val < params.N) @@ -78,9 +78,9 @@ class ExcessSamplingTest : public ::testing::TestWithParam { occurrence.insert(val); } float avg = sum / (float)params.n_samples; - std::cout << "samples " << params.n_samples << ", average" << avg << std::endl; - if (params.n_samples >= 100) { - ASSERT_TRUE(raft::match(avg, params.N / 2.0, raft::CompareApprox(0.1))); + if (params.n_samples >= 100 && params.N / params.n_samples < 100) { + ASSERT_TRUE(raft::match(avg, (params.N - 1) / 2.0f, raft::CompareApprox(0.2))) + << "non-uniform sample"; } } @@ -99,11 +99,14 @@ const std::vector input1 = {{1, 0}, {10, 1}, {10, 2}, {10, 10}, + {137, 42}, {200, 0}, {200, 1}, {200, 100}, {200, 130}, - {200, 200}}; + {200, 200}, + {10000, 893}, + {10000000000, 1023}}; using ExcessSamplingTestInt64 = ExcessSamplingTest; TEST_P(ExcessSamplingTestInt64, SamplingTest) { check(); } From eb73ef5d336edb84e4109c63f2da23093af1d2ca Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 13 Mar 2024 09:06:20 +0100 Subject: [PATCH 07/21] added sample_rows to matrix namespace --- cpp/bench/prims/random/subsample.cu | 18 +++++ cpp/include/raft/matrix/detail/gather.cuh | 72 +++++++++++++++++++ .../raft/matrix/detail/sample_rows.cuh | 54 ++++++++++++++ cpp/include/raft/matrix/sample_rows.cuh | 51 +++++++++++++ cpp/include/raft/random/detail/rng_impl.cuh | 2 +- cpp/include/raft/random/rng.cuh | 4 +- 6 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 cpp/include/raft/matrix/detail/sample_rows.cuh create mode 100644 cpp/include/raft/matrix/sample_rows.cuh diff --git a/cpp/bench/prims/random/subsample.cu b/cpp/bench/prims/random/subsample.cu index 1c384f9a03..4c8ca2bf31 100644 --- a/cpp/bench/prims/random/subsample.cu +++ b/cpp/bench/prims/random/subsample.cu @@ -27,6 +27,7 @@ #include #include +#include #include @@ -38,6 +39,12 @@ struct sample_inputs { int method; }; // struct sample_inputs +inline auto operator<<(std::ostream& os, const sample_inputs& p) -> std::ostream& +{ + os << p.n_samples << "#" << p.n_train << "#" << p.method; + return os; +} + // Sample with replacement. We use this as a baseline. template auto bernoulli_subsample(raft::resources const& res, IdxT n_samples, IdxT n_subsamples, int seed) @@ -56,14 +63,22 @@ template struct sample : public fixture { sample(const sample_inputs& p) : params(p), + old_mr(rmm::mr::get_current_device_resource()), + pool_mr(rmm::mr::get_current_device_resource(), 2 * GiB), in(make_device_vector(res, p.n_samples)), out(make_device_vector(res, p.n_train)) { + rmm::mr::set_current_device_resource(&pool_mr); raft::random::RngState r(123456ULL); } + ~sample() { rmm::mr::set_current_device_resource(old_mr); } void run_benchmark(::benchmark::State& state) override { + std::ostringstream label_stream; + label_stream << params; + state.SetLabel(label_stream.str()); + raft::random::RngState r(123456ULL); loop_on_state(state, [this, &r]() { if (params.method == 1) { @@ -77,7 +92,10 @@ struct sample : public fixture { } private: + float GiB = 1073741824.0f; raft::device_resources res; + rmm::mr::device_memory_resource* old_mr; + rmm::mr::pool_memory_resource pool_mr; sample_inputs params; raft::device_vector out, in; }; // struct sample diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 651fec81c3..553f2d71f1 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -16,7 +16,15 @@ #pragma once +#include +#include +#include +#include +#include #include +#include +#include +#include #include #include @@ -336,6 +344,70 @@ void gather_if(const InputIteratorT in, gatherImpl(in, D, N, map, stencil, map_length, out, pred_op, transform_op, stream); } +template +void gather_buff(host_matrix_view dataset, + host_vector_view indices, + IdxT offset, + pinned_matrix_view buff) +{ + raft::common::nvtx::range fun_scope("gather_host_buff"); + IdxT batch_size = std::min(buff.extent(0), indices.extent(0) - offset); + +#pragma omp for + for (IdxT i = 0; i < batch_size; i++) { + IdxT in_idx = indices(offset + i); + for (IdxT k = 0; k < buff.extent(1); k++) { + buff(i, k) = dataset(in_idx, k); + } + } +} + +template +void gather(raft::resources const& res, + host_matrix_view dataset, + device_vector_view indices, + raft::device_matrix_view output) +{ + raft::common::nvtx::range fun_scope("gather"); + IdxT n_dim = output.extent(1); + IdxT n_train = output.extent(0); + auto indices_host = raft::make_host_vector(n_train); + raft::copy( + indices_host.data_handle(), indices.data_handle(), n_train, resource::get_cuda_stream(res)); + resource::sync_stream(res); + + const size_t max_batch_size = 32768; + // Gather the vector on the host in tmp buffers. We use two buffers to overlap H2D sync + // and gathering the data. + raft::common::nvtx::push_range("gather::alloc_buffers"); + auto out_tmp1 = raft::make_pinned_matrix(res, max_batch_size, n_dim); + auto out_tmp2 = raft::make_pinned_matrix(res, max_batch_size, n_dim); + auto view1 = out_tmp1.view(); + auto view2 = out_tmp2.view(); + raft::common::nvtx::pop_range(); + + gather_buff(dataset, make_const_mdspan(indices_host.view()), (IdxT)0, view1); +#pragma omp parallel + for (IdxT device_offset = 0; device_offset < n_train; device_offset += max_batch_size) { + IdxT batch_size = std::min(max_batch_size, n_train - device_offset); +#pragma omp master + raft::copy(output.data_handle() + device_offset * n_dim, + view1.data_handle(), + batch_size * n_dim, + resource::get_cuda_stream(res)); + // Start gathering the next batch on the host. + IdxT host_offset = device_offset + batch_size; + batch_size = std::min(max_batch_size, n_train - host_offset); + if (batch_size > 0) { + gather_buff(dataset, make_const_mdspan(indices_host.view()), host_offset, view2); + } +#pragma omp master + resource::sync_stream(res); +#pragma omp barrier + std::swap(view1, view2); + } +} + } // namespace detail } // namespace matrix } // namespace raft diff --git a/cpp/include/raft/matrix/detail/sample_rows.cuh b/cpp/include/raft/matrix/detail/sample_rows.cuh new file mode 100644 index 0000000000..c8120c9ab2 --- /dev/null +++ b/cpp/include/raft/matrix/detail/sample_rows.cuh @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::matrix { + +/** Select rows randomly from input and copy to output. */ +template +void sample_rows(raft::resources const& res, + const T* input, + IdxT n_rows_input, + raft::device_matrix_view output, + RngState random_state) +{ + IdxT n_dim = output.extent(1); + IdxT n_samples = output.extent(0); + + raft::device_vector train_indices = + raft::random::excess_subsample(res, random_state, n_rows_input, n_samples); + + cudaPointerAttributes attr; + RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, input)); + T* ptr = reinterpret_cast(attr.devicePointer); + if (ptr != nullptr) { + raft::matrix::gather(res, + raft::make_device_matrix_view(ptr, n_rows_input, n_dim), + raft::make_const_mdspan(train_indices.view()), + output); + } else { + auto dataset = raft::make_host_matrix_view(input, n_rows_input, n_dim); + raft::matrix::detail::gather(res, dataset, make_const_mdspan(train_indices.view()), output); + } +} +} // namespace raft::matrix diff --git a/cpp/include/raft/matrix/sample_rows.cuh b/cpp/include/raft/matrix/sample_rows.cuh new file mode 100644 index 0000000000..2f8b8e6248 --- /dev/null +++ b/cpp/include/raft/matrix/sample_rows.cuh @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::matrix { + +/** Select rows randomly from input and copy to output. */ +template +void sample_rows(raft::resources const& res, + const T* input, + IdxT n_rows_input, + raft::device_matrix_view output, + RngState random_state) +{ + detail::sample_rows(res, input, n_rows_input, output, random_state); +} + +/** Subsample the dataset to create a training set*/ +template +raft::device_matrix sample_rows(raft::resources const& res, + const T* input, + IdxT n_rows_input, + IdxT n_train, + IdxT n_dim, + RngState random_state) +{ + auto output = raft::make_device_matrix(res, n_train, n_dim); + detail::sample_rows(res, input, n_rows_input, output, random_state); + return output; +} +} // namespace raft::matrix diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index 08a57e17c0..70ef1bbfcc 100644 --- a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -341,7 +341,7 @@ void affine_transform_params(RngState const& rng_state, IdxT n, IdxT& a, IdxT& b * The algorithm will allocate a workspace of size O(4*n_samples) internally. * * We use max N random numbers. Depending on how large n_samples is w.r.t to N, we - * either use rejection sampling, sort the [0..N-1] values using random keys. + * either use rejection sampling, or sort the [0..N-1] values using random keys. * * @tparam IdxT type of indices that we sample * @tparam MatIdxT extent type of the returned mdarray diff --git a/cpp/include/raft/random/rng.cuh b/cpp/include/raft/random/rng.cuh index 10d809d3b8..977d82830b 100644 --- a/cpp/include/raft/random/rng.cuh +++ b/cpp/include/raft/random/rng.cuh @@ -816,10 +816,10 @@ void sampleWithoutReplacement(raft::resources const& handle, /** @brief Sample without replacement from range 0..N-1. * * Elements are sampled uniformly. - * The algorithm will allocate a workspace of size O(4*n_samples) internally. + * The algorithm will allocate a workspace of size 4*n_samples*sizeof(IdxT) internally. * * We use max N random numbers. Depending on how large n_samples is w.r.t to N, we - * either use rejection sampling, sort the [0..N-1] values using random keys. + * either use rejection sampling, or sort the [0..N-1] values using random keys. * * @tparam IdxT type of indices that we sample * @tparam MatIdxT extent type of the returned mdarray From cc2cf2409cc4fe692a66a84724fbf4b37eb89cdd Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 13 Mar 2024 09:19:17 +0100 Subject: [PATCH 08/21] add test for sample rows --- .../raft/spatial/knn/detail/ann_utils.cuh | 82 -- cpp/test/CMakeLists.txt | 733 +++++++++--------- cpp/test/matrix/sample_rows.cu | 79 ++ 3 files changed, 446 insertions(+), 448 deletions(-) create mode 100644 cpp/test/matrix/sample_rows.cu diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index d7f4651b56..78e63f756d 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -577,86 +577,4 @@ struct batch_load_iterator { size_type cur_pos_; }; -template -auto get_subsample_indices(raft::resources const& res, IdxT n_samples, IdxT n_subsamples, int seed) - -> raft::device_vector -{ - RAFT_EXPECTS(n_subsamples <= n_samples, "Cannot have more training samples than dataset vectors"); - // size_t free, total; - // float GiB = 1073741824.0f; - // cudaMemGetInfo(&free, &total); - // RAFT_LOG_INFO( - // "get_subsample_indices::data free mem %6.1f, used mem %6.1f", free / GiB, (total - free) / - // GiB); - - auto data_indices = raft::make_device_vector(res, n_samples); - // cudaMemGetInfo(&free, &total); - // RAFT_LOG_INFO("get_subsample_indices::train free mem %6.1f, used mem %6.1f", - // free / GiB, - // (total - free) / GiB); - - auto train_indices = raft::make_device_vector(res, n_subsamples); - raft::linalg::map_offset(res, data_indices.view(), identity_op()); - raft::random::RngState rng(seed); - raft::random::sample_without_replacement(res, - rng, - raft::make_const_mdspan(data_indices.view()), - std::nullopt, - train_indices.view(), - std::nullopt); - return train_indices; -} - -/** Subsample the dataset to create a training set*/ -template -void subsample(raft::resources const& res, - const T* input, - IdxT n_samples, - raft::device_matrix_view output, - int seed) -{ - IdxT n_dim = output.extent(1); - IdxT n_train = output.extent(0); - - raft::device_vector train_indices = - get_subsample_indices(res, n_samples, n_train, seed); - - cudaPointerAttributes attr; - RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, input)); - T* ptr = reinterpret_cast(attr.devicePointer); - if (ptr != nullptr) { - raft::matrix::gather(res, - raft::make_device_matrix_view(ptr, n_samples, n_dim), - raft::make_const_mdspan(train_indices.view()), - output); - } else { - auto dataset = raft::make_host_matrix_view(input, n_samples, n_dim); - raft::matrix::detail::gather(res, dataset, make_const_mdspan(train_indices.view()), output); - } -} - -/** Subsample the dataset to create a training set*/ -template -raft::device_matrix subsample( - raft::resources const& res, const T* input, IdxT n_samples, IdxT n_train, IdxT n_dim, int seed) -{ - raft::device_vector train_indices = - get_subsample_indices(res, n_samples, n_train, seed); - - auto output = raft::make_device_matrix(res, n_train, n_dim); - cudaPointerAttributes attr; - RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, input)); - T* ptr = reinterpret_cast(attr.devicePointer); - if (ptr != nullptr) { - raft::matrix::gather(res, - raft::make_device_matrix_view(ptr, n_samples, n_dim), - raft::make_const_mdspan(train_indices.view()), - output.view()); - } else { - auto dataset = raft::make_host_matrix_view(input, n_samples, n_dim); - raft::matrix::detail::gather( - res, dataset, make_const_mdspan(train_indices.view()), output.view()); - } - return output; -} } // namespace raft::spatial::knn::detail::utils diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 037f85698c..cda9ca69e8 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -95,390 +95,391 @@ endfunction() # * distance tests ------------------------------------------------------------------------- if(BUILD_TESTS) - ConfigureTest( - NAME - CLUSTER_TEST - PATH - test/cluster/kmeans.cu - test/cluster/kmeans_balanced.cu - test/cluster/kmeans_find_k.cu - test/cluster/cluster_solvers.cu - test/cluster/linkage.cu - test/cluster/spectral.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - CORE_TEST - PATH - test/core/bitset.cu - test/core/device_resources_manager.cpp - test/core/device_setter.cpp - test/core/logger.cpp - test/core/math_device.cu - test/core/math_host.cpp - test/core/operators_device.cu - test/core/operators_host.cpp - test/core/handle.cpp - test/core/interruptible.cu - test/core/nvtx.cpp - test/core/mdarray.cu - test/core/mdbuffer.cu - test/core/mdspan_copy.cpp - test/core/mdspan_copy.cu - test/core/mdspan_utils.cu - test/core/numpy_serializer.cu - test/core/memory_type.cpp - test/core/sparse_matrix.cu - test/core/sparse_matrix.cpp - test/core/span.cpp - test/core/span.cu - test/core/stream_view.cpp - test/core/temporary_device_buffer.cu - test/test.cpp - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB - EXPLICIT_INSTANTIATE_ONLY NOCUDA - ) - - ConfigureTest( - NAME - DISTANCE_TEST - PATH - test/distance/dist_adj.cu - test/distance/dist_adj_distance_instance.cu - test/distance/dist_canberra.cu - test/distance/dist_correlation.cu - test/distance/dist_cos.cu - test/distance/dist_hamming.cu - test/distance/dist_hellinger.cu - test/distance/dist_inner_product.cu - test/distance/dist_jensen_shannon.cu - test/distance/dist_kl_divergence.cu - test/distance/dist_l1.cu - test/distance/dist_l2_exp.cu - test/distance/dist_l2_unexp.cu - test/distance/dist_l2_sqrt_exp.cu - test/distance/dist_l_inf.cu - test/distance/dist_lp_unexp.cu - test/distance/dist_russell_rao.cu - test/distance/masked_nn.cu - test/distance/masked_nn_compress_to_bits.cu - test/distance/fused_l2_nn.cu - test/distance/gram.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - list( - APPEND - EXT_HEADER_TEST_SOURCES - test/ext_headers/raft_neighbors_brute_force.cu - test/ext_headers/raft_distance_distance.cu - test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - test/ext_headers/raft_matrix_detail_select_k.cu - test/ext_headers/raft_neighbors_ball_cover.cu - test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - test/ext_headers/raft_distance_fused_l2_nn.cu - test/ext_headers/raft_neighbors_ivf_pq.cu - test/ext_headers/raft_util_memory_pool.cpp - test/ext_headers/raft_neighbors_ivf_flat.cu - test/ext_headers/raft_core_logger.cpp - test/ext_headers/raft_neighbors_refine.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu - test/ext_headers/raft_linalg_detail_coalesced_reduction.cu - test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu - ) - - # Test that the split headers compile in isolation with: - # - # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined - # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined - # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. - ConfigureTest( - NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB - EXPLICIT_INSTANTIATE_ONLY - ) - ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) - ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) - - ConfigureTest( - NAME - LINALG_TEST - PATH - test/linalg/add.cu - test/linalg/axpy.cu - test/linalg/binary_op.cu - test/linalg/cholesky_r1.cu - test/linalg/coalesced_reduction.cu - test/linalg/divide.cu - test/linalg/dot.cu - test/linalg/eig.cu - test/linalg/eig_sel.cu - test/linalg/gemm_layout.cu - test/linalg/gemv.cu - test/linalg/map.cu - test/linalg/map_then_reduce.cu - test/linalg/matrix_vector.cu - test/linalg/matrix_vector_op.cu - test/linalg/mean_squared_error.cu - test/linalg/multiply.cu - test/linalg/norm.cu - test/linalg/normalize.cu - test/linalg/power.cu - test/linalg/randomized_svd.cu - test/linalg/reduce.cu - test/linalg/reduce_cols_by_key.cu - test/linalg/reduce_rows_by_key.cu - test/linalg/rsvd.cu - test/linalg/sqrt.cu - test/linalg/strided_reduction.cu - test/linalg/subtract.cu - test/linalg/svd.cu - test/linalg/ternary_op.cu - test/linalg/transpose.cu - test/linalg/unary_op.cu - ) + # ConfigureTest( + # NAME + # CLUSTER_TEST + # PATH + # test/cluster/kmeans.cu + # test/cluster/kmeans_balanced.cu + # test/cluster/kmeans_find_k.cu + # test/cluster/cluster_solvers.cu + # test/cluster/linkage.cu + # test/cluster/spectral.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # CORE_TEST + # PATH + # test/core/bitset.cu + # test/core/device_resources_manager.cpp + # test/core/device_setter.cpp + # test/core/logger.cpp + # test/core/math_device.cu + # test/core/math_host.cpp + # test/core/operators_device.cu + # test/core/operators_host.cpp + # test/core/handle.cpp + # test/core/interruptible.cu + # test/core/nvtx.cpp + # test/core/mdarray.cu + # test/core/mdbuffer.cu + # test/core/mdspan_copy.cpp + # test/core/mdspan_copy.cu + # test/core/mdspan_utils.cu + # test/core/numpy_serializer.cu + # test/core/memory_type.cpp + # test/core/sparse_matrix.cu + # test/core/sparse_matrix.cpp + # test/core/span.cpp + # test/core/span.cu + # test/core/stream_view.cpp + # test/core/temporary_device_buffer.cu + # test/test.cpp + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB + # EXPLICIT_INSTANTIATE_ONLY NOCUDA + # ) + + # ConfigureTest( + # NAME + # DISTANCE_TEST + # PATH + # test/distance/dist_adj.cu + # test/distance/dist_adj_distance_instance.cu + # test/distance/dist_canberra.cu + # test/distance/dist_correlation.cu + # test/distance/dist_cos.cu + # test/distance/dist_hamming.cu + # test/distance/dist_hellinger.cu + # test/distance/dist_inner_product.cu + # test/distance/dist_jensen_shannon.cu + # test/distance/dist_kl_divergence.cu + # test/distance/dist_l1.cu + # test/distance/dist_l2_exp.cu + # test/distance/dist_l2_unexp.cu + # test/distance/dist_l2_sqrt_exp.cu + # test/distance/dist_l_inf.cu + # test/distance/dist_lp_unexp.cu + # test/distance/dist_russell_rao.cu + # test/distance/masked_nn.cu + # test/distance/masked_nn_compress_to_bits.cu + # test/distance/fused_l2_nn.cu + # test/distance/gram.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # list( + # APPEND + # EXT_HEADER_TEST_SOURCES + # test/ext_headers/raft_neighbors_brute_force.cu + # test/ext_headers/raft_distance_distance.cu + # test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu + # test/ext_headers/raft_matrix_detail_select_k.cu + # test/ext_headers/raft_neighbors_ball_cover.cu + # test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu + # test/ext_headers/raft_distance_fused_l2_nn.cu + # test/ext_headers/raft_neighbors_ivf_pq.cu + # test/ext_headers/raft_util_memory_pool.cpp + # test/ext_headers/raft_neighbors_ivf_flat.cu + # test/ext_headers/raft_core_logger.cpp + # test/ext_headers/raft_neighbors_refine.cu + # test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu + # test/ext_headers/raft_linalg_detail_coalesced_reduction.cu + # test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu + # test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu + # test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu + # ) + + # # Test that the split headers compile in isolation with: + # # + # # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined + # # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined + # # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. + # ConfigureTest( + # NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + # ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) + # ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) + + # ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) + + # ConfigureTest( + # NAME + # LINALG_TEST + # PATH + # test/linalg/add.cu + # test/linalg/axpy.cu + # test/linalg/binary_op.cu + # test/linalg/cholesky_r1.cu + # test/linalg/coalesced_reduction.cu + # test/linalg/divide.cu + # test/linalg/dot.cu + # test/linalg/eig.cu + # test/linalg/eig_sel.cu + # test/linalg/gemm_layout.cu + # test/linalg/gemv.cu + # test/linalg/map.cu + # test/linalg/map_then_reduce.cu + # test/linalg/matrix_vector.cu + # test/linalg/matrix_vector_op.cu + # test/linalg/mean_squared_error.cu + # test/linalg/multiply.cu + # test/linalg/norm.cu + # test/linalg/normalize.cu + # test/linalg/power.cu + # test/linalg/randomized_svd.cu + # test/linalg/reduce.cu + # test/linalg/reduce_cols_by_key.cu + # test/linalg/reduce_rows_by_key.cu + # test/linalg/rsvd.cu + # test/linalg/sqrt.cu + # test/linalg/strided_reduction.cu + # test/linalg/subtract.cu + # test/linalg/svd.cu + # test/linalg/ternary_op.cu + # test/linalg/transpose.cu + # test/linalg/unary_op.cu + # ) ConfigureTest( NAME MATRIX_TEST PATH - test/matrix/argmax.cu - test/matrix/argmin.cu - test/matrix/columnSort.cu - test/matrix/diagonal.cu - test/matrix/gather.cu - test/matrix/scatter.cu - test/matrix/eye.cu - test/matrix/linewise_op.cu - test/matrix/math.cu - test/matrix/matrix.cu - test/matrix/norm.cu - test/matrix/reverse.cu - test/matrix/slice.cu - test/matrix/triangular.cu - test/sparse/spectral_matrix.cu + # test/matrix/argmax.cu + # test/matrix/argmin.cu + # test/matrix/columnSort.cu + # test/matrix/diagonal.cu + # test/matrix/gather.cu + # test/matrix/scatter.cu + # test/matrix/eye.cu + # test/matrix/linewise_op.cu + # test/matrix/math.cu + # test/matrix/matrix.cu + # test/matrix/norm.cu + # test/matrix/reverse.cu + test/matrix/sample_rows.cu + # test/matrix/slice.cu + # test/matrix/triangular.cu + # test/sparse/spectral_matrix.cu LIB EXPLICIT_INSTANTIATE_ONLY ) - ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) + # ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) - ConfigureTest( - NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) + # ConfigureTest( + # NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) ConfigureTest( NAME RANDOM_TEST PATH - test/random/make_blobs.cu - test/random/make_regression.cu - test/random/multi_variable_gaussian.cu - test/random/rng_pcg_host_api.cu - test/random/permute.cu - test/random/rng.cu - test/random/rng_discrete.cu - test/random/rng_int.cu - test/random/rmat_rectangular_generator.cu - test/random/sample_without_replacement.cu + # test/random/make_blobs.cu + # test/random/make_regression.cu + # test/random/multi_variable_gaussian.cu + # test/random/rng_pcg_host_api.cu + # test/random/permute.cu + # test/random/rng.cu + # test/random/rng_discrete.cu + # test/random/rng_int.cu + # test/random/rmat_rectangular_generator.cu + # test/random/sample_without_replacement.cu test/random/excess_sampling.cu ) - ConfigureTest( - NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu - test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_TEST - PATH - test/sparse/add.cu - test/sparse/convert_coo.cu - test/sparse/convert_csr.cu - test/sparse/csr_row_slice.cu - test/sparse/csr_to_dense.cu - test/sparse/csr_transpose.cu - test/sparse/degree.cu - test/sparse/filter.cu - test/sparse/norm.cu - test/sparse/normalize.cu - test/sparse/reduce.cu - test/sparse/row_op.cu - test/sparse/sddmm.cu - test/sparse/sort.cu - test/sparse/spgemmi.cu - test/sparse/spmm.cu - test/sparse/symmetrize.cu - ) - - ConfigureTest( - NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_NEIGHBORS_TEST - PATH - test/sparse/neighbors/cross_component_nn.cu - test/sparse/neighbors/brute_force.cu - test/sparse/neighbors/knn_graph.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - NEIGHBORS_TEST - PATH - test/neighbors/knn.cu - test/neighbors/fused_l2_knn.cu - test/neighbors/tiled_knn.cu - test/neighbors/haversine.cu - test/neighbors/ball_cover.cu - test/neighbors/epsilon_neighborhood.cu - test/neighbors/refine.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB - EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_CAGRA_TEST - PATH - test/neighbors/ann_cagra/test_float_uint32_t.cu - test/neighbors/ann_cagra/test_half_uint32_t.cu - test/neighbors/ann_cagra/test_int8_t_uint32_t.cu - test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu - test/neighbors/ann_cagra/test_float_int64_t.cu - test/neighbors/ann_cagra/test_half_int64_t.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_IVF_TEST - PATH - test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu - test/neighbors/ann_ivf_flat/test_float_int64_t.cu - test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_float_uint32_t.cu - test/neighbors/ann_ivf_pq/test_float_int64_t.cu - test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu - test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_NN_DESCENT_TEST - PATH - test/neighbors/ann_nn_descent/test_float_uint32_t.cu - test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu - test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - STATS_TEST - PATH - test/stats/accuracy.cu - test/stats/adjusted_rand_index.cu - test/stats/completeness_score.cu - test/stats/contingencyMatrix.cu - test/stats/cov.cu - test/stats/dispersion.cu - test/stats/entropy.cu - test/stats/histogram.cu - test/stats/homogeneity_score.cu - test/stats/information_criterion.cu - test/stats/kl_divergence.cu - test/stats/mean.cu - test/stats/meanvar.cu - test/stats/mean_center.cu - test/stats/minmax.cu - test/stats/mutual_info_score.cu - test/stats/neighborhood_recall.cu - test/stats/r2_score.cu - test/stats/rand_index.cu - test/stats/regression_metrics.cu - test/stats/silhouette_score.cu - test/stats/stddev.cu - test/stats/sum.cu - test/stats/trustworthiness.cu - test/stats/weighted_mean.cu - test/stats/v_measure.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - UTILS_TEST - PATH - test/core/seive.cu - test/util/bitonic_sort.cu - test/util/cudart_utils.cpp - test/util/device_atomics.cu - test/util/integer_utils.cpp - test/util/integer_utils.cu - test/util/memory_type_dispatcher.cu - test/util/pow2_utils.cu - test/util/reduction.cu - ) + # ConfigureTest( + # NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu + # test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # SPARSE_TEST + # PATH + # test/sparse/add.cu + # test/sparse/convert_coo.cu + # test/sparse/convert_csr.cu + # test/sparse/csr_row_slice.cu + # test/sparse/csr_to_dense.cu + # test/sparse/csr_transpose.cu + # test/sparse/degree.cu + # test/sparse/filter.cu + # test/sparse/norm.cu + # test/sparse/normalize.cu + # test/sparse/reduce.cu + # test/sparse/row_op.cu + # test/sparse/sddmm.cu + # test/sparse/sort.cu + # test/sparse/spgemmi.cu + # test/sparse/spmm.cu + # test/sparse/symmetrize.cu + # ) + + # ConfigureTest( + # NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu + # test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # SPARSE_NEIGHBORS_TEST + # PATH + # test/sparse/neighbors/cross_component_nn.cu + # test/sparse/neighbors/brute_force.cu + # test/sparse/neighbors/knn_graph.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_TEST + # PATH + # test/neighbors/knn.cu + # test/neighbors/fused_l2_knn.cu + # test/neighbors/tiled_knn.cu + # test/neighbors/haversine.cu + # test/neighbors/ball_cover.cu + # test/neighbors/epsilon_neighborhood.cu + # test/neighbors/refine.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB + # EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_ANN_CAGRA_TEST + # PATH + # test/neighbors/ann_cagra/test_float_uint32_t.cu + # test/neighbors/ann_cagra/test_half_uint32_t.cu + # test/neighbors/ann_cagra/test_int8_t_uint32_t.cu + # test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu + # test/neighbors/ann_cagra/test_float_int64_t.cu + # test/neighbors/ann_cagra/test_half_int64_t.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu + # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # GPUS + # 1 + # PERCENT + # 100 + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_ANN_IVF_TEST + # PATH + # test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu + # test/neighbors/ann_ivf_flat/test_float_int64_t.cu + # test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu + # test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_float_uint32_t.cu + # test/neighbors/ann_ivf_pq/test_float_int64_t.cu + # test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu + # test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # GPUS + # 1 + # PERCENT + # 100 + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_ANN_NN_DESCENT_TEST + # PATH + # test/neighbors/ann_nn_descent/test_float_uint32_t.cu + # test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu + # test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # GPUS + # 1 + # PERCENT + # 100 + # ) + + # ConfigureTest( + # NAME + # STATS_TEST + # PATH + # test/stats/accuracy.cu + # test/stats/adjusted_rand_index.cu + # test/stats/completeness_score.cu + # test/stats/contingencyMatrix.cu + # test/stats/cov.cu + # test/stats/dispersion.cu + # test/stats/entropy.cu + # test/stats/histogram.cu + # test/stats/homogeneity_score.cu + # test/stats/information_criterion.cu + # test/stats/kl_divergence.cu + # test/stats/mean.cu + # test/stats/meanvar.cu + # test/stats/mean_center.cu + # test/stats/minmax.cu + # test/stats/mutual_info_score.cu + # test/stats/neighborhood_recall.cu + # test/stats/r2_score.cu + # test/stats/rand_index.cu + # test/stats/regression_metrics.cu + # test/stats/silhouette_score.cu + # test/stats/stddev.cu + # test/stats/sum.cu + # test/stats/trustworthiness.cu + # test/stats/weighted_mean.cu + # test/stats/v_measure.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # UTILS_TEST + # PATH + # test/core/seive.cu + # test/util/bitonic_sort.cu + # test/util/cudart_utils.cpp + # test/util/device_atomics.cu + # test/util/integer_utils.cpp + # test/util/integer_utils.cu + # test/util/memory_type_dispatcher.cu + # test/util/pow2_utils.cu + # test/util/reduction.cu + # ) endif() # ################################################################################################## diff --git a/cpp/test/matrix/sample_rows.cu b/cpp/test/matrix/sample_rows.cu new file mode 100644 index 0000000000..5ca93d0fe5 --- /dev/null +++ b/cpp/test/matrix/sample_rows.cu @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" + +#include +#include +#include +#include +#include +#include + +#include + +namespace raft { +namespace matrix { + +struct inputs { + int N; + int dim; + int n_samples; +}; + +::std::ostream& operator<<(::std::ostream& os, const inputs p) +{ + os << p.N << "#" << p.k << "#" << p.n_samples; + return os; +} + +template +class SampleRowsTest : public ::testing::TestWithParam { + public: + SampleRowsTest() + : params(::testing::TestWithParam::GetParam()), + state{137ULL}, + in(make_device_vector(res, params.N, params.dim)), + out(make_device_vector(res, 0, 0)) + + { + raft::random::uniform(res, state, in.data_handle(), in.size(), T(-1.0), T(1.0)); + } + + void check() + { + out = raft::random::excess_subsample(res, state, params.N, params.n_samples); + ASSERT_TRUE(out.extent(0) == params.n_samples); + ASSERT_TRUE(out.extent(1) == params.dim) + } + + protected: + inputs params; + raft::resources res; + cudaStream_t stream; + RngState state; + device_matrix out, in; +}; + +const std::vector input1 = { + {10, 1, 1}, {10, 4, 1}, {10, 4, 10}, {10, 10}, {137, 42, 59}, {10000, 128, 893}}; + +using SampleRowsTestInt64 = SampleRowsTest; +TEST_P(SampleRowsTestInt64, SamplingTest) { check(); } +INSTANTIATE_TEST_SUITE_P(SampleRowsTests, SampleRowsTestInt64, ::testing::ValuesIn(input1)); + +} // namespace matrix +} // namespace raft From eb7e6d14c677fa7507527811c92b558ec178fc27 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 13 Mar 2024 09:54:30 +0100 Subject: [PATCH 09/21] Add mdspan input API, fix cmakelists --- .../raft/matrix/detail/sample_rows.cuh | 9 +- cpp/include/raft/matrix/sample_rows.cuh | 36 +- cpp/test/CMakeLists.txt | 732 +++++++++--------- cpp/test/matrix/sample_rows.cu | 15 +- 4 files changed, 400 insertions(+), 392 deletions(-) diff --git a/cpp/include/raft/matrix/detail/sample_rows.cuh b/cpp/include/raft/matrix/detail/sample_rows.cuh index c8120c9ab2..6c598551d7 100644 --- a/cpp/include/raft/matrix/detail/sample_rows.cuh +++ b/cpp/include/raft/matrix/detail/sample_rows.cuh @@ -16,13 +16,16 @@ #pragma once +#include +#include #include +#include #include #include #include #include -namespace raft::matrix { +namespace raft::matrix::detail { /** Select rows randomly from input and copy to output. */ template @@ -30,7 +33,7 @@ void sample_rows(raft::resources const& res, const T* input, IdxT n_rows_input, raft::device_matrix_view output, - RngState random_state) + random::RngState random_state) { IdxT n_dim = output.extent(1); IdxT n_samples = output.extent(0); @@ -51,4 +54,4 @@ void sample_rows(raft::resources const& res, raft::matrix::detail::gather(res, dataset, make_const_mdspan(train_indices.view()), output); } } -} // namespace raft::matrix +} // namespace raft::matrix::detail diff --git a/cpp/include/raft/matrix/sample_rows.cuh b/cpp/include/raft/matrix/sample_rows.cuh index 2f8b8e6248..55b17800c7 100644 --- a/cpp/include/raft/matrix/sample_rows.cuh +++ b/cpp/include/raft/matrix/sample_rows.cuh @@ -16,36 +16,38 @@ #pragma once +#include +#include #include -#include +#include +#include #include -#include -#include namespace raft::matrix { /** Select rows randomly from input and copy to output. */ -template +template void sample_rows(raft::resources const& res, - const T* input, - IdxT n_rows_input, - raft::device_matrix_view output, - RngState random_state) + random::RngState random_state, + mdspan, row_major, accessor> dataset, + raft::device_matrix_view output) { detail::sample_rows(res, input, n_rows_input, output, random_state); + + detail::sample_rows(res, dataset.data_handle(), dataset.extent(0), output, random_state); } /** Subsample the dataset to create a training set*/ -template -raft::device_matrix sample_rows(raft::resources const& res, - const T* input, - IdxT n_rows_input, - IdxT n_train, - IdxT n_dim, - RngState random_state) +template +raft::device_matrix sample_rows( + raft::resources const& res, + random::RngState random_state, + mdspan, row_major, accessor> dataset, + IdxT n_samples) { - auto output = raft::make_device_matrix(res, n_train, n_dim); - detail::sample_rows(res, input, n_rows_input, output, random_state); + auto output = raft::make_device_matrix(res, n_samples, dataset.extent(1)); + detail::sample_rows(res, random_state, dataset.data_handle(), dataset.extent(0), output); return output; } + } // namespace raft::matrix diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index cda9ca69e8..7d31903f5e 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -95,391 +95,391 @@ endfunction() # * distance tests ------------------------------------------------------------------------- if(BUILD_TESTS) - # ConfigureTest( - # NAME - # CLUSTER_TEST - # PATH - # test/cluster/kmeans.cu - # test/cluster/kmeans_balanced.cu - # test/cluster/kmeans_find_k.cu - # test/cluster/cluster_solvers.cu - # test/cluster/linkage.cu - # test/cluster/spectral.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # CORE_TEST - # PATH - # test/core/bitset.cu - # test/core/device_resources_manager.cpp - # test/core/device_setter.cpp - # test/core/logger.cpp - # test/core/math_device.cu - # test/core/math_host.cpp - # test/core/operators_device.cu - # test/core/operators_host.cpp - # test/core/handle.cpp - # test/core/interruptible.cu - # test/core/nvtx.cpp - # test/core/mdarray.cu - # test/core/mdbuffer.cu - # test/core/mdspan_copy.cpp - # test/core/mdspan_copy.cu - # test/core/mdspan_utils.cu - # test/core/numpy_serializer.cu - # test/core/memory_type.cpp - # test/core/sparse_matrix.cu - # test/core/sparse_matrix.cpp - # test/core/span.cpp - # test/core/span.cu - # test/core/stream_view.cpp - # test/core/temporary_device_buffer.cu - # test/test.cpp - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB - # EXPLICIT_INSTANTIATE_ONLY NOCUDA - # ) - - # ConfigureTest( - # NAME - # DISTANCE_TEST - # PATH - # test/distance/dist_adj.cu - # test/distance/dist_adj_distance_instance.cu - # test/distance/dist_canberra.cu - # test/distance/dist_correlation.cu - # test/distance/dist_cos.cu - # test/distance/dist_hamming.cu - # test/distance/dist_hellinger.cu - # test/distance/dist_inner_product.cu - # test/distance/dist_jensen_shannon.cu - # test/distance/dist_kl_divergence.cu - # test/distance/dist_l1.cu - # test/distance/dist_l2_exp.cu - # test/distance/dist_l2_unexp.cu - # test/distance/dist_l2_sqrt_exp.cu - # test/distance/dist_l_inf.cu - # test/distance/dist_lp_unexp.cu - # test/distance/dist_russell_rao.cu - # test/distance/masked_nn.cu - # test/distance/masked_nn_compress_to_bits.cu - # test/distance/fused_l2_nn.cu - # test/distance/gram.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # list( - # APPEND - # EXT_HEADER_TEST_SOURCES - # test/ext_headers/raft_neighbors_brute_force.cu - # test/ext_headers/raft_distance_distance.cu - # test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - # test/ext_headers/raft_matrix_detail_select_k.cu - # test/ext_headers/raft_neighbors_ball_cover.cu - # test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - # test/ext_headers/raft_distance_fused_l2_nn.cu - # test/ext_headers/raft_neighbors_ivf_pq.cu - # test/ext_headers/raft_util_memory_pool.cpp - # test/ext_headers/raft_neighbors_ivf_flat.cu - # test/ext_headers/raft_core_logger.cpp - # test/ext_headers/raft_neighbors_refine.cu - # test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu - # test/ext_headers/raft_linalg_detail_coalesced_reduction.cu - # test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - # test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - # test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu - # ) - - # # Test that the split headers compile in isolation with: - # # - # # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined - # # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined - # # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. - # ConfigureTest( - # NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - # ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) - # ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - # ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) - - # ConfigureTest( - # NAME - # LINALG_TEST - # PATH - # test/linalg/add.cu - # test/linalg/axpy.cu - # test/linalg/binary_op.cu - # test/linalg/cholesky_r1.cu - # test/linalg/coalesced_reduction.cu - # test/linalg/divide.cu - # test/linalg/dot.cu - # test/linalg/eig.cu - # test/linalg/eig_sel.cu - # test/linalg/gemm_layout.cu - # test/linalg/gemv.cu - # test/linalg/map.cu - # test/linalg/map_then_reduce.cu - # test/linalg/matrix_vector.cu - # test/linalg/matrix_vector_op.cu - # test/linalg/mean_squared_error.cu - # test/linalg/multiply.cu - # test/linalg/norm.cu - # test/linalg/normalize.cu - # test/linalg/power.cu - # test/linalg/randomized_svd.cu - # test/linalg/reduce.cu - # test/linalg/reduce_cols_by_key.cu - # test/linalg/reduce_rows_by_key.cu - # test/linalg/rsvd.cu - # test/linalg/sqrt.cu - # test/linalg/strided_reduction.cu - # test/linalg/subtract.cu - # test/linalg/svd.cu - # test/linalg/ternary_op.cu - # test/linalg/transpose.cu - # test/linalg/unary_op.cu - # ) + ConfigureTest( + NAME + CLUSTER_TEST + PATH + test/cluster/kmeans.cu + test/cluster/kmeans_balanced.cu + test/cluster/kmeans_find_k.cu + test/cluster/cluster_solvers.cu + test/cluster/linkage.cu + test/cluster/spectral.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + CORE_TEST + PATH + test/core/bitset.cu + test/core/device_resources_manager.cpp + test/core/device_setter.cpp + test/core/logger.cpp + test/core/math_device.cu + test/core/math_host.cpp + test/core/operators_device.cu + test/core/operators_host.cpp + test/core/handle.cpp + test/core/interruptible.cu + test/core/nvtx.cpp + test/core/mdarray.cu + test/core/mdbuffer.cu + test/core/mdspan_copy.cpp + test/core/mdspan_copy.cu + test/core/mdspan_utils.cu + test/core/numpy_serializer.cu + test/core/memory_type.cpp + test/core/sparse_matrix.cu + test/core/sparse_matrix.cpp + test/core/span.cpp + test/core/span.cu + test/core/stream_view.cpp + test/core/temporary_device_buffer.cu + test/test.cpp + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB + EXPLICIT_INSTANTIATE_ONLY NOCUDA + ) + + ConfigureTest( + NAME + DISTANCE_TEST + PATH + test/distance/dist_adj.cu + test/distance/dist_adj_distance_instance.cu + test/distance/dist_canberra.cu + test/distance/dist_correlation.cu + test/distance/dist_cos.cu + test/distance/dist_hamming.cu + test/distance/dist_hellinger.cu + test/distance/dist_inner_product.cu + test/distance/dist_jensen_shannon.cu + test/distance/dist_kl_divergence.cu + test/distance/dist_l1.cu + test/distance/dist_l2_exp.cu + test/distance/dist_l2_unexp.cu + test/distance/dist_l2_sqrt_exp.cu + test/distance/dist_l_inf.cu + test/distance/dist_lp_unexp.cu + test/distance/dist_russell_rao.cu + test/distance/masked_nn.cu + test/distance/masked_nn_compress_to_bits.cu + test/distance/fused_l2_nn.cu + test/distance/gram.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + list( + APPEND + EXT_HEADER_TEST_SOURCES + test/ext_headers/raft_neighbors_brute_force.cu + test/ext_headers/raft_distance_distance.cu + test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu + test/ext_headers/raft_matrix_detail_select_k.cu + test/ext_headers/raft_neighbors_ball_cover.cu + test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu + test/ext_headers/raft_distance_fused_l2_nn.cu + test/ext_headers/raft_neighbors_ivf_pq.cu + test/ext_headers/raft_util_memory_pool.cpp + test/ext_headers/raft_neighbors_ivf_flat.cu + test/ext_headers/raft_core_logger.cpp + test/ext_headers/raft_neighbors_refine.cu + test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu + test/ext_headers/raft_linalg_detail_coalesced_reduction.cu + test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu + test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu + test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu + ) + + # Test that the split headers compile in isolation with: + # + # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined + # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined + # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. + ConfigureTest( + NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB + EXPLICIT_INSTANTIATE_ONLY + ) + ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) + ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) + + ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) + + ConfigureTest( + NAME + LINALG_TEST + PATH + test/linalg/add.cu + test/linalg/axpy.cu + test/linalg/binary_op.cu + test/linalg/cholesky_r1.cu + test/linalg/coalesced_reduction.cu + test/linalg/divide.cu + test/linalg/dot.cu + test/linalg/eig.cu + test/linalg/eig_sel.cu + test/linalg/gemm_layout.cu + test/linalg/gemv.cu + test/linalg/map.cu + test/linalg/map_then_reduce.cu + test/linalg/matrix_vector.cu + test/linalg/matrix_vector_op.cu + test/linalg/mean_squared_error.cu + test/linalg/multiply.cu + test/linalg/norm.cu + test/linalg/normalize.cu + test/linalg/power.cu + test/linalg/randomized_svd.cu + test/linalg/reduce.cu + test/linalg/reduce_cols_by_key.cu + test/linalg/reduce_rows_by_key.cu + test/linalg/rsvd.cu + test/linalg/sqrt.cu + test/linalg/strided_reduction.cu + test/linalg/subtract.cu + test/linalg/svd.cu + test/linalg/ternary_op.cu + test/linalg/transpose.cu + test/linalg/unary_op.cu + ) ConfigureTest( NAME MATRIX_TEST PATH - # test/matrix/argmax.cu - # test/matrix/argmin.cu - # test/matrix/columnSort.cu - # test/matrix/diagonal.cu - # test/matrix/gather.cu - # test/matrix/scatter.cu - # test/matrix/eye.cu - # test/matrix/linewise_op.cu - # test/matrix/math.cu - # test/matrix/matrix.cu - # test/matrix/norm.cu - # test/matrix/reverse.cu + test/matrix/argmax.cu + test/matrix/argmin.cu + test/matrix/columnSort.cu + test/matrix/diagonal.cu + test/matrix/gather.cu + test/matrix/scatter.cu + test/matrix/eye.cu + test/matrix/linewise_op.cu + test/matrix/math.cu + test/matrix/matrix.cu + test/matrix/norm.cu + test/matrix/reverse.cu test/matrix/sample_rows.cu - # test/matrix/slice.cu - # test/matrix/triangular.cu - # test/sparse/spectral_matrix.cu + test/matrix/slice.cu + test/matrix/triangular.cu + test/sparse/spectral_matrix.cu LIB EXPLICIT_INSTANTIATE_ONLY ) - # ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) + ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) - # ConfigureTest( - # NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY - # ) + ConfigureTest( + NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) ConfigureTest( NAME RANDOM_TEST PATH - # test/random/make_blobs.cu - # test/random/make_regression.cu - # test/random/multi_variable_gaussian.cu - # test/random/rng_pcg_host_api.cu - # test/random/permute.cu - # test/random/rng.cu - # test/random/rng_discrete.cu - # test/random/rng_int.cu - # test/random/rmat_rectangular_generator.cu - # test/random/sample_without_replacement.cu + test/random/make_blobs.cu + test/random/make_regression.cu + test/random/multi_variable_gaussian.cu + test/random/rng_pcg_host_api.cu + test/random/permute.cu + test/random/rng.cu + test/random/rng_discrete.cu + test/random/rng_int.cu + test/random/rmat_rectangular_generator.cu + test/random/sample_without_replacement.cu test/random/excess_sampling.cu ) - # ConfigureTest( - # NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu - # test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # SPARSE_TEST - # PATH - # test/sparse/add.cu - # test/sparse/convert_coo.cu - # test/sparse/convert_csr.cu - # test/sparse/csr_row_slice.cu - # test/sparse/csr_to_dense.cu - # test/sparse/csr_transpose.cu - # test/sparse/degree.cu - # test/sparse/filter.cu - # test/sparse/norm.cu - # test/sparse/normalize.cu - # test/sparse/reduce.cu - # test/sparse/row_op.cu - # test/sparse/sddmm.cu - # test/sparse/sort.cu - # test/sparse/spgemmi.cu - # test/sparse/spmm.cu - # test/sparse/symmetrize.cu - # ) - - # ConfigureTest( - # NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - # test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # SPARSE_NEIGHBORS_TEST - # PATH - # test/sparse/neighbors/cross_component_nn.cu - # test/sparse/neighbors/brute_force.cu - # test/sparse/neighbors/knn_graph.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_TEST - # PATH - # test/neighbors/knn.cu - # test/neighbors/fused_l2_knn.cu - # test/neighbors/tiled_knn.cu - # test/neighbors/haversine.cu - # test/neighbors/ball_cover.cu - # test/neighbors/epsilon_neighborhood.cu - # test/neighbors/refine.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB - # EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_ANN_CAGRA_TEST - # PATH - # test/neighbors/ann_cagra/test_float_uint32_t.cu - # test/neighbors/ann_cagra/test_half_uint32_t.cu - # test/neighbors/ann_cagra/test_int8_t_uint32_t.cu - # test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu - # test/neighbors/ann_cagra/test_float_int64_t.cu - # test/neighbors/ann_cagra/test_half_int64_t.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu - # src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # GPUS - # 1 - # PERCENT - # 100 - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_ANN_IVF_TEST - # PATH - # test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu - # test/neighbors/ann_ivf_flat/test_float_int64_t.cu - # test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu - # test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu - # test/neighbors/ann_ivf_pq/test_float_uint32_t.cu - # test/neighbors/ann_ivf_pq/test_float_int64_t.cu - # test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu - # test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu - # test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu - # test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # GPUS - # 1 - # PERCENT - # 100 - # ) - - # ConfigureTest( - # NAME - # NEIGHBORS_ANN_NN_DESCENT_TEST - # PATH - # test/neighbors/ann_nn_descent/test_float_uint32_t.cu - # test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu - # test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # GPUS - # 1 - # PERCENT - # 100 - # ) - - # ConfigureTest( - # NAME - # STATS_TEST - # PATH - # test/stats/accuracy.cu - # test/stats/adjusted_rand_index.cu - # test/stats/completeness_score.cu - # test/stats/contingencyMatrix.cu - # test/stats/cov.cu - # test/stats/dispersion.cu - # test/stats/entropy.cu - # test/stats/histogram.cu - # test/stats/homogeneity_score.cu - # test/stats/information_criterion.cu - # test/stats/kl_divergence.cu - # test/stats/mean.cu - # test/stats/meanvar.cu - # test/stats/mean_center.cu - # test/stats/minmax.cu - # test/stats/mutual_info_score.cu - # test/stats/neighborhood_recall.cu - # test/stats/r2_score.cu - # test/stats/rand_index.cu - # test/stats/regression_metrics.cu - # test/stats/silhouette_score.cu - # test/stats/stddev.cu - # test/stats/sum.cu - # test/stats/trustworthiness.cu - # test/stats/weighted_mean.cu - # test/stats/v_measure.cu - # LIB - # EXPLICIT_INSTANTIATE_ONLY - # ) - - # ConfigureTest( - # NAME - # UTILS_TEST - # PATH - # test/core/seive.cu - # test/util/bitonic_sort.cu - # test/util/cudart_utils.cpp - # test/util/device_atomics.cu - # test/util/integer_utils.cpp - # test/util/integer_utils.cu - # test/util/memory_type_dispatcher.cu - # test/util/pow2_utils.cu - # test/util/reduction.cu - # ) + ConfigureTest( + NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu + test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + SPARSE_TEST + PATH + test/sparse/add.cu + test/sparse/convert_coo.cu + test/sparse/convert_csr.cu + test/sparse/csr_row_slice.cu + test/sparse/csr_to_dense.cu + test/sparse/csr_transpose.cu + test/sparse/degree.cu + test/sparse/filter.cu + test/sparse/norm.cu + test/sparse/normalize.cu + test/sparse/reduce.cu + test/sparse/row_op.cu + test/sparse/sddmm.cu + test/sparse/sort.cu + test/sparse/spgemmi.cu + test/sparse/spmm.cu + test/sparse/symmetrize.cu + ) + + ConfigureTest( + NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu + test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + SPARSE_NEIGHBORS_TEST + PATH + test/sparse/neighbors/cross_component_nn.cu + test/sparse/neighbors/brute_force.cu + test/sparse/neighbors/knn_graph.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + NEIGHBORS_TEST + PATH + test/neighbors/knn.cu + test/neighbors/fused_l2_knn.cu + test/neighbors/tiled_knn.cu + test/neighbors/haversine.cu + test/neighbors/ball_cover.cu + test/neighbors/epsilon_neighborhood.cu + test/neighbors/refine.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH test/neighbors/ann_brute_force/test_float.cu LIB + EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_CAGRA_TEST + PATH + test/neighbors/ann_cagra/test_float_uint32_t.cu + test/neighbors/ann_cagra/test_half_uint32_t.cu + test/neighbors/ann_cagra/test_int8_t_uint32_t.cu + test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu + test/neighbors/ann_cagra/test_float_int64_t.cu + test/neighbors/ann_cagra/test_half_int64_t.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu + src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS + 1 + PERCENT + 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_IVF_TEST + PATH + test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu + test/neighbors/ann_ivf_flat/test_float_int64_t.cu + test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu + test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu + test/neighbors/ann_ivf_pq/test_float_uint32_t.cu + test/neighbors/ann_ivf_pq/test_float_int64_t.cu + test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu + test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu + test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu + test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS + 1 + PERCENT + 100 + ) + + ConfigureTest( + NAME + NEIGHBORS_ANN_NN_DESCENT_TEST + PATH + test/neighbors/ann_nn_descent/test_float_uint32_t.cu + test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu + test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + GPUS + 1 + PERCENT + 100 + ) + + ConfigureTest( + NAME + STATS_TEST + PATH + test/stats/accuracy.cu + test/stats/adjusted_rand_index.cu + test/stats/completeness_score.cu + test/stats/contingencyMatrix.cu + test/stats/cov.cu + test/stats/dispersion.cu + test/stats/entropy.cu + test/stats/histogram.cu + test/stats/homogeneity_score.cu + test/stats/information_criterion.cu + test/stats/kl_divergence.cu + test/stats/mean.cu + test/stats/meanvar.cu + test/stats/mean_center.cu + test/stats/minmax.cu + test/stats/mutual_info_score.cu + test/stats/neighborhood_recall.cu + test/stats/r2_score.cu + test/stats/rand_index.cu + test/stats/regression_metrics.cu + test/stats/silhouette_score.cu + test/stats/stddev.cu + test/stats/sum.cu + test/stats/trustworthiness.cu + test/stats/weighted_mean.cu + test/stats/v_measure.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest( + NAME + UTILS_TEST + PATH + test/core/seive.cu + test/util/bitonic_sort.cu + test/util/cudart_utils.cpp + test/util/device_atomics.cu + test/util/integer_utils.cpp + test/util/integer_utils.cu + test/util/memory_type_dispatcher.cu + test/util/pow2_utils.cu + test/util/reduction.cu + ) endif() # ################################################################################################## diff --git a/cpp/test/matrix/sample_rows.cu b/cpp/test/matrix/sample_rows.cu index 5ca93d0fe5..80abeb7397 100644 --- a/cpp/test/matrix/sample_rows.cu +++ b/cpp/test/matrix/sample_rows.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,7 @@ struct inputs { ::std::ostream& operator<<(::std::ostream& os, const inputs p) { - os << p.N << "#" << p.k << "#" << p.n_samples; + os << p.N << "#" << p.dim << "#" << p.n_samples; return os; } @@ -46,8 +47,8 @@ class SampleRowsTest : public ::testing::TestWithParam { SampleRowsTest() : params(::testing::TestWithParam::GetParam()), state{137ULL}, - in(make_device_vector(res, params.N, params.dim)), - out(make_device_vector(res, 0, 0)) + in(make_device_matrix(res, params.N, params.dim)), + out(make_device_matrix(res, 0, 0)) { raft::random::uniform(res, state, in.data_handle(), in.size(), T(-1.0), T(1.0)); @@ -55,16 +56,18 @@ class SampleRowsTest : public ::testing::TestWithParam { void check() { - out = raft::random::excess_subsample(res, state, params.N, params.n_samples); + out = raft::matrix::sample_rows(res, state, make_const_mdspan(in.view())); ASSERT_TRUE(out.extent(0) == params.n_samples); - ASSERT_TRUE(out.extent(1) == params.dim) + ASSERT_TRUE(out.extent(1) == params.dim); + // TODO(tfeher): check sampled values + // TODO(tfeher): check host / device input } protected: inputs params; raft::resources res; cudaStream_t stream; - RngState state; + random::RngState state; device_matrix out, in; }; From 7857f2fd433c958d51a533a8ffe5b1e7881b93f0 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 13 Mar 2024 10:15:12 +0100 Subject: [PATCH 10/21] corrections --- cpp/include/raft/matrix/sample_rows.cuh | 6 +++--- cpp/test/matrix/sample_rows.cu | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/matrix/sample_rows.cuh b/cpp/include/raft/matrix/sample_rows.cuh index 55b17800c7..67281ff297 100644 --- a/cpp/include/raft/matrix/sample_rows.cuh +++ b/cpp/include/raft/matrix/sample_rows.cuh @@ -29,7 +29,7 @@ namespace raft::matrix { template void sample_rows(raft::resources const& res, random::RngState random_state, - mdspan, row_major, accessor> dataset, + mdspan, row_major, accessor> dataset, raft::device_matrix_view output) { detail::sample_rows(res, input, n_rows_input, output, random_state); @@ -42,11 +42,11 @@ template raft::device_matrix sample_rows( raft::resources const& res, random::RngState random_state, - mdspan, row_major, accessor> dataset, + mdspan, row_major, accessor> dataset, IdxT n_samples) { auto output = raft::make_device_matrix(res, n_samples, dataset.extent(1)); - detail::sample_rows(res, random_state, dataset.data_handle(), dataset.extent(0), output); + detail::sample_rows(res, random_state, dataset, output.view()); return output; } diff --git a/cpp/test/matrix/sample_rows.cu b/cpp/test/matrix/sample_rows.cu index 80abeb7397..8d9be8e1e1 100644 --- a/cpp/test/matrix/sample_rows.cu +++ b/cpp/test/matrix/sample_rows.cu @@ -56,7 +56,8 @@ class SampleRowsTest : public ::testing::TestWithParam { void check() { - out = raft::matrix::sample_rows(res, state, make_const_mdspan(in.view())); + out = raft::matrix::sample_rows( + res, state, make_const_mdspan(in.view()), params.n_samples); ASSERT_TRUE(out.extent(0) == params.n_samples); ASSERT_TRUE(out.extent(1) == params.dim); // TODO(tfeher): check sampled values From 93ff94f936d29796fca2f69cf223ca4e2a16662d Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 13 Mar 2024 13:21:40 +0100 Subject: [PATCH 11/21] Add test to sample_rows --- .../raft/matrix/detail/sample_rows.cuh | 4 +- cpp/include/raft/matrix/sample_rows.cuh | 6 +- cpp/test/matrix/sample_rows.cu | 78 ++++++++++++++++--- cpp/test/random/excess_sampling.cu | 3 +- 4 files changed, 73 insertions(+), 18 deletions(-) diff --git a/cpp/include/raft/matrix/detail/sample_rows.cuh b/cpp/include/raft/matrix/detail/sample_rows.cuh index 6c598551d7..e28ad648da 100644 --- a/cpp/include/raft/matrix/detail/sample_rows.cuh +++ b/cpp/include/raft/matrix/detail/sample_rows.cuh @@ -30,10 +30,10 @@ namespace raft::matrix::detail { /** Select rows randomly from input and copy to output. */ template void sample_rows(raft::resources const& res, + random::RngState random_state, const T* input, IdxT n_rows_input, - raft::device_matrix_view output, - random::RngState random_state) + raft::device_matrix_view output) { IdxT n_dim = output.extent(1); IdxT n_samples = output.extent(0); diff --git a/cpp/include/raft/matrix/sample_rows.cuh b/cpp/include/raft/matrix/sample_rows.cuh index 67281ff297..7a1f9bf756 100644 --- a/cpp/include/raft/matrix/sample_rows.cuh +++ b/cpp/include/raft/matrix/sample_rows.cuh @@ -32,9 +32,7 @@ void sample_rows(raft::resources const& res, mdspan, row_major, accessor> dataset, raft::device_matrix_view output) { - detail::sample_rows(res, input, n_rows_input, output, random_state); - - detail::sample_rows(res, dataset.data_handle(), dataset.extent(0), output, random_state); + detail::sample_rows(res, random_state, dataset.data_handle(), dataset.extent(0), output); } /** Subsample the dataset to create a training set*/ @@ -46,7 +44,7 @@ raft::device_matrix sample_rows( IdxT n_samples) { auto output = raft::make_device_matrix(res, n_samples, dataset.extent(1)); - detail::sample_rows(res, random_state, dataset, output.view()); + sample_rows(res, random_state, dataset, output.view()); return output; } diff --git a/cpp/test/matrix/sample_rows.cu b/cpp/test/matrix/sample_rows.cu index 8d9be8e1e1..048edde2ba 100644 --- a/cpp/test/matrix/sample_rows.cu +++ b/cpp/test/matrix/sample_rows.cu @@ -18,14 +18,18 @@ #include #include +#include #include #include #include #include #include +#include #include +#include + namespace raft { namespace matrix { @@ -33,11 +37,12 @@ struct inputs { int N; int dim; int n_samples; + bool host; }; ::std::ostream& operator<<(::std::ostream& os, const inputs p) { - os << p.N << "#" << p.dim << "#" << p.n_samples; + os << p.N << "#" << p.dim << "#" << p.n_samples << (p.host ? "#host" : "#device"); return os; } @@ -46,22 +51,50 @@ class SampleRowsTest : public ::testing::TestWithParam { public: SampleRowsTest() : params(::testing::TestWithParam::GetParam()), + stream(resource::get_cuda_stream(res)), state{137ULL}, in(make_device_matrix(res, params.N, params.dim)), - out(make_device_matrix(res, 0, 0)) - + out(make_device_matrix(res, 0, 0)), + in_h(make_host_matrix(res, params.N, params.dim)), + out_h(make_host_matrix(res, params.n_samples, params.dim)) { raft::random::uniform(res, state, in.data_handle(), in.size(), T(-1.0), T(1.0)); + for (int64_t i = 0; i < params.N; i++) { + for (int64_t k = 0; k < params.dim; k++) + in_h(i, k) = i * 1000 + k; + } + raft::copy(in.data_handle(), in_h.data_handle(), in_h.size(), stream); } void check() { - out = raft::matrix::sample_rows( - res, state, make_const_mdspan(in.view()), params.n_samples); + if (params.host) { + out = raft::matrix::sample_rows( + res, state, make_const_mdspan(in_h.view()), (int64_t)params.n_samples); + } else { + out = raft::matrix::sample_rows( + res, state, make_const_mdspan(in.view()), (int64_t)params.n_samples); + } + + raft::copy(out_h.data_handle(), out.data_handle(), out.size(), stream); + resource::sync_stream(res, stream); + ASSERT_TRUE(out.extent(0) == params.n_samples); ASSERT_TRUE(out.extent(1) == params.dim); - // TODO(tfeher): check sampled values - // TODO(tfeher): check host / device input + + std::unordered_set occurrence; + + for (int64_t i = 0; i < params.n_samples; ++i) { + int val = (int)out_h(i, 0) / 1000; + ASSERT_TRUE(0 <= val && val < params.N) + << "out-of-range index @i=" << i << " val=" << val << " params=" << params; + ASSERT_TRUE(occurrence.find(val) == occurrence.end()) + << "repeated index @i=" << i << " idx=" << val << " params=" << params; + occurrence.insert(val); + for (int64_t k = 0; k < params.dim; k++) { + ASSERT_TRUE(raft::match((int64_t)(out_h(i, k)), val * 1000 + k, raft::Compare())); + } + } } protected: @@ -69,15 +102,38 @@ class SampleRowsTest : public ::testing::TestWithParam { raft::resources res; cudaStream_t stream; random::RngState state; - device_matrix out, in; + device_matrix in, out; + host_matrix in_h, out_h; }; -const std::vector input1 = { - {10, 1, 1}, {10, 4, 1}, {10, 4, 10}, {10, 10}, {137, 42, 59}, {10000, 128, 893}}; +inline std::vector generate_inputs() +{ + std::vector input1 = + raft::util::itertools::product({10}, {1, 17, 96}, {1, 6, 9, 10}, {false}); + + std::vector input2 = + raft::util::itertools::product({137}, {1, 17, 128}, {1, 10, 100, 137}, {false}); + input1.insert(input1.end(), input2.begin(), input2.end()); + + input2 = raft::util::itertools::product( + {100000}, {1, 42}, {1, 137, 1000, 10000, 100000}, {false}); + input1.insert(input1.end(), input2.begin(), input2.end()); + + int n = input1.size(); + // Add same tests for host data + for (int i = 0; i < n; i++) { + inputs x = input1[i]; + x.host = true; + input1.push_back(x); + } + return input1; +} + +const std::vector inputs1 = generate_inputs(); using SampleRowsTestInt64 = SampleRowsTest; TEST_P(SampleRowsTestInt64, SamplingTest) { check(); } -INSTANTIATE_TEST_SUITE_P(SampleRowsTests, SampleRowsTestInt64, ::testing::ValuesIn(input1)); +INSTANTIATE_TEST_SUITE_P(SampleRowsTests, SampleRowsTestInt64, ::testing::ValuesIn(inputs1)); } // namespace matrix } // namespace raft diff --git a/cpp/test/random/excess_sampling.cu b/cpp/test/random/excess_sampling.cu index 8c788c491b..45ed2a6727 100644 --- a/cpp/test/random/excess_sampling.cu +++ b/cpp/test/random/excess_sampling.cu @@ -51,6 +51,7 @@ class ExcessSamplingTest : public ::testing::TestWithParam { public: ExcessSamplingTest() : params(::testing::TestWithParam::GetParam()), + stream(resource::get_cuda_stream(res)), state{137ULL}, in(make_device_vector(res, params.n_samples)), out(make_device_vector(res, 0)), @@ -89,7 +90,7 @@ class ExcessSamplingTest : public ::testing::TestWithParam { raft::resources res; cudaStream_t stream; RngState state; - device_vector out, in; + device_vector in, out; host_vector h_out; }; From f2c28ce6faf1fc22b09481c8a3aff266b3d3fbfa Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 14 Mar 2024 09:24:01 +0100 Subject: [PATCH 12/21] Revert "[HOTFIX] 24.02 Revert Random Sampling (#2144)" This reverts commit 7edd372e6a8a7629c4abcdb3de59ad9c5ddb25ea. Subsampling is implemented in a separate PR, therefore the reletad code was removed while reverting. --- .../src/raft/raft_ann_bench_param_parser.h | 3 + cpp/include/raft/matrix/detail/gather.cuh | 3 + .../raft/neighbors/detail/ivf_flat_build.cuh | 23 +-- .../raft/neighbors/detail/ivf_pq_build.cuh | 159 ++++++------------ cpp/include/raft/neighbors/ivf_pq_types.hpp | 8 + .../raft/spatial/knn/detail/ann_utils.cuh | 9 + cpp/test/neighbors/ann_ivf_pq.cuh | 2 +- docs/source/ann_benchmarks_param_tuning.md | 1 + .../neighbors/ivf_pq/cpp/c_ivf_pq.pxd | 1 + .../pylibraft/neighbors/ivf_pq/ivf_pq.pyx | 16 +- 10 files changed, 98 insertions(+), 127 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 2339677340..ae40deb50c 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -87,6 +87,9 @@ void parse_build_param(const nlohmann::json& conf, "', should be either 'cluster' or 'subspace'"); } } + if (conf.contains("max_train_points_per_pq_code")) { + param.max_train_points_per_pq_code = conf.at("max_train_points_per_pq_code"); + } } template diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 651fec81c3..a1bddb420f 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -17,6 +17,9 @@ #pragma once #include +#include +#include +#include #include #include diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh index 55184cc615..e39c41351c 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh @@ -364,28 +364,23 @@ inline auto build(raft::resources const& handle, // Train the kmeans clustering { + int random_seed = 137; auto trainset_ratio = std::max( 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); auto n_rows_train = n_rows / trainset_ratio; - rmm::device_uvector trainset(n_rows_train * index.dim(), stream); - // TODO: a proper sampling - RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(), - sizeof(T) * index.dim(), - dataset, - sizeof(T) * index.dim() * trainset_ratio, - sizeof(T) * index.dim(), - n_rows_train, - cudaMemcpyDefault, - stream)); - auto trainset_const_view = - raft::make_device_matrix_view(trainset.data(), n_rows_train, index.dim()); + auto trainset = make_device_matrix(handle, n_rows_train, index.dim()); + raft::spatial::knn::detail::utils::subsample( + handle, dataset, n_rows, trainset.view(), random_seed); auto centers_view = raft::make_device_matrix_view( index.centers().data_handle(), index.n_lists(), index.dim()); raft::cluster::kmeans_balanced_params kmeans_params; kmeans_params.n_iters = params.kmeans_n_iters; kmeans_params.metric = index.metric(); - raft::cluster::kmeans_balanced::fit( - handle, kmeans_params, trainset_const_view, centers_view, utils::mapping{}); + raft::cluster::kmeans_balanced::fit(handle, + kmeans_params, + make_const_mdspan(trainset.view()), + centers_view, + utils::mapping{}); } // add the data if necessary diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index b7796d52fa..433707de9a 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -61,51 +62,6 @@ namespace raft::neighbors::ivf_pq::detail { using namespace raft::spatial::knn::detail; // NOLINT -template -__launch_bounds__(BlockDim) RAFT_KERNEL copy_warped_kernel( - T* out, uint32_t ld_out, const S* in, uint32_t ld_in, uint32_t n_cols, size_t n_rows) -{ - using warp = Pow2; - size_t row_ix = warp::div(size_t(threadIdx.x) + size_t(BlockDim) * size_t(blockIdx.x)); - uint32_t i = warp::mod(threadIdx.x); - if (row_ix >= n_rows) return; - out += row_ix * ld_out; - in += row_ix * ld_in; - auto f = utils::mapping{}; - for (uint32_t col_ix = i; col_ix < n_cols; col_ix += warp::Value) { - auto x = f(in[col_ix]); - __syncwarp(); - out[col_ix] = x; - } -} - -/** - * Copy the data one warp-per-row: - * - * 1. load the data per-warp - * 2. apply the `utils::mapping{}` - * 3. sync within warp - * 4. store the data. - * - * Assuming sizeof(T) >= sizeof(S) and the data is properly aligned (see the usage in `build`), this - * allows to re-structure the data within rows in-place. - */ -template -void copy_warped(T* out, - uint32_t ld_out, - const S* in, - uint32_t ld_in, - uint32_t n_cols, - size_t n_rows, - rmm::cuda_stream_view stream) -{ - constexpr uint32_t kBlockDim = 128; - dim3 threads(kBlockDim, 1, 1); - dim3 blocks(div_rounding_up_safe(n_rows, kBlockDim / WarpSize), 1, 1); - copy_warped_kernel - <<>>(out, ld_out, in, ld_in, n_cols, n_rows); -} - /** * @brief Fill-in a random orthogonal transformation matrix. * @@ -395,14 +351,19 @@ void train_per_subset(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, + uint32_t max_train_points_per_pq_code, rmm::mr::device_memory_resource* managed_memory) { auto stream = resource::get_cuda_stream(handle); auto device_memory = resource::get_workspace_resource(handle); rmm::device_uvector pq_centers_tmp(index.pq_centers().size(), stream, device_memory); - rmm::device_uvector sub_trainset(n_rows * size_t(index.pq_len()), stream, device_memory); - rmm::device_uvector sub_labels(n_rows, stream, device_memory); + // Subsampling the train set for codebook generation based on max_train_points_per_pq_code. + size_t big_enough = max_train_points_per_pq_code * size_t(index.pq_book_size()); + auto pq_n_rows = uint32_t(std::min(big_enough, n_rows)); + rmm::device_uvector sub_trainset( + pq_n_rows * size_t(index.pq_len()), stream, device_memory); + rmm::device_uvector sub_labels(pq_n_rows, stream, device_memory); rmm::device_uvector pq_cluster_sizes(index.pq_book_size(), stream, device_memory); @@ -413,7 +374,7 @@ void train_per_subset(raft::resources const& handle, // Get the rotated cluster centers for each training vector. // This will be subtracted from the input vectors afterwards. utils::copy_selected( - n_rows, + pq_n_rows, index.pq_len(), index.centers_rot().data_handle() + index.pq_len() * j, labels, @@ -429,7 +390,7 @@ void train_per_subset(raft::resources const& handle, true, false, index.pq_len(), - n_rows, + pq_n_rows, index.dim(), &alpha, index.rotation_matrix().data_handle() + index.dim() * index.pq_len() * j, @@ -442,13 +403,14 @@ void train_per_subset(raft::resources const& handle, stream); // train PQ codebook for this subspace - auto sub_trainset_view = - raft::make_device_matrix_view(sub_trainset.data(), n_rows, index.pq_len()); + auto sub_trainset_view = raft::make_device_matrix_view( + sub_trainset.data(), pq_n_rows, index.pq_len()); auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + index.pq_book_size() * index.pq_len() * j, index.pq_book_size(), index.pq_len()); - auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), n_rows); + auto sub_labels_view = + raft::make_device_vector_view(sub_labels.data(), pq_n_rows); auto cluster_sizes_view = raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); raft::cluster::kmeans_balanced_params kmeans_params; @@ -472,6 +434,7 @@ void train_per_cluster(raft::resources const& handle, const float* trainset, // [n_rows, dim] const uint32_t* labels, // [n_rows] uint32_t kmeans_n_iters, + uint32_t max_train_points_per_pq_code, rmm::mr::device_memory_resource* managed_memory) { auto stream = resource::get_cuda_stream(handle); @@ -519,9 +482,11 @@ void train_per_cluster(raft::resources const& handle, indices + cluster_offsets[l], device_memory); - // limit the cluster size to bound the training time. + // limit the cluster size to bound the training time based on max_train_points_per_pq_code + // If pq_book_size is less than pq_dim, use max_train_points_per_pq_code per pq_dim instead // [sic] we interpret the data as pq_len-dimensional - size_t big_enough = 256ul * std::max(index.pq_book_size(), index.pq_dim()); + size_t big_enough = + max_train_points_per_pq_code * std::max(index.pq_book_size(), index.pq_dim()); size_t available_rows = size_t(cluster_size) * size_t(index.pq_dim()); auto pq_n_rows = uint32_t(std::min(big_enough, available_rows)); // train PQ codebook for this cluster @@ -1699,76 +1664,46 @@ auto build(raft::resources const& handle, utils::memzero(index.inds_ptrs().data_handle(), index.inds_ptrs().size(), stream); { + int random_seed = 137; auto trainset_ratio = std::max( 1, size_t(n_rows) / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); size_t n_rows_train = n_rows / trainset_ratio; - auto* device_memory = resource::get_workspace_resource(handle); - rmm::mr::managed_memory_resource managed_memory_upstream; + auto* device_mr = resource::get_workspace_resource(handle); + rmm::mr::managed_memory_resource managed_mr; // Besides just sampling, we transform the input dataset into floats to make it easier // to use gemm operations from cublas. - rmm::device_uvector trainset(n_rows_train * index.dim(), stream, device_memory); - // TODO: a proper sampling + auto trainset = + make_device_mdarray(handle, device_mr, make_extents(n_rows_train, dim)); + if constexpr (std::is_same_v) { - RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset.data(), - sizeof(T) * index.dim(), - dataset, - sizeof(T) * index.dim() * trainset_ratio, - sizeof(T) * index.dim(), - n_rows_train, - cudaMemcpyDefault, - stream)); + raft::spatial::knn::detail::utils::subsample( + handle, dataset, n_rows, trainset.view(), random_seed); } else { - size_t dim = index.dim(); - cudaPointerAttributes dataset_attr; - RAFT_CUDA_TRY(cudaPointerGetAttributes(&dataset_attr, dataset)); - if (dataset_attr.devicePointer != nullptr) { - // data is available on device: just run the kernel to copy and map the data - auto p = reinterpret_cast(dataset_attr.devicePointer); - auto trainset_view = - raft::make_device_vector_view(trainset.data(), dim * n_rows_train); - linalg::map_offset(handle, trainset_view, [p, trainset_ratio, dim] __device__(size_t i) { - auto col = i % dim; - return utils::mapping{}(p[(i - col) * size_t(trainset_ratio) + col]); - }); - } else { - // data is not available: first copy, then map inplace - auto trainset_tmp = reinterpret_cast(reinterpret_cast(trainset.data()) + - (sizeof(float) - sizeof(T)) * index.dim()); - // We copy the data in strides, one row at a time, and place the smaller rows of type T - // at the end of float rows. - RAFT_CUDA_TRY(cudaMemcpy2DAsync(trainset_tmp, - sizeof(float) * index.dim(), - dataset, - sizeof(T) * index.dim() * trainset_ratio, - sizeof(T) * index.dim(), - n_rows_train, - cudaMemcpyDefault, - stream)); - // Transform the input `{T -> float}`, one row per warp. - // The threads in each warp copy the data synchronously; this and the layout of the data - // (content is aligned to the end of the rows) together allow doing the transform in-place. - copy_warped(trainset.data(), - index.dim(), - trainset_tmp, - index.dim() * sizeof(float) / sizeof(T), - index.dim(), - n_rows_train, - stream); - } + // TODO(tfeher): Enable codebook generation with any type T, and then remove + // trainset tmp. + auto trainset_tmp = + make_device_mdarray(handle, &managed_mr, make_extents(n_rows_train, dim)); + raft::spatial::knn::detail::utils::subsample( + handle, dataset, n_rows, trainset_tmp.view(), random_seed); + cudaDeviceSynchronize(); + raft::linalg::unaryOp(trainset.data_handle(), + trainset_tmp.data_handle(), + trainset.size(), + utils::mapping{}, // raft::cast_op(), + raft::resource::get_cuda_stream(handle)); } // NB: here cluster_centers is used as if it is [n_clusters, data_dim] not [n_clusters, // dim_ext]! rmm::device_uvector cluster_centers_buf( - index.n_lists() * index.dim(), stream, device_memory); + index.n_lists() * index.dim(), stream, device_mr); auto cluster_centers = cluster_centers_buf.data(); // Train balanced hierarchical kmeans clustering - auto trainset_const_view = - raft::make_device_matrix_view(trainset.data(), n_rows_train, index.dim()); + auto trainset_const_view = raft::make_const_mdspan(trainset.view()); auto centers_view = raft::make_device_matrix_view(cluster_centers, index.n_lists(), index.dim()); raft::cluster::kmeans_balanced_params kmeans_params; @@ -1778,7 +1713,7 @@ auto build(raft::resources const& handle, handle, kmeans_params, trainset_const_view, centers_view, utils::mapping{}); // Trainset labels are needed for training PQ codebooks - rmm::device_uvector labels(n_rows_train, stream, device_memory); + rmm::device_uvector labels(n_rows_train, stream, device_mr); auto centers_const_view = raft::make_device_matrix_view( cluster_centers, index.n_lists(), index.dim()); auto labels_view = raft::make_device_vector_view(labels.data(), n_rows_train); @@ -1804,19 +1739,21 @@ auto build(raft::resources const& handle, train_per_subset(handle, index, n_rows_train, - trainset.data(), + trainset.data_handle(), labels.data(), params.kmeans_n_iters, - &managed_memory_upstream); + params.max_train_points_per_pq_code, + &managed_mr); break; case codebook_gen::PER_CLUSTER: train_per_cluster(handle, index, n_rows_train, - trainset.data(), + trainset.data_handle(), labels.data(), params.kmeans_n_iters, - &managed_memory_upstream); + params.max_train_points_per_pq_code, + &managed_mr); break; default: RAFT_FAIL("Unreachable code"); } diff --git a/cpp/include/raft/neighbors/ivf_pq_types.hpp b/cpp/include/raft/neighbors/ivf_pq_types.hpp index 81e2886b18..d7d685f1e0 100644 --- a/cpp/include/raft/neighbors/ivf_pq_types.hpp +++ b/cpp/include/raft/neighbors/ivf_pq_types.hpp @@ -104,6 +104,14 @@ struct index_params : ann::index_params { * flag to `true` if you prefer to use as little GPU memory for the database as possible. */ bool conservative_memory_allocation = false; + /** + * The max number of data points to use per PQ code during PQ codebook training. Using more data + * points per PQ code may increase the quality of PQ codebook but may also increase the build + * time. The parameter is applied to both PQ codebook generation methods, i.e., PER_SUBSPACE and + * PER_CLUSTER. In both cases, we will use `pq_book_size * max_train_points_per_pq_code` training + * points to train each codebook. + */ + uint32_t max_train_points_per_pq_code = 256; }; struct search_params : ann::search_params { diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index d862e586e3..31a0f4ba8a 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -16,8 +16,17 @@ #pragma once +#include +#include +#include +#include +#include #include +#include #include +#include +#include +#include #include #include #include diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh index 4ebe02027f..3cf24e4fbf 100644 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ b/cpp/test/neighbors/ann_ivf_pq.cuh @@ -68,7 +68,7 @@ struct ivf_pq_inputs { ivf_pq_inputs() { index_params.n_lists = max(32u, min(1024u, num_db_vecs / 128u)); - index_params.kmeans_trainset_fraction = 1.0; + index_params.kmeans_trainset_fraction = 0.95; } }; diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index afb4ed18ea..e003aa879c 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -38,6 +38,7 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `pq_bits` | `build` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. | | `codebook_kind` | `build` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail | | `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "host" | What memory type should the dataset reside? | +| `max_train_points_per_pq_code` | `build` | N | Positive Number >=1 | 256 | Max number of data points per PQ code used for PQ code book creation. Depending on input dataset size, the data points could be less than what user specifies. | | `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | | `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | | `internalDistanceDtype` | `search` | N | [`float`, `half`] | `half` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. | diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd index 895abbadca..930c3245f1 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/cpp/c_ivf_pq.pxd @@ -78,6 +78,7 @@ cdef extern from "raft/neighbors/ivf_pq_types.hpp" \ codebook_gen codebook_kind bool force_random_rotation bool conservative_memory_allocation + uint32_t max_train_points_per_pq_code cdef cppclass index[IdxT](ann_index): index(const device_resources& handle, diff --git a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx index 5b89f0d9a5..7081b65ce3 100644 --- a/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/pylibraft/pylibraft/neighbors/ivf_pq/ivf_pq.pyx @@ -156,6 +156,14 @@ cdef class IndexParams: repeated calls to `extend` (extending the database). To disable this behavior and use as little GPU memory for the database as possible, set this flat to `True`. + max_train_points_per_pq_code : int, default = 256 + The max number of data points to use per PQ code during PQ codebook + training. Using more data points per PQ code may increase the + quality of PQ codebook but may also increase the build time. The + parameter is applied to both PQ codebook generation methods, i.e., + PER_SUBSPACE and PER_CLUSTER. In both cases, we will use + pq_book_size * max_train_points_per_pq_code training points to + train each codebook. """ def __init__(self, *, n_lists=1024, @@ -167,7 +175,8 @@ cdef class IndexParams: codebook_kind="subspace", force_random_rotation=False, add_data_on_build=True, - conservative_memory_allocation=False): + conservative_memory_allocation=False, + max_train_points_per_pq_code=256): self.params.n_lists = n_lists self.params.metric = _get_metric(metric) self.params.metric_arg = 0 @@ -185,6 +194,8 @@ cdef class IndexParams: self.params.add_data_on_build = add_data_on_build self.params.conservative_memory_allocation = \ conservative_memory_allocation + self.params.max_train_points_per_pq_code = \ + max_train_points_per_pq_code @property def n_lists(self): @@ -226,6 +237,9 @@ cdef class IndexParams: def conservative_memory_allocation(self): return self.params.conservative_memory_allocation + @property + def max_train_points_per_pq_code(self): + return self.params.max_train_points_per_pq_code cdef class Index: # We store a pointer to the index because it dose not have a trivial From 47eefd4b296455269024d37dc4837b470b861be4 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 14 Mar 2024 09:36:06 +0100 Subject: [PATCH 13/21] Use the new matrix::sample_rows API --- .../raft/neighbors/detail/ivf_flat_build.cuh | 8 +++++--- cpp/include/raft/neighbors/detail/ivf_pq_build.cuh | 13 ++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh index e39c41351c..5c7cbb431b 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_build.cuh @@ -26,11 +26,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -364,13 +366,13 @@ inline auto build(raft::resources const& handle, // Train the kmeans clustering { - int random_seed = 137; + raft::random::RngState random_state{137}; auto trainset_ratio = std::max( 1, n_rows / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); auto n_rows_train = n_rows / trainset_ratio; auto trainset = make_device_matrix(handle, n_rows_train, index.dim()); - raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset.view(), random_seed); + raft::matrix::detail::sample_rows(handle, random_state, dataset, n_rows, trainset.view()); + auto centers_view = raft::make_device_matrix_view( index.centers().data_handle(), index.n_lists(), index.dim()); raft::cluster::kmeans_balanced_params kmeans_params; diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 433707de9a..29ab0b1524 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1664,7 +1665,7 @@ auto build(raft::resources const& handle, utils::memzero(index.inds_ptrs().data_handle(), index.inds_ptrs().size(), stream); { - int random_seed = 137; + raft::random::RngState random_state{137}; auto trainset_ratio = std::max( 1, size_t(n_rows) / std::max(params.kmeans_trainset_fraction * n_rows, index.n_lists())); @@ -1679,20 +1680,18 @@ auto build(raft::resources const& handle, make_device_mdarray(handle, device_mr, make_extents(n_rows_train, dim)); if constexpr (std::is_same_v) { - raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset.view(), random_seed); + raft::matrix::detail::sample_rows(handle, random_state, dataset, n_rows, trainset.view()); } else { // TODO(tfeher): Enable codebook generation with any type T, and then remove // trainset tmp. auto trainset_tmp = make_device_mdarray(handle, &managed_mr, make_extents(n_rows_train, dim)); - raft::spatial::knn::detail::utils::subsample( - handle, dataset, n_rows, trainset_tmp.view(), random_seed); - cudaDeviceSynchronize(); + raft::matrix::detail::sample_rows(handle, random_state, dataset, n_rows, trainset_tmp.view()); + raft::linalg::unaryOp(trainset.data_handle(), trainset_tmp.data_handle(), trainset.size(), - utils::mapping{}, // raft::cast_op(), + utils::mapping{}, raft::resource::get_cuda_stream(handle)); } From 3f9cbc36fd389dbe35f31dc842c44e5801569082 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 15 Mar 2024 12:41:21 +0100 Subject: [PATCH 14/21] Address issues --- cpp/include/raft/matrix/detail/gather.cuh | 2 -- cpp/include/raft/matrix/sample_rows.cuh | 28 +++++++++++++++++-- cpp/include/raft/random/detail/rng_device.cuh | 1 - cpp/include/raft/random/detail/rng_impl.cuh | 4 +++ cpp/include/raft/random/rng.cuh | 6 ++-- .../raft/spatial/knn/detail/ann_utils.cuh | 4 --- cpp/test/matrix/sample_rows.cu | 9 +++--- 7 files changed, 39 insertions(+), 15 deletions(-) diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 553f2d71f1..04541e4378 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -379,12 +379,10 @@ void gather(raft::resources const& res, const size_t max_batch_size = 32768; // Gather the vector on the host in tmp buffers. We use two buffers to overlap H2D sync // and gathering the data. - raft::common::nvtx::push_range("gather::alloc_buffers"); auto out_tmp1 = raft::make_pinned_matrix(res, max_batch_size, n_dim); auto out_tmp2 = raft::make_pinned_matrix(res, max_batch_size, n_dim); auto view1 = out_tmp1.view(); auto view2 = out_tmp2.view(); - raft::common::nvtx::pop_range(); gather_buff(dataset, make_const_mdspan(indices_host.view()), (IdxT)0, view1); #pragma omp parallel diff --git a/cpp/include/raft/matrix/sample_rows.cuh b/cpp/include/raft/matrix/sample_rows.cuh index 7a1f9bf756..7925d344e4 100644 --- a/cpp/include/raft/matrix/sample_rows.cuh +++ b/cpp/include/raft/matrix/sample_rows.cuh @@ -25,17 +25,41 @@ namespace raft::matrix { -/** Select rows randomly from input and copy to output. */ +/** @brief Select rows randomly from input and copy to output. + * + * The rows are selected randomly. The random sampling method does not guarantee completely unique + * selection of rows, but it is close to being unique. + * + * @param res RAFT resource handle + * @param random_state + * @param dataset input dataset + * @param output subsampled dataset + */ template void sample_rows(raft::resources const& res, random::RngState random_state, mdspan, row_major, accessor> dataset, raft::device_matrix_view output) { + RAFT_EXPECTS(dataset.extent(1) == output.extent(1), + "dataset dims must match, but received %ld vs %ld", + static_cast(dataset.extent(1)), + static_cast(output.extent(1))); detail::sample_rows(res, random_state, dataset.data_handle(), dataset.extent(0), output); } -/** Subsample the dataset to create a training set*/ +/** @brief Select rows randomly from input and copy to output. + * + * The rows are selected randomly. The random sampling method does not guarantee completely unique + * selection of rows, but it is close to being unique. + * + * @param res RAFT resource handle + * @param random_state + * @param dataset input dataset + * @param n_samples number of rows in the returned matrix + * + * @return subsampled dataset + * */ template raft::device_matrix sample_rows( raft::resources const& res, diff --git a/cpp/include/raft/random/detail/rng_device.cuh b/cpp/include/raft/random/detail/rng_device.cuh index 5e962fc982..12c67679ba 100644 --- a/cpp/include/raft/random/detail/rng_device.cuh +++ b/cpp/include/raft/random/detail/rng_device.cuh @@ -16,7 +16,6 @@ #pragma once -#include #include #include #include diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index 70ef1bbfcc..98841cdf90 100644 --- a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -444,6 +446,8 @@ auto excess_subsample(raft::resources const& res, RngState& state, IdxT N, IdxT RAFT_LOG_DEBUG("Subsampling returned with less unique indices (%zu) than requested (%zu)", (size_t)selected, (size_t)n_samples); + + // We continue to select n_samples elements, this will now contains a few duplicates. } // After duplicates are removed, we need to shuffle back to random order diff --git a/cpp/include/raft/random/rng.cuh b/cpp/include/raft/random/rng.cuh index 977d82830b..6fd1071d30 100644 --- a/cpp/include/raft/random/rng.cuh +++ b/cpp/include/raft/random/rng.cuh @@ -813,9 +813,11 @@ void sampleWithoutReplacement(raft::resources const& handle, rng_state, out, outIdx, in, wts, sampledLen, len, resource::get_cuda_stream(handle)); } -/** @brief Sample without replacement from range 0..N-1. +/** @brief Sample from range 0..N-1. + * + * Elements are sampled uniformly. The method aims to sample without replacement, + * but there is a small probability of a few having duplicate elements. * - * Elements are sampled uniformly. * The algorithm will allocate a workspace of size 4*n_samples*sizeof(IdxT) internally. * * We use max N random numbers. Depending on how large n_samples is w.r.t to N, we diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index 78e63f756d..d862e586e3 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -18,10 +18,6 @@ #include #include -#include -#include -#include -#include #include #include #include diff --git a/cpp/test/matrix/sample_rows.cu b/cpp/test/matrix/sample_rows.cu index 048edde2ba..e332a918fe 100644 --- a/cpp/test/matrix/sample_rows.cu +++ b/cpp/test/matrix/sample_rows.cu @@ -85,14 +85,14 @@ class SampleRowsTest : public ::testing::TestWithParam { std::unordered_set occurrence; for (int64_t i = 0; i < params.n_samples; ++i) { - int val = (int)out_h(i, 0) / 1000; + T val = out_h(i, 0) / 1000; ASSERT_TRUE(0 <= val && val < params.N) << "out-of-range index @i=" << i << " val=" << val << " params=" << params; - ASSERT_TRUE(occurrence.find(val) == occurrence.end()) + EXPECT_TRUE(occurrence.find(val) == occurrence.end()) << "repeated index @i=" << i << " idx=" << val << " params=" << params; occurrence.insert(val); for (int64_t k = 0; k < params.dim; k++) { - ASSERT_TRUE(raft::match((int64_t)(out_h(i, k)), val * 1000 + k, raft::Compare())); + ASSERT_TRUE(raft::match(out_h(i, k), val * 1000 + k, raft::CompareApprox(1e-6))); } } } @@ -116,7 +116,8 @@ inline std::vector generate_inputs() input1.insert(input1.end(), input2.begin(), input2.end()); input2 = raft::util::itertools::product( - {100000}, {1, 42}, {1, 137, 1000, 10000, 100000}, {false}); + {100000}, {1, 42}, {1, 137, 1000, 10000, 50000, 62000, 100000}, {false}); + input1.insert(input1.end(), input2.begin(), input2.end()); int n = input1.size(); From 57cb99c1423c215465ff7a1b317a67d90c3d96a4 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 15 Mar 2024 22:19:16 +0100 Subject: [PATCH 15/21] change member variables in test to local vars --- cpp/test/random/excess_sampling.cu | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/cpp/test/random/excess_sampling.cu b/cpp/test/random/excess_sampling.cu index 45ed2a6727..e86436fb7d 100644 --- a/cpp/test/random/excess_sampling.cu +++ b/cpp/test/random/excess_sampling.cu @@ -52,21 +52,20 @@ class ExcessSamplingTest : public ::testing::TestWithParam { ExcessSamplingTest() : params(::testing::TestWithParam::GetParam()), stream(resource::get_cuda_stream(res)), - state{137ULL}, - in(make_device_vector(res, params.n_samples)), - out(make_device_vector(res, 0)), - h_out(make_host_vector(res, params.n_samples)) - + state{137ULL} { } void check() { - out = raft::random::excess_subsample(res, state, params.N, params.n_samples); + device_vector out = + raft::random::excess_subsample(res, state, params.N, params.n_samples); ASSERT_TRUE(out.extent(0) == params.n_samples); - raft::copy(h_out.data_handle(), out.data_handle(), out.size(), stream); + auto h_out = make_host_vector(res, params.n_samples); + raft::copy(h_out.data_handle(), out.data_handle(), out.size(), stream); resource::sync_stream(res, stream); + std::unordered_set occurrence; int64_t sum = 0; for (int64_t i = 0; i < params.n_samples; ++i) { @@ -90,8 +89,6 @@ class ExcessSamplingTest : public ::testing::TestWithParam { raft::resources res; cudaStream_t stream; RngState state; - device_vector in, out; - host_vector h_out; }; const std::vector input1 = {{1, 0}, From 84e307e88109ddffa61be9ceb2cb65cfc9eed2b8 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 18 Mar 2024 14:31:01 +0100 Subject: [PATCH 16/21] Fix omp gather and add bench --- cpp/bench/prims/matrix/gather.cu | 38 ++++++++++- cpp/include/raft/matrix/detail/gather.cuh | 79 ++++++++++++++--------- 2 files changed, 83 insertions(+), 34 deletions(-) diff --git a/cpp/bench/prims/matrix/gather.cu b/cpp/bench/prims/matrix/gather.cu index e6f26ba925..078f9e6198 100644 --- a/cpp/bench/prims/matrix/gather.cu +++ b/cpp/bench/prims/matrix/gather.cu @@ -16,34 +16,48 @@ #include +#include +#include #include #include #include #include #include +#include namespace raft::bench::matrix { template struct GatherParams { IdxT rows, cols, map_length; + bool host; }; template inline auto operator<<(std::ostream& os, const GatherParams& p) -> std::ostream& { - os << p.rows << "#" << p.cols << "#" << p.map_length; + os << p.rows << "#" << p.cols << "#" << p.map_length << (p.host ? "#host" : "#device"); return os; } template struct Gather : public fixture { Gather(const GatherParams& p) - : params(p), matrix(this->handle), map(this->handle), out(this->handle), stencil(this->handle) + : params(p), + old_mr(rmm::mr::get_current_device_resource()), + pool_mr(rmm::mr::get_current_device_resource(), 2 * (1ULL << 30)), + matrix(this->handle), + map(this->handle), + out(this->handle), + stencil(this->handle), + matrix_h(this->handle) { + rmm::mr::set_current_device_resource(&pool_mr); } + ~Gather() { rmm::mr::set_current_device_resource(old_mr); } + void allocate_data(const ::benchmark::State& state) override { matrix = raft::make_device_matrix(handle, params.rows, params.cols); @@ -59,6 +73,11 @@ struct Gather : public fixture { if constexpr (Conditional) { raft::random::uniform(handle, rng, stencil.data_handle(), params.map_length, T(-1), T(1)); } + + if (params.host) { + matrix_h = raft::make_host_matrix(handle, params.rows, params.cols); + raft::copy(matrix_h.data_handle(), matrix.data_handle(), matrix.size(), stream); + } resource::sync_stream(handle, stream); } @@ -77,14 +96,22 @@ struct Gather : public fixture { raft::matrix::gather_if( handle, matrix_const_view, out.view(), map_const_view, stencil_const_view, pred_op); } else { - raft::matrix::gather(handle, matrix_const_view, map_const_view, out.view()); + if (params.host) { + raft::matrix::detail::gather( + handle, make_const_mdspan(matrix_h.view()), map_const_view, out.view()); + } else { + raft::matrix::gather(handle, matrix_const_view, map_const_view, out.view()); + } } }); } private: GatherParams params; + rmm::mr::device_memory_resource* old_mr; + rmm::mr::pool_memory_resource pool_mr; raft::device_matrix matrix, out; + raft::host_matrix matrix_h; raft::device_vector stencil; raft::device_vector map; }; // struct Gather @@ -100,4 +127,9 @@ RAFT_BENCH_REGISTER((Gather), "", gather_inputs_i64); RAFT_BENCH_REGISTER((Gather), "", gather_inputs_i64); RAFT_BENCH_REGISTER((GatherIf), "", gather_inputs_i64); RAFT_BENCH_REGISTER((GatherIf), "", gather_inputs_i64); + +auto inputs_host = raft::util::itertools::product>( + {10000000}, {100}, {1000, 1000000, 10000000}, {true}); +RAFT_BENCH_REGISTER((Gather), "Host", inputs_host); + } // namespace raft::bench::matrix diff --git a/cpp/include/raft/matrix/detail/gather.cuh b/cpp/include/raft/matrix/detail/gather.cuh index 04541e4378..05cc9204bf 100644 --- a/cpp/include/raft/matrix/detail/gather.cuh +++ b/cpp/include/raft/matrix/detail/gather.cuh @@ -27,6 +27,8 @@ #include #include +#include + #include namespace raft { @@ -344,11 +346,14 @@ void gather_if(const InputIteratorT in, gatherImpl(in, D, N, map, stencil, map_length, out, pred_op, transform_op, stream); } -template -void gather_buff(host_matrix_view dataset, - host_vector_view indices, - IdxT offset, - pinned_matrix_view buff) +/** + * Helper function to gather a set of vectors from a (host) dataset. + */ +template +void gather_buff(host_matrix_view dataset, + host_vector_view indices, + MatIdxT offset, + pinned_matrix_view buff) { raft::common::nvtx::range fun_scope("gather_host_buff"); IdxT batch_size = std::min(buff.extent(0), indices.extent(0) - offset); @@ -362,47 +367,59 @@ void gather_buff(host_matrix_view dataset, } } -template +template void gather(raft::resources const& res, - host_matrix_view dataset, - device_vector_view indices, - raft::device_matrix_view output) + host_matrix_view dataset, + device_vector_view indices, + raft::device_matrix_view output) { raft::common::nvtx::range fun_scope("gather"); IdxT n_dim = output.extent(1); IdxT n_train = output.extent(0); - auto indices_host = raft::make_host_vector(n_train); + auto indices_host = raft::make_host_vector(n_train); raft::copy( indices_host.data_handle(), indices.data_handle(), n_train, resource::get_cuda_stream(res)); resource::sync_stream(res); - const size_t max_batch_size = 32768; + const size_t buffer_size = 32768 * 1024; // bytes + const size_t max_batch_size = + std::min(round_up_safe(buffer_size / n_dim, 32), n_train); + RAFT_LOG_DEBUG("Gathering data with batch size %zu", max_batch_size); + // Gather the vector on the host in tmp buffers. We use two buffers to overlap H2D sync // and gathering the data. - auto out_tmp1 = raft::make_pinned_matrix(res, max_batch_size, n_dim); - auto out_tmp2 = raft::make_pinned_matrix(res, max_batch_size, n_dim); - auto view1 = out_tmp1.view(); - auto view2 = out_tmp2.view(); + auto out_tmp1 = raft::make_pinned_matrix(res, max_batch_size, n_dim); + auto out_tmp2 = raft::make_pinned_matrix(res, max_batch_size, n_dim); + + // Usually a limited number of threads provide sufficient bandwidth for gathering data. + int n_threads = std::min(omp_get_max_threads(), 32); + + // The gather_buff function has a parallel for loop. We start the the omp parallel + // region here, to avoid repeated overhead within the device_offset loop. +#pragma omp parallel num_threads(n_threads) + { + auto view1 = out_tmp1.view(); + auto view2 = out_tmp2.view(); + gather_buff(dataset, make_const_mdspan(indices_host.view()), (MatIdxT)0, view1); + for (MatIdxT device_offset = 0; device_offset < n_train; device_offset += max_batch_size) { + MatIdxT batch_size = std::min(max_batch_size, n_train - device_offset); - gather_buff(dataset, make_const_mdspan(indices_host.view()), (IdxT)0, view1); -#pragma omp parallel - for (IdxT device_offset = 0; device_offset < n_train; device_offset += max_batch_size) { - IdxT batch_size = std::min(max_batch_size, n_train - device_offset); #pragma omp master - raft::copy(output.data_handle() + device_offset * n_dim, - view1.data_handle(), - batch_size * n_dim, - resource::get_cuda_stream(res)); - // Start gathering the next batch on the host. - IdxT host_offset = device_offset + batch_size; - batch_size = std::min(max_batch_size, n_train - host_offset); - if (batch_size > 0) { - gather_buff(dataset, make_const_mdspan(indices_host.view()), host_offset, view2); - } + raft::copy(output.data_handle() + device_offset * n_dim, + view1.data_handle(), + batch_size * n_dim, + resource::get_cuda_stream(res)); + // Start gathering the next batch on the host. + MatIdxT host_offset = device_offset + batch_size; + batch_size = std::min(max_batch_size, n_train - host_offset); + if (batch_size > 0) { + gather_buff(dataset, make_const_mdspan(indices_host.view()), host_offset, view2); + } #pragma omp master - resource::sync_stream(res); + resource::sync_stream(res); #pragma omp barrier - std::swap(view1, view2); + std::swap(view1, view2); + } } } From 84609de5adfb7fa7a25710939d3e29c3a27d1a1e Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 19 Mar 2024 00:33:59 +0100 Subject: [PATCH 17/21] Adjust comment --- cpp/include/raft/neighbors/detail/ivf_pq_build.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 3f3adba4f8..54b4d774d3 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -1684,8 +1684,7 @@ auto build(raft::resources const& handle, if constexpr (std::is_same_v) { raft::matrix::detail::sample_rows(handle, random_state, dataset, n_rows, trainset.view()); } else { - // TODO(tfeher): Enable codebook generation with any type T, and then remove - // trainset tmp. + // TODO(tfeher): Enable codebook generation with any type T, and then remove trainset tmp. auto trainset_tmp = make_device_mdarray( handle, &managed_mr, make_extents(n_rows_train, dim)); raft::matrix::detail::sample_rows(handle, random_state, dataset, n_rows, trainset_tmp.view()); From 6ab5f9a69f950fa383864f9a8bc0496078ed39b6 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 19 Mar 2024 15:12:21 +0100 Subject: [PATCH 18/21] Fix params for sample_rows --- cpp/include/raft/neighbors/detail/ivf_pq_build.cuh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 54b4d774d3..5c0e2cfee3 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -1682,12 +1682,14 @@ auto build(raft::resources const& handle, handle, device_mr, make_extents(n_rows_train, dim)); if constexpr (std::is_same_v) { - raft::matrix::detail::sample_rows(handle, random_state, dataset, n_rows, trainset.view()); + raft::matrix::detail::sample_rows( + handle, random_state, dataset, n_rows, trainset.view()); } else { // TODO(tfeher): Enable codebook generation with any type T, and then remove trainset tmp. auto trainset_tmp = make_device_mdarray( handle, &managed_mr, make_extents(n_rows_train, dim)); - raft::matrix::detail::sample_rows(handle, random_state, dataset, n_rows, trainset_tmp.view()); + raft::matrix::detail::sample_rows( + handle, random_state, dataset, n_rows, trainset_tmp.view()); raft::linalg::unaryOp(trainset.data_handle(), trainset_tmp.data_handle(), From f01fa61e4c358df0359ef6b9ce08ec4717f56b33 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 19 Mar 2024 15:24:55 +0100 Subject: [PATCH 19/21] Change IVF cluster warning messages to debug msg --- cpp/include/raft/cluster/detail/kmeans_balanced.cuh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh index 6d3f430e88..509638bbd4 100644 --- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh @@ -881,10 +881,10 @@ auto build_fine_clusters(const raft::resources& handle, if (labels_mptr[j] == LabelT(i)) { mc_trainset_ids[k++] = j; } } if (k != static_cast(mesocluster_sizes[i])) - RAFT_LOG_WARN("Incorrect mesocluster size at %d. %zu vs %zu", - static_cast(i), - static_cast(k), - static_cast(mesocluster_sizes[i])); + RAFT_LOG_DEBUG("Incorrect mesocluster size at %d. %zu vs %zu", + static_cast(i), + static_cast(k), + static_cast(mesocluster_sizes[i])); if (k == 0) { RAFT_LOG_DEBUG("Empty cluster %d", i); RAFT_EXPECTS(fine_clusters_nums[i] == 0, @@ -1030,7 +1030,7 @@ void build_hierarchical(const raft::resources& handle, const IdxT mesocluster_size_max_balanced = div_rounding_up_safe( 2lu * size_t(n_rows), std::max(size_t(n_mesoclusters), 1lu)); if (mesocluster_size_max > mesocluster_size_max_balanced) { - RAFT_LOG_WARN( + RAFT_LOG_DEBUG( "build_hierarchical: built unbalanced mesoclusters (max_mesocluster_size == %u > %u). " "At most %u points will be used for training within each mesocluster. " "Consider increasing the number of training iterations `n_iters`.", From 28a0ed7a11efdd936702bbf7c6736a02a2ce0995 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 19 Mar 2024 20:19:17 +0100 Subject: [PATCH 20/21] Remove changes from ann_utils.cuh --- cpp/include/raft/spatial/knn/detail/ann_utils.cuh | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index 31a0f4ba8a..d862e586e3 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -16,17 +16,8 @@ #pragma once -#include -#include -#include -#include -#include #include -#include #include -#include -#include -#include #include #include #include From d88e2d3d620a052fe9d13bb0b1cc04322ff9a768 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 19 Mar 2024 20:28:33 +0100 Subject: [PATCH 21/21] allocate trainset usind default allocator --- cpp/include/raft/neighbors/detail/ivf_pq_build.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 5c0e2cfee3..ec4225ae16 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -1678,8 +1678,7 @@ auto build(raft::resources const& handle, // Besides just sampling, we transform the input dataset into floats to make it easier // to use gemm operations from cublas. - auto trainset = make_device_mdarray( - handle, device_mr, make_extents(n_rows_train, dim)); + auto trainset = make_device_matrix(handle, n_rows_train, dim); if constexpr (std::is_same_v) { raft::matrix::detail::sample_rows(