diff --git a/build.sh b/build.sh index 5a26b6eadf..6ccfada555 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libraft pylibraft raft-dask docs tests bench-prims clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -29,7 +29,6 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is: -v - verbose build mode @@ -75,8 +74,8 @@ INSTALL_TARGET=install BUILD_REPORT_METRICS="" BUILD_REPORT_INCL_CACHE_STATS=OFF -TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;NEIGHBORS_TEST;NEIGHBORS_ANN_BRUTE_FORCE_TEST;NEIGHBORS_ANN_CAGRA_TEST;NEIGHBORS_ANN_NN_DESCENT_TEST;NEIGHBORS_ANN_IVF_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;STATS_TEST;UTILS_TEST" -BENCH_TARGETS="CLUSTER_BENCH;CORE_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH" +TEST_TARGETS="CORE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;STATS_TEST;UTILS_TEST" +BENCH_TARGETS="CORE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH" CACHE_ARGS="" NVTX=ON @@ -485,11 +484,3 @@ if hasArg docs; then sphinx-build -b html source _html fi -################################################################################ -# Initiate build for example RAFT application template (if needed) - -if hasArg template; then - pushd ${REPODIR}/cpp/template - ./build.sh - popd -fi diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh deleted file mode 100644 index 86c0fa11b6..0000000000 --- a/conda/recipes/libraft/build_libraft_template.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2022-2024, NVIDIA CORPORATION. - -# Just building template so we verify it uses libraft.so and fail if it doesn't build -./build.sh template --no-nvtx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e0ea893edf..a497872354 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -272,277 +272,11 @@ if(RAFT_COMPILE_LIBRARY) add_library( raft_objs OBJECT src/core/logger.cpp - src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu - src/distance/detail/pairwise_matrix/dispatch_rbf.cu - src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu - src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu - src/distance/distance.cu - src/distance/fused_l2_nn.cu - src/distance/fused_distance_nn.cu src/linalg/detail/coalesced_reduction.cu - src/matrix/detail/select_k_double_int64_t.cu - src/matrix/detail/select_k_double_uint32_t.cu - src/matrix/detail/select_k_float_int64_t.cu - src/matrix/detail/select_k_float_uint32_t.cu - src/matrix/detail/select_k_float_int32.cu - src/matrix/detail/select_k_half_int64_t.cu - src/matrix/detail/select_k_half_uint32_t.cu - src/neighbors/ball_cover.cu - src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu - src/neighbors/brute_force_knn_int64_t_float_int64_t.cu - src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu - src/neighbors/brute_force_knn_int_float_int.cu - src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu - src/neighbors/brute_force_knn_index_float.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu - src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu - src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu - src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu - src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu - src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu - src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu - src/neighbors/detail/ivf_flat_search.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu - src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu - src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu - src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu - src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu - src/neighbors/detail/refine_host_float_float.cpp - src/neighbors/detail/refine_host_half_float.cpp - src/neighbors/detail/refine_host_int8_t_float.cpp - src/neighbors/detail/refine_host_uint8_t_float.cpp - src/neighbors/ivf_flat_build_float_int64_t.cu - src/neighbors/ivf_flat_build_int8_t_int64_t.cu - src/neighbors/ivf_flat_build_uint8_t_int64_t.cu - src/neighbors/ivf_flat_extend_float_int64_t.cu - src/neighbors/ivf_flat_extend_int8_t_int64_t.cu - src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu - src/neighbors/ivf_flat_search_float_int64_t.cu - src/neighbors/ivf_flat_search_int8_t_int64_t.cu - src/neighbors/ivf_flat_search_uint8_t_int64_t.cu - src/neighbors/ivfpq_build_float_int64_t.cu - src/neighbors/ivfpq_build_half_int64_t.cu - src/neighbors/ivfpq_build_int8_t_int64_t.cu - src/neighbors/ivfpq_build_uint8_t_int64_t.cu - src/neighbors/ivfpq_extend_float_int64_t.cu - src/neighbors/ivfpq_extend_half_int64_t.cu - src/neighbors/ivfpq_extend_int8_t_int64_t.cu - src/neighbors/ivfpq_extend_uint8_t_int64_t.cu - src/neighbors/ivfpq_search_float_int64_t.cu - src/neighbors/ivfpq_search_half_int64_t.cu - src/neighbors/ivfpq_search_int8_t_int64_t.cu - src/neighbors/ivfpq_search_uint8_t_int64_t.cu - src/neighbors/refine_float_float.cu - src/neighbors/refine_half_float.cu - src/neighbors/refine_int8_t_float.cu - src/neighbors/refine_uint8_t_float.cu - src/raft_runtime/cluster/cluster_cost.cuh - src/raft_runtime/cluster/cluster_cost_double.cu - src/raft_runtime/cluster/cluster_cost_float.cu - src/raft_runtime/cluster/kmeans_fit_double.cu - src/raft_runtime/cluster/kmeans_fit_float.cu - src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu - src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu - src/raft_runtime/cluster/update_centroids.cuh - src/raft_runtime/cluster/update_centroids_double.cu - src/raft_runtime/cluster/update_centroids_float.cu - src/raft_runtime/distance/fused_distance_min_arg.cu - src/raft_runtime/distance/fused_l2_min_arg.cu - src/raft_runtime/distance/pairwise_distance.cu - src/raft_runtime/matrix/select_k_float_int64_t.cu - src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu - src/raft_runtime/neighbors/cagra_build.cu - src/raft_runtime/neighbors/cagra_search.cu - src/raft_runtime/neighbors/cagra_serialize.cu - src/raft_runtime/neighbors/eps_neighborhood.cu - $<$:src/raft_runtime/neighbors/hnsw.cpp> - src/raft_runtime/neighbors/ivf_flat_build.cu - src/raft_runtime/neighbors/ivf_flat_search.cu - src/raft_runtime/neighbors/ivf_flat_serialize.cu - src/raft_runtime/neighbors/ivfpq_build.cu - src/raft_runtime/neighbors/ivfpq_deserialize.cu - src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu - src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu - src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu - src/raft_runtime/neighbors/ivfpq_serialize.cu - src/raft_runtime/neighbors/refine_d_int64_t_float.cu - src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu - src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu - src/raft_runtime/neighbors/refine_h_int64_t_float.cu - src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu - src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu src/raft_runtime/random/rmat_rectangular_generator_int_double.cu src/raft_runtime/random/rmat_rectangular_generator_int_float.cu - src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu - src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu - src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu - src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu - src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu ) set_target_properties( raft_objs diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index 52c63ad73b..a27adada23 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -74,48 +74,13 @@ function(ConfigureBench) endfunction() if(BUILD_PRIMS_BENCH) - ConfigureBench( - NAME - CORE_BENCH - PATH - core/bitset.cu - core/copy.cu - main.cpp - ) + ConfigureBench(NAME CORE_BENCH PATH core/bitset.cu core/copy.cu main.cpp) - ConfigureBench( - NAME - UTIL_BENCH - PATH - util/popc.cu - main.cpp - ) - - ConfigureBench( - NAME CLUSTER_BENCH PATH cluster/kmeans_balanced.cu cluster/kmeans.cu - main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY - ) + ConfigureBench(NAME UTIL_BENCH PATH util/popc.cu main.cpp) ConfigureBench( - NAME TUNE_DISTANCE PATH distance/tune_pairwise/kernel.cu - distance/tune_pairwise/bench.cu main.cpp - ) - - ConfigureBench( - NAME - DISTANCE_BENCH - PATH - distance/distance_cosine.cu - distance/distance_exp_l2.cu - distance/distance_l1.cu - distance/distance_unexp_l2.cu - distance/fused_l2_nn.cu - distance/masked_nn.cu - distance/kernels.cu + NAME TUNE_DISTANCE PATH distance/tune_pairwise/kernel.cu distance/tune_pairwise/bench.cu main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY ) ConfigureBench( @@ -137,54 +102,18 @@ if(BUILD_PRIMS_BENCH) ) ConfigureBench( - NAME MATRIX_BENCH PATH matrix/argmin.cu matrix/gather.cu - matrix/select_k.cu main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME RANDOM_BENCH PATH random/make_blobs.cu random/permute.cu - random/rng.cu random/subsample.cu main.cpp + NAME MATRIX_BENCH PATH matrix/argmin.cu matrix/gather.cu matrix/select_k.cu main.cpp OPTIONAL + LIB EXPLICIT_INSTANTIATE_ONLY ) ConfigureBench( - NAME - SPARSE_BENCH - PATH - sparse/bitmap_to_csr.cu - sparse/convert_csr.cu - sparse/select_k_csr.cu + NAME RANDOM_BENCH PATH random/make_blobs.cu random/permute.cu random/rng.cu random/subsample.cu main.cpp ) ConfigureBench( - NAME - NEIGHBORS_BENCH - PATH - neighbors/knn/brute_force_float_int64_t.cu - neighbors/knn/brute_force_float_uint32_t.cu - neighbors/knn/cagra_float_uint32_t.cu - neighbors/knn/ivf_flat_filter_float_int64_t.cu - neighbors/knn/ivf_flat_float_int64_t.cu - neighbors/knn/ivf_flat_int8_t_int64_t.cu - neighbors/knn/ivf_flat_uint8_t_int64_t.cu - neighbors/knn/ivf_pq_float_int64_t.cu - neighbors/knn/ivf_pq_filter_float_int64_t.cu - neighbors/knn/ivf_pq_int8_t_int64_t.cu - neighbors/knn/ivf_pq_uint8_t_int64_t.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu - neighbors/refine_float_int64_t.cu - neighbors/refine_uint8_t_int64_t.cu + NAME SPARSE_BENCH PATH sparse/bitmap_to_csr.cu sparse/convert_csr.cu sparse/select_k_csr.cu main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY ) endif() diff --git a/cpp/bench/prims/cluster/kmeans.cu b/cpp/bench/prims/cluster/kmeans.cu deleted file mode 100644 index 6387211135..0000000000 --- a/cpp/bench/prims/cluster/kmeans.cu +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -namespace raft::bench::cluster { - -struct KMeansBenchParams { - DatasetParams data; - BlobsParams blobs; - raft::cluster::KMeansParams kmeans; -}; - -inline auto operator<<(std::ostream& os, const KMeansBenchParams& p) -> std::ostream& -{ - os << p.data.rows << "#" << p.data.cols << "#" << p.kmeans.n_clusters; - return os; -} - -template -struct KMeans : public BlobsFixture { - KMeans(const KMeansBenchParams& p) - : BlobsFixture(p.data, p.blobs), - params(p), - centroids(this->handle), - labels(this->handle) - { - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - raft::device_matrix_view X_view = this->X.view(); - std::optional> opt_weights_view = std::nullopt; - std::optional> centroids_view = - std::make_optional>(centroids.view()); - raft::device_vector_view labels_view = labels.view(); - raft::host_scalar_view inertia_view = raft::make_host_scalar_view(&inertia); - raft::host_scalar_view n_iter_view = raft::make_host_scalar_view(&n_iter); - - this->loop_on_state(state, [&]() { - raft::cluster::kmeans_fit_predict(this->handle, - params.kmeans, - X_view, - opt_weights_view, - centroids_view, - labels_view, - inertia_view, - n_iter_view); - }); - } - - void allocate_temp_buffers(const ::benchmark::State& state) override - { - centroids = - raft::make_device_matrix(this->handle, params.kmeans.n_clusters, params.data.cols); - labels = raft::make_device_vector(this->handle, params.data.rows); - } - - private: - KMeansBenchParams params; - raft::device_matrix centroids; - raft::device_vector labels; - T inertia; - IndexT n_iter; -}; // struct KMeans - -std::vector getKMeansInputs() -{ - std::vector out; - KMeansBenchParams p; - p.data.row_major = true; - p.blobs.cluster_std = 1.0; - p.blobs.shuffle = false; - p.blobs.center_box_min = -10.0; - p.blobs.center_box_max = 10.0; - p.blobs.seed = 12345ULL; - p.kmeans.init = raft::cluster::KMeansParams::KMeansPlusPlus; - p.kmeans.max_iter = 300; - p.kmeans.tol = 1e-4; - p.kmeans.verbosity = RAFT_LEVEL_INFO; - p.kmeans.metric = raft::distance::DistanceType::L2Expanded; - p.kmeans.inertia_check = true; - std::vector> row_cols_k = { - {1000000, 20, 1000}, - {3000000, 50, 20}, - {10000000, 50, 5}, - }; - for (auto& rck : row_cols_k) { - p.data.rows = std::get<0>(rck); - p.data.cols = std::get<1>(rck); - p.blobs.n_clusters = std::get<2>(rck); - p.kmeans.n_clusters = std::get<2>(rck); - out.push_back(p); - } - return out; -} - -// note(lsugy): commenting out int64_t because the templates are not compiled in the distance -// library, resulting in long compilation times. -RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs()); -RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs()); -// RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs()); -// RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs()); - -} // namespace raft::bench::cluster diff --git a/cpp/bench/prims/cluster/kmeans_balanced.cu b/cpp/bench/prims/cluster/kmeans_balanced.cu deleted file mode 100644 index dc05783989..0000000000 --- a/cpp/bench/prims/cluster/kmeans_balanced.cu +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include - -namespace raft::bench::cluster { - -struct KMeansBalancedBenchParams { - DatasetParams data; - uint32_t n_lists; - raft::cluster::kmeans_balanced_params kb_params; -}; - -template -struct KMeansBalanced : public fixture { - KMeansBalanced(const KMeansBalancedBenchParams& p) : params(p), X(handle), centroids(handle) {} - - void run_benchmark(::benchmark::State& state) override - { - this->loop_on_state(state, [this]() { - raft::device_matrix_view X_view = this->X.view(); - raft::device_matrix_view centroids_view = this->centroids.view(); - raft::cluster::kmeans_balanced::fit( - this->handle, this->params.kb_params, X_view, centroids_view); - }); - } - - void allocate_data(const ::benchmark::State& state) override - { - X = raft::make_device_matrix(handle, params.data.rows, params.data.cols); - - raft::random::RngState rng{1234}; - constexpr T kRangeMax = std::is_integral_v ? std::numeric_limits::max() : T(1); - constexpr T kRangeMin = std::is_integral_v ? std::numeric_limits::min() : T(-1); - if constexpr (std::is_integral_v) { - raft::random::uniformInt( - handle, rng, X.data_handle(), params.data.rows * params.data.cols, kRangeMin, kRangeMax); - } else { - raft::random::uniform( - handle, rng, X.data_handle(), params.data.rows * params.data.cols, kRangeMin, kRangeMax); - } - resource::sync_stream(handle, stream); - } - - void allocate_temp_buffers(const ::benchmark::State& state) override - { - centroids = - raft::make_device_matrix(this->handle, params.n_lists, params.data.cols); - } - - private: - KMeansBalancedBenchParams params; - raft::device_matrix X; - raft::device_matrix centroids; -}; // struct KMeansBalanced - -std::vector getKMeansBalancedInputs() -{ - std::vector out; - KMeansBalancedBenchParams p; - p.data.row_major = true; - p.kb_params.n_iters = 20; - p.kb_params.metric = raft::distance::DistanceType::L2Expanded; - std::vector> row_cols = { - {100000, 128}, {1000000, 128}, {10000000, 128}, - // The following dataset sizes are too large for most GPUs. - // {100000000, 128}, - }; - for (auto& rc : row_cols) { - p.data.rows = rc.first; - p.data.cols = rc.second; - for (auto n_lists : std::vector({1000, 10000, 100000})) { - p.n_lists = n_lists; - out.push_back(p); - } - } - return out; -} - -// Note: the datasets sizes are too large for 32-bit index types. -RAFT_BENCH_REGISTER((KMeansBalanced), "", getKMeansBalancedInputs()); - -} // namespace raft::bench::cluster diff --git a/cpp/bench/prims/distance/distance_common.cuh b/cpp/bench/prims/distance/distance_common.cuh deleted file mode 100644 index 8368062168..0000000000 --- a/cpp/bench/prims/distance/distance_common.cuh +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include - -namespace raft::bench::distance { - -struct distance_params { - int m, n, k; - bool isRowMajor; -}; // struct distance_params - -template -struct distance : public fixture { - distance(const distance_params& p) - : params(p), - x(p.m * p.k, stream), - y(p.n * p.k, stream), - out(p.m * p.n, stream), - workspace(0, stream) - { - RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, x.size() * sizeof(T), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, y.size() * sizeof(T), stream)); - RAFT_CUDA_TRY(cudaMemsetAsync(out.data(), 0, out.size() * sizeof(T), stream)); - worksize = raft::distance::getWorkspaceSize( - x.data(), y.data(), params.m, params.n, params.k); - workspace.resize(worksize, stream); - } - - void run_benchmark(::benchmark::State& state) override - { - loop_on_state(state, [this]() { - raft::distance::distance(handle, - x.data(), - y.data(), - out.data(), - params.m, - params.n, - params.k, - (void*)workspace.data(), - worksize, - params.isRowMajor); - }); - } - - private: - distance_params params; - rmm::device_uvector x, y, out; - rmm::device_uvector workspace; - size_t worksize; -}; // struct Distance - -const std::vector dist_input_vecs{ - {32, 16384, 16384, true}, {64, 16384, 16384, true}, {128, 16384, 16384, true}, - {256, 16384, 16384, true}, {512, 16384, 16384, true}, {1024, 16384, 16384, true}, - {16384, 32, 16384, true}, {16384, 64, 16384, true}, {16384, 128, 16384, true}, - {16384, 256, 16384, true}, {16384, 512, 16384, true}, {16384, 1024, 16384, true}, - {16384, 16384, 32, true}, {16384, 16384, 64, true}, {16384, 16384, 128, true}, - {16384, 16384, 256, true}, {16384, 16384, 512, true}, {16384, 16384, 1024, true}, - {16384, 16384, 16384, true}, {32, 16384, 16384, false}, {64, 16384, 16384, false}, - {128, 16384, 16384, false}, {256, 16384, 16384, false}, {512, 16384, 16384, false}, - {1024, 16384, 16384, false}, {16384, 32, 16384, false}, {16384, 64, 16384, false}, - {16384, 128, 16384, false}, {16384, 256, 16384, false}, {16384, 512, 16384, false}, - {16384, 1024, 16384, false}, {16384, 16384, 32, false}, {16384, 16384, 64, false}, - {16384, 16384, 128, false}, {16384, 16384, 256, false}, {16384, 16384, 512, false}, - {16384, 16384, 1024, false}, {16384, 16384, 16384, false} - -}; - -#define DIST_BENCH_REGISTER(Name, Metric) \ - using Name##F = distance; \ - RAFT_BENCH_REGISTER(Name##F, "", dist_input_vecs); \ - using Name##D = distance; \ - RAFT_BENCH_REGISTER(Name##D, "", dist_input_vecs); - -} // namespace raft::bench::distance diff --git a/cpp/bench/prims/distance/distance_cosine.cu b/cpp/bench/prims/distance/distance_cosine.cu deleted file mode 100644 index c8ac8067c8..0000000000 --- a/cpp/bench/prims/distance/distance_cosine.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "distance_common.cuh" - -namespace raft::bench::distance { - -DIST_BENCH_REGISTER(DistanceCosine, raft::distance::DistanceType::CosineExpanded); - -} // namespace raft::bench::distance diff --git a/cpp/bench/prims/distance/distance_exp_l2.cu b/cpp/bench/prims/distance/distance_exp_l2.cu deleted file mode 100644 index 52b7fff05c..0000000000 --- a/cpp/bench/prims/distance/distance_exp_l2.cu +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "distance_common.cuh" - -namespace raft::bench::distance { - -DIST_BENCH_REGISTER(DistanceL2Sq, raft::distance::DistanceType::L2Expanded); -DIST_BENCH_REGISTER(DistanceL2Sqrt, raft::distance::DistanceType::L2SqrtExpanded); - -} // namespace raft::bench::distance diff --git a/cpp/bench/prims/distance/distance_l1.cu b/cpp/bench/prims/distance/distance_l1.cu deleted file mode 100644 index e80db48ef0..0000000000 --- a/cpp/bench/prims/distance/distance_l1.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "distance_common.cuh" - -namespace raft::bench::distance { - -DIST_BENCH_REGISTER(DistanceL1, raft::distance::DistanceType::L1); - -} // namespace raft::bench::distance diff --git a/cpp/bench/prims/distance/distance_unexp_l2.cu b/cpp/bench/prims/distance/distance_unexp_l2.cu deleted file mode 100644 index 7ac1a8a4b5..0000000000 --- a/cpp/bench/prims/distance/distance_unexp_l2.cu +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "distance_common.cuh" - -namespace raft::bench::distance { - -DIST_BENCH_REGISTER(DistanceUnexpL2Sq, raft::distance::DistanceType::L2Unexpanded); -DIST_BENCH_REGISTER(DistanceUnexpL2Sqrt, raft::distance::DistanceType::L2SqrtUnexpanded); - -} // namespace raft::bench::distance diff --git a/cpp/bench/prims/distance/fused_l2_nn.cu b/cpp/bench/prims/distance/fused_l2_nn.cu deleted file mode 100644 index a263bef6ba..0000000000 --- a/cpp/bench/prims/distance/fused_l2_nn.cu +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include -#include - -#include - -namespace raft::bench::distance { - -struct fusedl2nn_inputs { - int64_t m, n, k; -}; // struct fusedl2nn_inputs - -inline auto operator<<(std::ostream& os, const fusedl2nn_inputs& p) -> std::ostream& -{ - os << p.m << "#" << p.n << "#" << p.k; - return os; -} - -template -struct fusedl2nn : public fixture { - fusedl2nn(const fusedl2nn_inputs& p) - : params(p), - workspace(this->handle), - x(this->handle), - y(this->handle), - x_norm(this->handle), - y_norm(this->handle), - out(this->handle) - { - } - - void allocate_data(const ::benchmark::State& state) override - { - x = raft::make_device_matrix(handle, params.m, params.k); - y = raft::make_device_matrix(handle, params.n, params.k); - x_norm = raft::make_device_vector(handle, params.m); - y_norm = raft::make_device_vector(handle, params.n); - out = raft::make_device_vector(handle, params.m); - - raft::random::RngState rng{1234}; - raft::random::uniform( - handle, rng, x.data_handle(), params.m * params.k, (DataT)-1.0, (DataT)1.0); - raft::random::uniform( - handle, rng, y.data_handle(), params.n * params.k, (DataT)-1.0, (DataT)1.0); - - // Pre-compute norms - raft::linalg::rowNorm(x_norm.data_handle(), - x.data_handle(), - params.k, - params.m, - raft::linalg::L2Norm, - true, - stream); - raft::linalg::rowNorm(y_norm.data_handle(), - y.data_handle(), - params.k, - params.n, - raft::linalg::L2Norm, - true, - stream); - resource::sync_stream(handle, stream); - } - - void allocate_temp_buffers(const ::benchmark::State& state) override - { - workspace = raft::make_device_vector(handle, params.m * sizeof(IdxT)); - } - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << params; - state.SetLabel(label_stream.str()); - - loop_on_state(state, [this]() { - raft::distance::fusedL2NNMinReduce(out.data_handle(), - x.data_handle(), - y.data_handle(), - x_norm.data_handle(), - y_norm.data_handle(), - static_cast(params.m), - static_cast(params.n), - static_cast(params.k), - (void*)workspace.data_handle(), - false, - true, - stream); - }); - - int64_t num_flops = 2 * params.m * params.n * params.k; - - int64_t read_elts = params.n * params.k + params.m * params.k; - int64_t write_elts = params.m; - - state.counters["FLOP/s"] = benchmark::Counter( - num_flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1000); - - state.counters["BW Wr"] = benchmark::Counter(write_elts * sizeof(OutT), - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - state.counters["BW Rd"] = benchmark::Counter(read_elts * sizeof(DataT), - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - } - - private: - fusedl2nn_inputs params; - raft::device_matrix x, y; - raft::device_vector x_norm, y_norm; - raft::device_vector out; - raft::device_vector workspace; -}; // struct fusedl2nn - -template -std::vector getFusedL2NNInputs() -{ - std::vector inputs; - std::vector m_list = {100000, 1000000}; - if constexpr (sizeof(IdxT) == 8) { m_list.push_back(10000000); } - std::vector n_list = {100, 1000, 10000}; - std::vector k_list = {64, 128, 256}; - for (auto m : m_list) { - for (auto n : n_list) { - for (auto k : k_list) { - inputs.push_back({m, n, k}); - } - } - } - return inputs; -} - -#define FUSEDL2NN_BENCH(DataT, IdxT, OutT) \ - RAFT_BENCH_REGISTER((fusedl2nn), "", getFusedL2NNInputs()) - -FUSEDL2NN_BENCH(float, int, float); -FUSEDL2NN_BENCH(double, int, double); -FUSEDL2NN_BENCH(float, int, (raft::KeyValuePair)); -FUSEDL2NN_BENCH(double, int, (raft::KeyValuePair)); -FUSEDL2NN_BENCH(float, int64_t, float); -FUSEDL2NN_BENCH(double, int64_t, double); -FUSEDL2NN_BENCH(float, int64_t, (raft::KeyValuePair)); -FUSEDL2NN_BENCH(double, int64_t, (raft::KeyValuePair)); - -} // namespace raft::bench::distance diff --git a/cpp/bench/prims/distance/kernels.cu b/cpp/bench/prims/distance/kernels.cu deleted file mode 100644 index eb86330637..0000000000 --- a/cpp/bench/prims/distance/kernels.cu +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace raft::bench::distance::kernels { - -using namespace raft::distance::kernels; -struct GramTestParams { - int m; // m parameter of the GEMM - int k; // k parameter of the GEMM - int n; // n parameter of the GEMM - KernelParams kernel_params; - bool is_row_major; -}; // struct GramTestParams - -template -struct GramMatrix : public fixture { - GramMatrix(const GramTestParams& p) - : params(p), handle(stream), A(0, stream), B(0, stream), C(0, stream) - { - kernel = std::unique_ptr>( - KernelFactory::create(p.kernel_params, resource::get_cublas_handle(handle))); - - A.resize(params.m * params.k, stream); - B.resize(params.k * params.n, stream); - C.resize(params.m * params.n, stream); - raft::random::RngState rng(123456ULL); - raft::random::uniform(handle, rng, A.data(), params.m * params.k, T(-1.0), T(1.0)); - raft::random::uniform(handle, rng, B.data(), params.k * params.n, T(-1.0), T(1.0)); - } - - ~GramMatrix() - { - A.release(); - B.release(); - C.release(); - } - - void run_benchmark(::benchmark::State& state) override - { - if (!this->kernel) { state.SkipWithError("Kernel matrix is not initialized"); } - loop_on_state(state, [this]() { - (*this->kernel)(A.data(), - this->params.m, - this->params.k, - B.data(), - this->params.n, - C.data(), - this->params.is_row_major, - this->stream); - }); - } - - private: - const raft::device_resources handle; - std::unique_ptr> kernel; - GramTestParams params; - - rmm::device_uvector A; // input matrix A, size [m * k] - rmm::device_uvector B; // input matrix B, size [n * k] - rmm::device_uvector C; // output matrix C, size [m*n] -}; - -static std::vector getInputs() -{ - std::vector param_vec; - std::vector kernel_params{KernelParams{LINEAR, 3, 1, 0}, - KernelParams{POLYNOMIAL, 2, 1.3, 1}, - KernelParams{TANH, 2, 0.5, 2.4}, - KernelParams{RBF, 2, 0.5, 0}}; - struct TestSize { - int m; - int k; - int n; - }; - std::vector data_size{{4096, 10, 1024}, - {4096, 100, 1024}, - {4096, 1000, 1024}, - {4096, 10000, 1024}, - {100000, 10, 1024}, - {100000, 100, 1024}, - {100000, 1000, 1024}}; - - param_vec.reserve(kernel_params.size() * data_size.size()); - for (TestSize s : data_size) { - for (auto kernel : kernel_params) { - for (bool row_major : {false, true}) { - param_vec.push_back(GramTestParams{s.m, s.k, s.n, kernel, row_major}); - } - } - } - return param_vec; -} - -RAFT_BENCH_REGISTER(GramMatrix, "", getInputs()); -RAFT_BENCH_REGISTER(GramMatrix, "", getInputs()); - -} // namespace raft::bench::distance::kernels diff --git a/cpp/bench/prims/distance/masked_nn.cu b/cpp/bench/prims/distance/masked_nn.cu deleted file mode 100644 index 979d438b67..0000000000 --- a/cpp/bench/prims/distance/masked_nn.cu +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace raft::bench::distance::masked_nn { - -// Introduce various sparsity patterns -enum AdjacencyPattern { - checkerboard = 0, - checkerboard_4 = 1, - checkerboard_64 = 2, - all_true = 3, - all_false = 4 -}; - -struct Params { - int m, n, k, num_groups; - AdjacencyPattern pattern; -}; // struct Params - -RAFT_KERNEL init_adj(AdjacencyPattern pattern, - int n, - raft::device_matrix_view adj, - raft::device_vector_view group_idxs) -{ - int m = adj.extent(0); - int num_groups = adj.extent(1); - - for (int idx_m = blockIdx.y * blockDim.y + threadIdx.y; idx_m < m; - idx_m += blockDim.y * gridDim.y) { - for (int idx_g = blockIdx.x * blockDim.x + threadIdx.x; idx_g < num_groups; - idx_g += blockDim.x * gridDim.x) { - switch (pattern) { - case checkerboard: adj(idx_m, idx_g) = (idx_m + idx_g) % 2; break; - case checkerboard_4: adj(idx_m, idx_g) = (idx_m / 4 + idx_g) % 2; break; - case checkerboard_64: adj(idx_m, idx_g) = (idx_m / 64 + idx_g) % 2; break; - case all_true: adj(idx_m, idx_g) = true; break; - case all_false: adj(idx_m, idx_g) = false; break; - default: assert(false && "unknown pattern"); - } - } - } - // Each group is of size n / num_groups. - // - // - group_idxs[j] indicates the start of group j + 1 (i.e. is the inclusive - // scan of the group lengths) - // - // - The first group always starts at index zero, so we do not store it. - // - // - The group_idxs[num_groups - 1] should always equal n. - - if (blockIdx.y == 0 && threadIdx.y == 0) { - const int g_stride = blockDim.x * gridDim.x; - for (int idx_g = blockIdx.x * blockDim.x + threadIdx.x; idx_g < num_groups; idx_g += g_stride) { - group_idxs(idx_g) = (idx_g + 1) * (n / num_groups); - } - group_idxs(num_groups - 1) = n; - } -} - -template -struct masked_l2_nn : public fixture { - using DataT = T; - using IdxT = int; - using OutT = raft::KeyValuePair; - using RedOpT = raft::distance::MinAndDistanceReduceOp; - using PairRedOpT = raft::distance::KVPMinReduce; - using ParamT = raft::distance::masked_l2_nn_params; - - // Parameters - Params params; - // Data - raft::device_vector out; - raft::device_matrix x, y; - raft::device_vector xn, yn; - raft::device_matrix adj; - raft::device_vector group_idxs; - - masked_l2_nn(const Params& p) - : params(p), - out{raft::make_device_vector(handle, p.m)}, - x{raft::make_device_matrix(handle, p.m, p.k)}, - y{raft::make_device_matrix(handle, p.n, p.k)}, - xn{raft::make_device_vector(handle, p.m)}, - yn{raft::make_device_vector(handle, p.n)}, - adj{raft::make_device_matrix(handle, p.m, p.num_groups)}, - group_idxs{raft::make_device_vector(handle, p.num_groups)} - { - raft::random::RngState r(123456ULL); - - uniform(handle, r, x.data_handle(), p.m * p.k, T(-1.0), T(1.0)); - uniform(handle, r, y.data_handle(), p.n * p.k, T(-1.0), T(1.0)); - raft::linalg::rowNorm( - xn.data_handle(), x.data_handle(), p.k, p.m, raft::linalg::L2Norm, true, stream); - raft::linalg::rowNorm( - yn.data_handle(), y.data_handle(), p.k, p.n, raft::linalg::L2Norm, true, stream); - raft::distance::initialize, int>( - handle, out.data_handle(), p.m, std::numeric_limits::max(), RedOpT{}); - - dim3 block(32, 32); - dim3 grid(10, 10); - init_adj<<>>(p.pattern, p.n, adj.view(), group_idxs.view()); - RAFT_CUDA_TRY(cudaGetLastError()); - } - - void run_benchmark(::benchmark::State& state) override - { - bool init_out = true; - bool sqrt = false; - ParamT masked_l2_params{RedOpT{}, PairRedOpT{}, sqrt, init_out}; - - loop_on_state(state, [this, masked_l2_params]() { - // It is sufficient to only benchmark the L2-squared metric - raft::distance::masked_l2_nn(handle, - masked_l2_params, - x.view(), - y.view(), - xn.view(), - yn.view(), - adj.view(), - group_idxs.view(), - out.view()); - }); - - // Virtual flop count if no skipping had occurred. - size_t virtual_flops = size_t(2) * size_t(params.m) * size_t(params.n) * size_t(params.k); - - int64_t read_elts = params.n * params.k + params.m * params.k; - int64_t write_elts = params.m; - - // Virtual min flops is the number of flops that would have been executed if - // the algorithm had actually skipped each computation that it could have - // skipped. - size_t virtual_min_flops = 0; - switch (params.pattern) { - case checkerboard: - case checkerboard_4: - case checkerboard_64: virtual_min_flops = virtual_flops / 2; break; - case all_true: virtual_min_flops = virtual_flops; break; - case all_false: virtual_min_flops = 0; break; - default: assert(false && "unknown pattern"); - } - - // VFLOP/s is the "virtual" flop count that would have executed if there was - // no adjacency pattern. This is useful for comparing to fusedL2NN - state.counters["VFLOP/s"] = benchmark::Counter(virtual_flops, - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - // Virtual min flops is the number of flops that would have been executed if - // the algorithm had actually skipped each computation that it could have - // skipped. - state.counters["VminFLOP/s"] = benchmark::Counter(virtual_min_flops, - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - - state.counters["BW Wr"] = benchmark::Counter(write_elts * sizeof(OutT), - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - state.counters["BW Rd"] = benchmark::Counter(read_elts * sizeof(DataT), - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - - state.counters["m"] = benchmark::Counter(params.m); - state.counters["n"] = benchmark::Counter(params.n); - state.counters["k"] = benchmark::Counter(params.k); - state.counters["num_groups"] = benchmark::Counter(params.num_groups); - state.counters["group size"] = benchmark::Counter(params.n / params.num_groups); - state.counters["Pat"] = benchmark::Counter(static_cast(params.pattern)); - - state.counters["SM count"] = raft::getMultiProcessorCount(); - } -}; - -const std::vector masked_l2_nn_input_vecs = { - // Very fat matrices... - {32, 16384, 16384, 32, AdjacencyPattern::checkerboard}, - {64, 16384, 16384, 32, AdjacencyPattern::checkerboard}, - {128, 16384, 16384, 32, AdjacencyPattern::checkerboard}, - {256, 16384, 16384, 32, AdjacencyPattern::checkerboard}, - {512, 16384, 16384, 32, AdjacencyPattern::checkerboard}, - {1024, 16384, 16384, 32, AdjacencyPattern::checkerboard}, - {16384, 32, 16384, 32, AdjacencyPattern::checkerboard}, - {16384, 64, 16384, 32, AdjacencyPattern::checkerboard}, - {16384, 128, 16384, 32, AdjacencyPattern::checkerboard}, - {16384, 256, 16384, 32, AdjacencyPattern::checkerboard}, - {16384, 512, 16384, 32, AdjacencyPattern::checkerboard}, - {16384, 1024, 16384, 32, AdjacencyPattern::checkerboard}, - - // Representative matrices... - {16384, 16384, 32, 32, AdjacencyPattern::checkerboard}, - {16384, 16384, 64, 32, AdjacencyPattern::checkerboard}, - {16384, 16384, 128, 32, AdjacencyPattern::checkerboard}, - {16384, 16384, 256, 32, AdjacencyPattern::checkerboard}, - {16384, 16384, 512, 32, AdjacencyPattern::checkerboard}, - {16384, 16384, 1024, 32, AdjacencyPattern::checkerboard}, - {16384, 16384, 16384, 32, AdjacencyPattern::checkerboard}, - - {16384, 16384, 32, 32, AdjacencyPattern::checkerboard_4}, - {16384, 16384, 64, 32, AdjacencyPattern::checkerboard_4}, - {16384, 16384, 128, 32, AdjacencyPattern::checkerboard_4}, - {16384, 16384, 256, 32, AdjacencyPattern::checkerboard_4}, - {16384, 16384, 512, 32, AdjacencyPattern::checkerboard_4}, - {16384, 16384, 1024, 32, AdjacencyPattern::checkerboard_4}, - {16384, 16384, 16384, 32, AdjacencyPattern::checkerboard_4}, - - {16384, 16384, 32, 32, AdjacencyPattern::checkerboard_64}, - {16384, 16384, 64, 32, AdjacencyPattern::checkerboard_64}, - {16384, 16384, 128, 32, AdjacencyPattern::checkerboard_64}, - {16384, 16384, 256, 32, AdjacencyPattern::checkerboard_64}, - {16384, 16384, 512, 32, AdjacencyPattern::checkerboard_64}, - {16384, 16384, 1024, 32, AdjacencyPattern::checkerboard_64}, - {16384, 16384, 16384, 32, AdjacencyPattern::checkerboard_64}, - - {16384, 16384, 32, 32, AdjacencyPattern::all_true}, - {16384, 16384, 64, 32, AdjacencyPattern::all_true}, - {16384, 16384, 128, 32, AdjacencyPattern::all_true}, - {16384, 16384, 256, 32, AdjacencyPattern::all_true}, - {16384, 16384, 512, 32, AdjacencyPattern::all_true}, - {16384, 16384, 1024, 32, AdjacencyPattern::all_true}, - {16384, 16384, 16384, 32, AdjacencyPattern::all_true}, - - {16384, 16384, 32, 32, AdjacencyPattern::all_false}, - {16384, 16384, 64, 32, AdjacencyPattern::all_false}, - {16384, 16384, 128, 32, AdjacencyPattern::all_false}, - {16384, 16384, 256, 32, AdjacencyPattern::all_false}, - {16384, 16384, 512, 32, AdjacencyPattern::all_false}, - {16384, 16384, 1024, 32, AdjacencyPattern::all_false}, - {16384, 16384, 16384, 32, AdjacencyPattern::all_false}, -}; - -RAFT_BENCH_REGISTER(masked_l2_nn, "", masked_l2_nn_input_vecs); -// We don't benchmark double to keep compile times in check when not using the -// distance library. - -} // namespace raft::bench::distance::masked_nn diff --git a/cpp/bench/prims/distance/tune_pairwise/bench.cu b/cpp/bench/prims/distance/tune_pairwise/bench.cu deleted file mode 100644 index 81105cdefe..0000000000 --- a/cpp/bench/prims/distance/tune_pairwise/bench.cu +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Tuning benchmarks. -// -// Goals: -// -// 1. Fast compile times to maintain iteration speed. -// 2. Create benchmarks that can inform the design of the kernels. -// -// Non-goals: -// -// 1. Measure every distance operation. Instead measures just one distance -// operation at the same time. -// 2. Be useful for finding performance regressions. This is handled by the -// normal benchmarks. -// -// So far, both goals are partly achieved. -// -// RE (1), COMPILE TIMES: kernel.cu is fast to compile. This file is not. -// When the internals of a pairwise distance kernel is changed, this file is not -// recompiled. -// -// RE 2, benchmarks with intent: this file contains a benchmark to check the -// maximal throughput of a kernel. Measuring other things, like performance on -// skinny or wide matrices is not yet implemented. - -#include "kernel.cuh" // launch_kernel - -#include // RAFT_BENCH_REGISTER - -#include // pairwise_matrix_params - -#include // rmm::device_uvector - -#include // std::min -#include // std::vector - -namespace raft::bench::distance::tune { - -// Max throughput benchmark. -// -// Goal: Measure the maximum distances/sec that can be computed. -// -// To achieve this, we make sure that: -// -// - Input data size is a multiple of the block tile size. -// -// - Perfect distribution of work between SMs, i.e. the number of block tiles is -// a large multiple (num_waves) of the number of blocks (#SMs * occupancy). -// -// - Multiple iterations over Kblk are executed (num_k_iters). -struct throughput_param { - int num_waves; - int occupancy; - int num_k_iters; -}; - -const std::vector throughput_params{ - // 32 waves, requested occupancy of 4, and 32 k iterations typically achieves - // maximum throughput. No need to pick higher values. - {32, 4, 32}, -}; - -struct throughput_bench : public fixture { - const throughput_param p; - - throughput_bench(const throughput_param& p_) : p(p_) {} - - void run_benchmark(::benchmark::State& state) override - { - // Get block size: - int block_m, block_n, block_k; - get_block_size(block_m, block_n, block_k); - - // Determine number of blocks that will be launched. This informs the size - // of the inputs as well as the grid size. - const int num_sms = raft::getMultiProcessorCount(); - const int max_occupancy = get_max_occupancy(); - const int occupancy = std::min(p.occupancy, max_occupancy); - const int num_blocks = occupancy * num_sms; - dim3 grid(num_blocks); - - // Create input sizes that are a multiple of the block tile size. - size_t m = block_m; - size_t n = block_n * p.num_waves * num_blocks; - size_t k = block_k * p.num_k_iters; - - // DataT, OutT, IdxT, etc, are defined in tuned_kernel.cuh - rmm::device_uvector x_vec(m * k, stream); - rmm::device_uvector y_vec(n * k, stream); - rmm::device_uvector x_norm_vec(m, stream); - rmm::device_uvector y_norm_vec(n, stream); - rmm::device_uvector out_vec(m * n, stream); - - auto x = x_vec.data(); - auto y = y_vec.data(); - auto x_norm = x_norm_vec.data(); - auto y_norm = y_norm_vec.data(); - auto out = out_vec.data(); - FinOpT fin_op{}; - - // Create kernel parameter struct. Flip x and y if column major. - IdxT ldx = row_major ? k : m; - IdxT ldy = row_major ? k : n; - IdxT ld_out = row_major ? n : m; - - // Template parameters of pairwise_matrix_params are defined in kernel.cuh - pairwise_matrix_params kparams{ - IdxT(m), IdxT(n), IdxT(k), ldx, ldy, ld_out, x, y, x_norm, y_norm, out, fin_op, row_major}; - - // Run benchmark - loop_on_state(state, [&]() { launch_kernel(kparams, grid, stream); }); - - // Report metrics. We don't report flop/s because we do not know for each - // distance operation how many flops it costs. For L2_unexp and l1, we can - // double this number to get the flop/s. For l2 expanded, core_ops/s should - // equal flop/s (modulo the sqrt and subtracting from the norm). - size_t num_core_ops = m * n * k; - size_t read_elts = n * k + m * k; - size_t write_elts = m * n; - - state.counters["m"] = benchmark::Counter(m); - state.counters["n"] = benchmark::Counter(n); - state.counters["k"] = benchmark::Counter(k); - state.counters["occupancy"] = benchmark::Counter(occupancy); - state.counters["# waves"] = benchmark::Counter(p.num_waves); - state.counters["# k iters"] = benchmark::Counter(p.num_k_iters); - - state.counters["core_ops/s"] = benchmark::Counter(num_core_ops, - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - - state.counters["BW"] = benchmark::Counter(write_elts * sizeof(OutT) + read_elts * sizeof(DataT), - benchmark::Counter::kIsIterationInvariantRate, - benchmark::Counter::OneK::kIs1000); - } -}; - -RAFT_BENCH_REGISTER(throughput_bench, "", throughput_params); - -} // namespace raft::bench::distance::tune diff --git a/cpp/bench/prims/distance/tune_pairwise/kernel.cu b/cpp/bench/prims/distance/tune_pairwise/kernel.cu deleted file mode 100644 index 42173c51f5..0000000000 --- a/cpp/bench/prims/distance/tune_pairwise/kernel.cu +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "kernel.cuh" - -#include // pairwise_matrix_sm60_wrapper -#include // raft::linalg::Policy4x4 -#include // raft::util::arch::SM_compute_arch - -namespace raft::bench::distance::tune { - -// Distance op -using OpT = raft::distance::detail::ops::lp_unexp_distance_op; -constexpr float metric_arg = 2.0; -OpT distance_op{metric_arg}; - -// Kernel policy -constexpr int vec_len = 1; -using Policy = typename raft::linalg::Policy4x4::Policy; - -// Architecture -namespace arch = raft::util::arch; -constexpr auto sm_compat_range = arch::SM_range(arch::SM_min(), arch::SM_future()); - -void launch_kernel(pairwise_matrix_params params, dim3 grid, cudaStream_t stream) -{ - dim3 block(Policy::Nthreads); - int smem_size = OpT::shared_mem_size(); - - // Obtain function pointer to kernel - auto kernel = raft::distance::detail::pairwise_matrix_kernel; - - kernel<<>>(distance_op, params); - RAFT_CUDA_TRY(cudaGetLastError()); -} - -void get_block_size(int& m, int& n, int& k) -{ - m = Policy::Mblk; - n = Policy::Nblk; - k = Policy::Kblk; -} - -void* get_kernel_ptr() -{ - auto kernel = raft::distance::detail::pairwise_matrix_kernel; - return reinterpret_cast(kernel); -} - -int get_max_occupancy() -{ - void* kernel_ptr = get_kernel_ptr(); - int max_occupancy; - int smem_size = OpT::shared_mem_size(); - - RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( - &max_occupancy, kernel_ptr, Policy::Nthreads, smem_size)); - - return max_occupancy; -} - -} // namespace raft::bench::distance::tune diff --git a/cpp/bench/prims/distance/tune_pairwise/kernel.cuh b/cpp/bench/prims/distance/tune_pairwise/kernel.cuh deleted file mode 100644 index 5da54a343c..0000000000 --- a/cpp/bench/prims/distance/tune_pairwise/kernel.cuh +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include // lp_unexp_distance_op -#include // pairwise_matrix_params - -namespace raft::bench::distance::tune { - -// Launch one specific kernel with the following template parameters -constexpr bool row_major = true; -using DataT = float; -using AccT = float; -using OutT = DataT; -using IdxT = int; - -using FinOpT = raft::identity_op; - -using pairwise_matrix_params = - raft::distance::detail::pairwise_matrix_params; - -// Launches kernel -void launch_kernel(pairwise_matrix_params, dim3, cudaStream_t); - -// Describes the block size that is decided by the policy -void get_block_size(int& m, int& n, int& k); - -int get_max_occupancy(); - -} // namespace raft::bench::distance::tune diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh deleted file mode 100644 index acbeba375a..0000000000 --- a/cpp/bench/prims/neighbors/cagra_bench.cuh +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include -#include -#include - -#include - -#include - -namespace raft::bench::neighbors { - -struct params { - /** Size of the dataset. */ - size_t n_samples; - /** Number of dimensions in the dataset. */ - int n_dims; - /** The batch size -- number of KNN searches. */ - int n_queries; - /** Number of nearest neighbours to find for every probe. */ - int k; - /** kNN graph degree*/ - int degree; - int itopk_size; - int block_size; - int search_width; - int max_iterations; - /** Ratio of removed indices. */ - double removed_ratio; -}; - -template -struct CagraBench : public fixture { - explicit CagraBench(const params& ps) - : fixture(true), - params_(ps), - queries_(make_device_matrix(handle, ps.n_queries, ps.n_dims)), - dataset_(make_device_matrix(handle, ps.n_samples, ps.n_dims)), - knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)), - removed_indices_bitset_(handle, ps.n_samples) - { - // Generate random dataset and queriees - raft::random::RngState state{42}; - constexpr T kRangeMax = std::is_integral_v ? std::numeric_limits::max() : T(1); - constexpr T kRangeMin = std::is_integral_v ? std::numeric_limits::min() : T(-1); - if constexpr (std::is_integral_v) { - raft::random::uniformInt( - handle, state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax); - raft::random::uniformInt( - handle, state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax); - } else { - raft::random::uniform( - handle, state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax); - raft::random::uniform( - handle, state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax); - } - - // Generate random knn graph - - raft::random::uniformInt( - handle, state, knn_graph_.data_handle(), knn_graph_.size(), 0, ps.n_samples - 1); - - auto metric = raft::distance::DistanceType::L2Expanded; - - auto removed_indices = - raft::make_device_vector(handle, ps.removed_ratio * ps.n_samples); - thrust::sequence( - resource::get_thrust_policy(handle), - thrust::device_pointer_cast(removed_indices.data_handle()), - thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); - removed_indices_bitset_.set(handle, removed_indices.view()); - index_.emplace(raft::neighbors::cagra::index( - handle, metric, make_const_mdspan(dataset_.view()), make_const_mdspan(knn_graph_.view()))); - } - - void run_benchmark(::benchmark::State& state) override - { - raft::neighbors::cagra::search_params search_params; - search_params.max_queries = 1024; - search_params.itopk_size = params_.itopk_size; - search_params.team_size = 0; - search_params.thread_block_size = params_.block_size; - search_params.search_width = params_.search_width; - - auto indices = make_device_matrix(handle, params_.n_queries, params_.k); - auto distances = make_device_matrix(handle, params_.n_queries, params_.k); - auto ind_v = make_device_matrix_view( - indices.data_handle(), params_.n_queries, params_.k); - auto dist_v = make_device_matrix_view( - distances.data_handle(), params_.n_queries, params_.k); - - auto queries_v = make_const_mdspan(queries_.view()); - if (params_.removed_ratio > 0) { - auto filter = raft::neighbors::filtering::bitset_filter(removed_indices_bitset_.view()); - loop_on_state(state, [&]() { - raft::neighbors::cagra::search_with_filtering( - this->handle, search_params, *this->index_, queries_v, ind_v, dist_v, filter); - }); - } else { - loop_on_state(state, [&]() { - raft::neighbors::cagra::search( - this->handle, search_params, *this->index_, queries_v, ind_v, dist_v); - }); - } - - double data_size = params_.n_samples * params_.n_dims * sizeof(T); - double graph_size = params_.n_samples * params_.degree * sizeof(IdxT); - - int iterations = params_.max_iterations; - if (iterations == 0) { - // see search_plan_impl::adjust_search_params() - double r = params_.itopk_size / static_cast(params_.search_width); - iterations = 1 + std::min(r * 1.1, r + 10); - } - state.counters["dataset (GiB)"] = data_size / (1 << 30); - state.counters["graph (GiB)"] = graph_size / (1 << 30); - state.counters["n_rows"] = params_.n_samples; - state.counters["n_cols"] = params_.n_dims; - state.counters["degree"] = params_.degree; - state.counters["n_queries"] = params_.n_queries; - state.counters["k"] = params_.k; - state.counters["itopk_size"] = params_.itopk_size; - state.counters["block_size"] = params_.block_size; - state.counters["search_width"] = params_.search_width; - state.counters["iterations"] = iterations; - state.counters["removed_ratio"] = params_.removed_ratio; - } - - private: - const params params_; - std::optional> index_; - raft::device_matrix queries_; - raft::device_matrix dataset_; - raft::device_matrix knn_graph_; - raft::core::bitset removed_indices_bitset_; -}; - -inline const std::vector generate_inputs() -{ - std::vector inputs = - raft::util::itertools::product({2000000ull}, // n_samples - {128, 256, 512, 1024}, // dataset dim - {1000}, // n_queries - {32}, // k - {64}, // knn graph degree - {64}, // itopk_size - {0}, // block_size - {1}, // search_width - {0}, // max_iterations - {0.0} // removed_ratio - ); - auto inputs2 = raft::util::itertools::product({2000000ull, 10000000ull}, // n_samples - {128}, // dataset dim - {1000}, // n_queries - {32}, // k - {64}, // knn graph degree - {64}, // itopk_size - {64, 128, 256, 512, 1024}, // block_size - {1}, // search_width - {0}, // max_iterations - {0.0} // removed_ratio - ); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - - inputs2 = raft::util::itertools::product( - {2000000ull, 10000000ull}, // n_samples - {128}, // dataset dim - {1, 10, 10000}, // n_queries - {255}, // k - {64}, // knn graph degree - {300}, // itopk_size - {256}, // block_size - {2}, // search_width - {0}, // max_iterations - {0.0, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64} // removed_ratio - ); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - return inputs; -} - -const std::vector kCagraInputs = generate_inputs(); - -#define CAGRA_REGISTER(ValT, IdxT, inputs) \ - namespace BENCHMARK_PRIVATE_NAME(knn) { \ - using AnnCagra = CagraBench; \ - RAFT_BENCH_REGISTER(AnnCagra, #ValT "/" #IdxT, inputs); \ - } - -} // namespace raft::bench::neighbors diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh deleted file mode 100644 index 6499078623..0000000000 --- a/cpp/bench/prims/neighbors/knn.cuh +++ /dev/null @@ -1,516 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include - -namespace raft::bench::spatial { - -struct params { - /** Size of the dataset. */ - size_t n_samples; - /** Number of dimensions in the dataset. */ - size_t n_dims; - /** The batch size -- number of KNN searches. */ - size_t n_queries; - /** Number of nearest neighbours to find for every probe. */ - size_t k; - /** Ratio of removed indices. */ - double removed_ratio; -}; - -inline auto operator<<(std::ostream& os, const params& p) -> std::ostream& -{ - os << p.n_samples << "#" << p.n_dims << "#" << p.n_queries << "#" << p.k << "#" - << p.removed_ratio; - return os; -} - -enum class TransferStrategy { NO_COPY, COPY_PLAIN, COPY_PINNED, MAP_PINNED, MANAGED }; // NOLINT -enum class Scope { BUILD, SEARCH, BUILD_SEARCH }; // NOLINT - -inline auto operator<<(std::ostream& os, const TransferStrategy& ts) -> std::ostream& -{ - switch (ts) { - case TransferStrategy::NO_COPY: os << "NO_COPY"; break; - case TransferStrategy::COPY_PLAIN: os << "COPY_PLAIN"; break; - case TransferStrategy::COPY_PINNED: os << "COPY_PINNED"; break; - case TransferStrategy::MAP_PINNED: os << "MAP_PINNED"; break; - case TransferStrategy::MANAGED: os << "MANAGED"; break; - default: os << "UNKNOWN"; - } - return os; -} - -inline auto operator<<(std::ostream& os, const Scope& s) -> std::ostream& -{ - switch (s) { - case Scope::BUILD: os << "BUILD"; break; - case Scope::SEARCH: os << "SEARCH"; break; - case Scope::BUILD_SEARCH: os << "BUILD_SEARCH"; break; - default: os << "UNKNOWN"; - } - return os; -} - -struct device_resource { - public: - explicit device_resource(bool managed) : managed_(managed) - { - if (managed_) { - res_ = new rmm::mr::managed_memory_resource(); - } else { - res_ = rmm::mr::get_current_device_resource(); - } - } - - ~device_resource() - { - if (managed_) { delete res_; } - } - - [[nodiscard]] auto get() const -> rmm::device_async_resource_ref { return res_; } - - private: - const bool managed_; - rmm::mr::device_memory_resource* res_; -}; - -template -struct host_uvector { - host_uvector(size_t n, bool pinned) : n_(n) - { - if (pinned) { - res_ = new rmm::mr::pinned_memory_resource(); - } else { - res_ = new rmm::mr::new_delete_resource(); - } - arr_ = static_cast(res_->allocate(n_ * sizeof(T))); - } - - ~host_uvector() noexcept - { - res_->deallocate(arr_, n_ * sizeof(T)); - delete res_; - } - - auto data() -> T* { return arr_; } - [[nodiscard]] auto size() const -> size_t { return n_; } - - private: - rmm::mr::host_memory_resource* res_; - size_t n_; - T* arr_; -}; - -template -struct ivf_flat_knn { - using dist_t = float; - - std::optional> index; - raft::neighbors::ivf_flat::index_params index_params; - raft::neighbors::ivf_flat::search_params search_params; - params ps; - - ivf_flat_knn(const raft::device_resources& handle, const params& ps, const ValT* data) : ps(ps) - { - index_params.n_lists = 4096; - index_params.metric = raft::distance::DistanceType::L2Expanded; - index.emplace(raft::neighbors::ivf_flat::build( - handle, index_params, data, IdxT(ps.n_samples), uint32_t(ps.n_dims))); - } - - void search(const raft::device_resources& handle, - const ValT* search_items, - dist_t* out_dists, - IdxT* out_idxs) - { - search_params.n_probes = 20; - raft::neighbors::ivf_flat::search(handle, - search_params, - *index, - search_items, - ps.n_queries, - ps.k, - out_idxs, - out_dists, - resource::get_workspace_resource(handle)); - } -}; - -template -struct ivf_pq_knn { - using dist_t = float; - - std::optional> index; - raft::neighbors::ivf_pq::index_params index_params; - raft::neighbors::ivf_pq::search_params search_params; - params ps; - - ivf_pq_knn(const raft::device_resources& handle, const params& ps, const ValT* data) : ps(ps) - { - index_params.n_lists = 4096; - index_params.metric = raft::distance::DistanceType::L2Expanded; - auto data_view = raft::make_device_matrix_view(data, ps.n_samples, ps.n_dims); - index.emplace(raft::neighbors::ivf_pq::build(handle, index_params, data_view)); - } - - void search(const raft::device_resources& handle, - const ValT* search_items, - dist_t* out_dists, - IdxT* out_idxs) - { - search_params.n_probes = 20; - auto queries_view = - raft::make_device_matrix_view(search_items, ps.n_queries, ps.n_dims); - auto idxs_view = raft::make_device_matrix_view(out_idxs, ps.n_queries, ps.k); - auto dists_view = - raft::make_device_matrix_view(out_dists, ps.n_queries, ps.k); - raft::neighbors::ivf_pq::search( - handle, search_params, *index, queries_view, idxs_view, dists_view); - } -}; - -template -struct brute_force_knn { - using dist_t = ValT; - - ValT* index; - params ps; - - brute_force_knn(const raft::device_resources& handle, const params& ps, const ValT* data) - : index(const_cast(data)), ps(ps) - { - } - - void search(const raft::device_resources& handle, - const ValT* search_items, - dist_t* out_dists, - IdxT* out_idxs) - { - std::vector input{index}; - std::vector sizes{ps.n_samples}; - raft::spatial::knn::brute_force_knn(handle, - input, - sizes, - ps.n_dims, - const_cast(search_items), - ps.n_queries, - out_idxs, - out_dists, - ps.k); - } -}; - -template -struct ivf_flat_filter_knn { - using dist_t = float; - - std::optional> index; - raft::neighbors::ivf_flat::index_params index_params; - raft::neighbors::ivf_flat::search_params search_params; - raft::core::bitset removed_indices_bitset_; - params ps; - - ivf_flat_filter_knn(const raft::device_resources& handle, const params& ps, const ValT* data) - : ps(ps), removed_indices_bitset_(handle, ps.n_samples) - { - index_params.n_lists = 4096; - index_params.metric = raft::distance::DistanceType::L2Expanded; - index.emplace(raft::neighbors::ivf_flat::build( - handle, index_params, data, IdxT(ps.n_samples), uint32_t(ps.n_dims))); - auto removed_indices = - raft::make_device_vector(handle, ps.removed_ratio * ps.n_samples); - thrust::sequence( - resource::get_thrust_policy(handle), - thrust::device_pointer_cast(removed_indices.data_handle()), - thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); - removed_indices_bitset_.set(handle, removed_indices.view()); - } - - void search(const raft::device_resources& handle, - const ValT* search_items, - dist_t* out_dists, - IdxT* out_idxs) - { - search_params.n_probes = 20; - auto queries_view = - raft::make_device_matrix_view(search_items, ps.n_queries, ps.n_dims); - auto neighbors_view = raft::make_device_matrix_view(out_idxs, ps.n_queries, ps.k); - auto distance_view = raft::make_device_matrix_view(out_dists, ps.n_queries, ps.k); - auto filter = raft::neighbors::filtering::bitset_filter(removed_indices_bitset_.view()); - - if (ps.removed_ratio > 0) { - raft::neighbors::ivf_flat::search_with_filtering( - handle, search_params, *index, queries_view, neighbors_view, distance_view, filter); - } else { - raft::neighbors::ivf_flat::search( - handle, search_params, *index, queries_view, neighbors_view, distance_view); - } - } -}; - -template -struct ivf_pq_filter_knn { - using dist_t = float; - - std::optional> index; - raft::neighbors::ivf_pq::index_params index_params; - raft::neighbors::ivf_pq::search_params search_params; - raft::core::bitset removed_indices_bitset_; - params ps; - - ivf_pq_filter_knn(const raft::device_resources& handle, const params& ps, const ValT* data) - : ps(ps), removed_indices_bitset_(handle, ps.n_samples) - { - index_params.n_lists = 4096; - index_params.metric = raft::distance::DistanceType::L2Expanded; - auto data_view = raft::make_device_matrix_view(data, ps.n_samples, ps.n_dims); - index.emplace(raft::neighbors::ivf_pq::build(handle, index_params, data_view)); - auto removed_indices = - raft::make_device_vector(handle, ps.removed_ratio * ps.n_samples); - thrust::sequence( - resource::get_thrust_policy(handle), - thrust::device_pointer_cast(removed_indices.data_handle()), - thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); - removed_indices_bitset_.set(handle, removed_indices.view()); - } - - void search(const raft::device_resources& handle, - const ValT* search_items, - dist_t* out_dists, - IdxT* out_idxs) - { - search_params.n_probes = 20; - auto queries_view = - raft::make_device_matrix_view(search_items, ps.n_queries, ps.n_dims); - auto neighbors_view = - raft::make_device_matrix_view(out_idxs, ps.n_queries, ps.k); - auto distance_view = - raft::make_device_matrix_view(out_dists, ps.n_queries, ps.k); - auto filter = raft::neighbors::filtering::bitset_filter(removed_indices_bitset_.view()); - - if (ps.removed_ratio > 0) { - raft::neighbors::ivf_pq::search_with_filtering( - handle, search_params, *index, queries_view, neighbors_view, distance_view, filter); - } else { - raft::neighbors::ivf_pq::search( - handle, search_params, *index, queries_view, neighbors_view, distance_view); - } - } -}; - -template -struct knn : public fixture { - explicit knn(const params& p, const TransferStrategy& strategy, const Scope& scope) - : fixture(true), - params_(p), - strategy_(strategy), - scope_(scope), - dev_mem_res_(strategy == TransferStrategy::MANAGED), - data_host_(0), - search_items_(p.n_queries * p.n_dims, stream), - out_dists_(p.n_queries * p.k, stream), - out_idxs_(p.n_queries * p.k, stream) - { - raft::random::RngState state{42}; - gen_data(state, search_items_, search_items_.size(), stream); - try { - size_t total_size = p.n_samples * p.n_dims; - data_host_.resize(total_size); - constexpr size_t kGenMinibatchSize = 1024 * 1024 * 1024; - rmm::device_uvector d(std::min(kGenMinibatchSize, total_size), stream); - for (size_t offset = 0; offset < total_size; offset += kGenMinibatchSize) { - size_t actual_size = std::min(total_size - offset, kGenMinibatchSize); - gen_data(state, d, actual_size, stream); - copy(data_host_.data() + offset, d.data(), actual_size, stream); - } - } catch (std::bad_alloc& e) { - data_does_not_fit_ = true; - } - } - - template - void gen_data(raft::random::RngState& state, // NOLINT - rmm::device_uvector& vec, - size_t n, - rmm::cuda_stream_view stream) - { - constexpr T kRangeMax = std::is_integral_v ? std::numeric_limits::max() : T(1); - constexpr T kRangeMin = std::is_integral_v ? std::numeric_limits::min() : T(-1); - if constexpr (std::is_integral_v) { - raft::random::uniformInt(handle, state, vec.data(), n, kRangeMin, kRangeMax); - } else { - raft::random::uniform(handle, state, vec.data(), n, kRangeMin, kRangeMax); - } - } - - void run_benchmark(::benchmark::State& state) override - { - if (data_does_not_fit_) { - state.SkipWithError("The data size is too big to fit into the host memory."); - } - if (scope_ == Scope::SEARCH && strategy_ != TransferStrategy::NO_COPY) { - state.SkipWithError( - "When benchmarking without index building (Scope::SEARCH), the data must be already on the " - "device (TransferStrategy::NO_COPY)"); - } - - try { - std::ostringstream label_stream; - label_stream << params_ << "#" << strategy_ << "#" << scope_; - state.SetLabel(label_stream.str()); - raft::device_resources handle(stream); - std::optional index; - - if (scope_ == Scope::SEARCH) { // also implies TransferStrategy::NO_COPY - rmm::device_uvector data(data_host_.size(), stream); - copy(data.data(), data_host_.data(), data_host_.size(), stream); - index.emplace(handle, params_, data.data()); - stream.synchronize(); - } - - // benchmark loop - for (auto _ : state) { - // managed or plain device memory initialized anew every time - rmm::device_uvector data(data_host_.size(), stream, dev_mem_res_.get()); - ValT* data_ptr = data.data(); - size_t allocation_size = data_host_.size() * sizeof(ValT); - - // Non-benchmarked part: using different methods to copy the data if necessary - switch (strategy_) { - case TransferStrategy::NO_COPY: // copy data to GPU before starting the timer. - copy(data_ptr, data_host_.data(), data_host_.size(), stream); - break; - case TransferStrategy::COPY_PINNED: - RAFT_CUDA_TRY( - cudaHostRegister(data_host_.data(), allocation_size, cudaHostRegisterDefault)); - break; - case TransferStrategy::MAP_PINNED: - RAFT_CUDA_TRY( - cudaHostRegister(data_host_.data(), allocation_size, cudaHostRegisterMapped)); - RAFT_CUDA_TRY(cudaHostGetDevicePointer(&data_ptr, data_host_.data(), 0)); - break; - case TransferStrategy::MANAGED: // sic! using std::memcpy rather than cuda copy - RAFT_CUDA_TRY(cudaMemAdvise(data_ptr, - allocation_size, - cudaMemAdviseSetPreferredLocation, - resource::get_device_id(handle))); - RAFT_CUDA_TRY(cudaMemAdvise(data_ptr, - allocation_size, - cudaMemAdviseSetAccessedBy, - resource::get_device_id(handle))); - RAFT_CUDA_TRY(cudaMemAdvise(data_ptr, - allocation_size, - cudaMemAdviseSetReadMostly, - resource::get_device_id(handle))); - std::memcpy(data_ptr, data_host_.data(), allocation_size); - break; - default: break; - } - - flush_L2_cache(); - { - // Timer synchronizes the stream, so all prior gpu work should be done before it sets off. - cuda_event_timer timer(state, stream); - switch (strategy_) { - case TransferStrategy::COPY_PLAIN: - case TransferStrategy::COPY_PINNED: - copy(data_ptr, data_host_.data(), data_host_.size(), stream); - default: break; - } - - if (scope_ != Scope::SEARCH) { index.emplace(handle, params_, data_ptr); } - if (scope_ != Scope::BUILD) { - index->search(handle, search_items_.data(), out_dists_.data(), out_idxs_.data()); - } - } - - if (scope_ != Scope::SEARCH) { index.reset(); } - - switch (strategy_) { - case TransferStrategy::COPY_PINNED: - case TransferStrategy::MAP_PINNED: - RAFT_CUDA_TRY(cudaHostUnregister(data_host_.data())); - break; - default: break; - } - } - } catch (raft::exception& e) { - state.SkipWithError(e.what()); - } catch (std::bad_alloc& e) { - state.SkipWithError(e.what()); - } - } - - private: - const params params_; - const TransferStrategy strategy_; - const Scope scope_; - device_resource dev_mem_res_; - bool data_does_not_fit_ = false; - - std::vector data_host_; - rmm::device_uvector search_items_; - rmm::device_uvector out_dists_; - rmm::device_uvector out_idxs_; -}; - -inline const std::vector kInputs{ - {2000000, 128, 1000, 32, 0}, {10000000, 128, 1000, 32, 0}, {10000, 8192, 1000, 32, 0}}; - -const std::vector kInputsFilter = - raft::util::itertools::product({size_t(10000000)}, // n_samples - {size_t(128)}, // n_dim - {size_t(1000)}, // n_queries - {size_t(255)}, // k - {0.0, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64} // removed_ratio - ); -inline const std::vector kAllStrategies{ - TransferStrategy::NO_COPY, TransferStrategy::MAP_PINNED, TransferStrategy::MANAGED}; -inline const std::vector kNoCopyOnly{TransferStrategy::NO_COPY}; - -inline const std::vector kScopeFull{Scope::BUILD_SEARCH}; -inline const std::vector kAllScopes{Scope::BUILD_SEARCH, Scope::SEARCH, Scope::BUILD}; - -#define KNN_REGISTER(ValT, IdxT, ImplT, inputs, strats, scope) \ - namespace BENCHMARK_PRIVATE_NAME(knn) { \ - using KNN = knn>; \ - RAFT_BENCH_REGISTER(KNN, #ValT "/" #IdxT "/" #ImplT, inputs, strats, scope); \ - } - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/brute_force_float_int64_t.cu b/cpp/bench/prims/neighbors/knn/brute_force_float_int64_t.cu deleted file mode 100644 index 7df0599670..0000000000 --- a/cpp/bench/prims/neighbors/knn/brute_force_float_int64_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(float, int64_t, brute_force_knn, kInputs, kAllStrategies, kScopeFull); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/brute_force_float_uint32_t.cu b/cpp/bench/prims/neighbors/knn/brute_force_float_uint32_t.cu deleted file mode 100644 index 9704d39e76..0000000000 --- a/cpp/bench/prims/neighbors/knn/brute_force_float_uint32_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(float, uint32_t, brute_force_knn, kInputs, kAllStrategies, kScopeFull); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu b/cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu deleted file mode 100644 index 5d762f6e85..0000000000 --- a/cpp/bench/prims/neighbors/knn/cagra_float_uint32_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../cagra_bench.cuh" - -namespace raft::bench::neighbors { - -CAGRA_REGISTER(float, uint32_t, kCagraInputs); - -} // namespace raft::bench::neighbors diff --git a/cpp/bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu deleted file mode 100644 index bf5118ceae..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_flat_filter_float_int64_t.cu +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Enable instantiation of search with filter -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(float, int64_t, ivf_flat_filter_knn, kInputsFilter, kNoCopyOnly, kScopeFull); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu deleted file mode 100644 index fbbb4f9acc..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(float, int64_t, ivf_flat_knn, kInputs, kNoCopyOnly, kAllScopes); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu deleted file mode 100644 index 7067dbe1b6..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(int8_t, int64_t, ivf_flat_knn, kInputs, kNoCopyOnly, kAllScopes); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu deleted file mode 100644 index 91fada3c28..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(uint8_t, int64_t, ivf_flat_knn, kInputs, kNoCopyOnly, kAllScopes); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu deleted file mode 100644 index 1840eca99d..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -#include -#include -namespace raft::bench::spatial { - -KNN_REGISTER(float, int64_t, ivf_pq_filter_knn, kInputsFilter, kNoCopyOnly, kScopeFull); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu deleted file mode 100644 index 83c4973c3a..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(float, int64_t, ivf_pq_knn, kInputs, kNoCopyOnly, kAllScopes); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu deleted file mode 100644 index 4ea281b11a..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(int8_t, int64_t, ivf_pq_knn, kInputs, kNoCopyOnly, kAllScopes); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu deleted file mode 100644 index 3313a49ba2..0000000000 --- a/cpp/bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../knn.cuh" - -namespace raft::bench::spatial { - -KNN_REGISTER(uint8_t, int64_t, ivf_pq_knn, kInputs, kNoCopyOnly, kAllScopes); - -} // namespace raft::bench::spatial diff --git a/cpp/bench/prims/neighbors/refine.cuh b/cpp/bench/prims/neighbors/refine.cuh deleted file mode 100644 index 0360babd82..0000000000 --- a/cpp/bench/prims/neighbors/refine.cuh +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#include -#include - -using namespace raft::neighbors; - -namespace raft::bench::neighbors { - -template -inline auto operator<<(std::ostream& os, const RefineInputs& p) -> std::ostream& -{ - os << p.n_rows << "#" << p.dim << "#" << p.n_queries << "#" << p.k0 << "#" << p.k << "#" - << (p.host_data ? "host" : "device"); - return os; -} - -template -class RefineAnn : public fixture { - public: - RefineAnn(RefineInputs p) : data(handle_, p) {} - - void run_benchmark(::benchmark::State& state) override - { - std::ostringstream label_stream; - label_stream << data.p; - state.SetLabel(label_stream.str()); - - auto old_mr = rmm::mr::get_current_device_resource(); - rmm::mr::pool_memory_resource pool_mr( - old_mr, rmm::percent_of_free_device_memory(50)); - rmm::mr::set_current_device_resource(&pool_mr); - - if (data.p.host_data) { - loop_on_state(state, [this]() { - raft::neighbors::refine(handle_, - data.dataset_host.view(), - data.queries_host.view(), - data.candidates_host.view(), - data.refined_indices_host.view(), - data.refined_distances_host.view(), - data.p.metric); - }); - } else { - loop_on_state(state, [&]() { - raft::neighbors::refine(handle_, - data.dataset.view(), - data.queries.view(), - data.candidates.view(), - data.refined_indices.view(), - data.refined_distances.view(), - data.p.metric); - }); - } - rmm::mr::set_current_device_resource(old_mr); - } - - private: - raft::device_resources handle_; - RefineHelper data; -}; - -template -std::vector> getInputs() -{ - std::vector> out; - raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded; - for (bool host_data : {true, false}) { - for (T n_queries : {1000, 10000}) { - for (T dim : {128, 512}) { - out.push_back(RefineInputs{n_queries, 2000000, dim, 32, 128, metric, host_data}); - out.push_back(RefineInputs{n_queries, 2000000, dim, 10, 40, metric, host_data}); - } - } - } - return out; -} - -} // namespace raft::bench::neighbors diff --git a/cpp/bench/prims/neighbors/refine_float_int64_t.cu b/cpp/bench/prims/neighbors/refine_float_int64_t.cu deleted file mode 100644 index d69a157eca..0000000000 --- a/cpp/bench/prims/neighbors/refine_float_int64_t.cu +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "refine.cuh" - -#include - -using namespace raft::neighbors; - -namespace raft::bench::neighbors { -using refine_float_int64 = RefineAnn; -RAFT_BENCH_REGISTER(refine_float_int64, "", getInputs()); -} // namespace raft::bench::neighbors diff --git a/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu b/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu deleted file mode 100644 index 9da536b6c7..0000000000 --- a/cpp/bench/prims/neighbors/refine_uint8_t_int64_t.cu +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "refine.cuh" - -#include - -using namespace raft::neighbors; - -namespace raft::bench::neighbors { -using refine_uint8_int64 = RefineAnn; -RAFT_BENCH_REGISTER(refine_uint8_int64, "", getInputs()); -} // namespace raft::bench::neighbors diff --git a/cpp/template/CMakeLists.txt b/cpp/template/CMakeLists.txt deleted file mode 100644 index 40a3795ed1..0000000000 --- a/cpp/template/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) - -# ------------- configure rapids-cmake --------------# - -include(cmake/thirdparty/fetch_rapids.cmake) -include(rapids-cmake) -include(rapids-cpm) -include(rapids-cuda) -include(rapids-export) -include(rapids-find) - -# ------------- configure project --------------# - -rapids_cuda_init_architectures(test_raft) - -project(test_raft LANGUAGES CXX CUDA) - -# ------------- configure raft -----------------# - -rapids_cpm_init() -include(cmake/thirdparty/get_raft.cmake) - -# -------------- compile tasks ----------------- # -add_executable(CAGRA_EXAMPLE src/cagra_example.cu) -target_link_libraries(CAGRA_EXAMPLE PRIVATE raft::raft raft::compiled) - -add_executable(IVF_FLAT_EXAMPLE src/ivf_flat_example.cu) -target_link_libraries(IVF_FLAT_EXAMPLE PRIVATE raft::raft raft::compiled) - -add_executable(IVF_PQ_EXAMPLE src/ivf_pq_example.cu) -target_link_libraries(IVF_PQ_EXAMPLE PRIVATE raft::raft raft::compiled) diff --git a/cpp/template/README.md b/cpp/template/README.md deleted file mode 100644 index 05ec48964f..0000000000 --- a/cpp/template/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Example RAFT Project Template - -This template project provides a drop-in sample to either start building a new application with, or using RAFT in an existing CMake project. - -First, please refer to our [installation docs](https://docs.rapids.ai/api/raft/stable/build.html#cuda-gpu-requirements) for the minimum requirements to use RAFT. - -Once the minimum requirements are satisfied, this example template application can be built with the provided `build.sh` script. This is a bash script that calls the appropriate CMake commands, so you can look into it to see the typical CMake based build workflow. - -This directory (`RAFT_SOURCE/cpp/template`) can be copied directly in order to build a new application with RAFT. - -RAFT can be integrated into an existing CMake project by copying the contents in the `configure rapids-cmake` and `configure raft` sections of the provided `CMakeLists.txt` into your project, along with `cmake/thirdparty/get_raft.cmake`. - -Make sure to link against the appropriate Cmake targets. Use `raft::raft`to add make the headers available and `raft::compiled` when utilizing the shared library. - -```cmake -target_link_libraries(your_app_target PRIVATE raft::raft raft::compiled) -``` - diff --git a/cpp/template/build.sh b/cpp/template/build.sh deleted file mode 100755 index 49c17f7499..0000000000 --- a/cpp/template/build.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Copyright (c) 2023-2024, NVIDIA CORPORATION. - -# raft empty project template build script - -# Abort script on first error -set -e - -PARALLEL_LEVEL=${PARALLEL_LEVEL:=`nproc`} - -BUILD_TYPE=Release -BUILD_DIR=build/ - -RAFT_REPO_REL="" -EXTRA_CMAKE_ARGS="" -set -e - - -if [[ ${RAFT_REPO_REL} != "" ]]; then - RAFT_REPO_PATH="`readlink -f \"${RAFT_REPO_REL}\"`" - EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCPM_raft_SOURCE=${RAFT_REPO_PATH}" -fi - -if [ "$1" == "clean" ]; then - rm -rf build - exit 0 -fi - -mkdir -p $BUILD_DIR -cd $BUILD_DIR - -cmake \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DRAFT_NVTX=OFF \ - -DCMAKE_CUDA_ARCHITECTURES="NATIVE" \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - ${EXTRA_CMAKE_ARGS} \ - ../ - -cmake --build . -j${PARALLEL_LEVEL} diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake deleted file mode 100644 index 6f4c627ed4..0000000000 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ /dev/null @@ -1,21 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -# Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.12") - -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) -endif() -include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) diff --git a/cpp/template/cmake/thirdparty/get_raft.cmake b/cpp/template/cmake/thirdparty/get_raft.cmake deleted file mode 100644 index 07b0897be0..0000000000 --- a/cpp/template/cmake/thirdparty/get_raft.cmake +++ /dev/null @@ -1,68 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -# Use RAPIDS_VERSION from cmake/thirdparty/fetch_rapids.cmake -set(RAFT_VERSION "${RAPIDS_VERSION}") -set(RAFT_FORK "rapidsai") -set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}") - -function(find_and_configure_raft) - set(oneValueArgs VERSION FORK PINNED_TAG COMPILE_LIBRARY ENABLE_NVTX ENABLE_MNMG_DEPENDENCIES) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN} ) - - set(RAFT_COMPONENTS "") - if(PKG_COMPILE_LIBRARY) - string(APPEND RAFT_COMPONENTS " compiled") - endif() - - if(PKG_ENABLE_MNMG_DEPENDENCIES) - string(APPEND RAFT_COMPONENTS " distributed") - endif() - - #----------------------------------------------------- - # Invoke CPM find_package() - #----------------------------------------------------- - # Since the RAFT_NVTX option is used by targets generated by - # find_package(RAFT_NVTX) and when building from source we want to - # make `RAFT_NVTX` a cache variable so we get consistent - # behavior - # - set(RAFT_NVTX ${PKG_ENABLE_NVTX} CACHE BOOL "Enable raft nvtx logging" FORCE) - rapids_cpm_find(raft ${PKG_VERSION} - GLOBAL_TARGETS raft::raft - BUILD_EXPORT_SET raft-template-exports - INSTALL_EXPORT_SET raft-template-exports - COMPONENTS ${RAFT_COMPONENTS} - CPM_ARGS - GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git - GIT_TAG ${PKG_PINNED_TAG} - SOURCE_SUBDIR cpp - OPTIONS - "BUILD_TESTS OFF" - "BUILD_PRIMS_BENCH OFF" - "BUILD_ANN_BENCH OFF" - "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" - ) -endfunction() - -# Change pinned tag here to test a commit in CI -# To use a different RAFT locally, set the CMake variable -# CPM_raft_SOURCE=/path/to/local/raft -find_and_configure_raft(VERSION ${RAFT_VERSION}.00 - FORK ${RAFT_FORK} - PINNED_TAG ${RAFT_PINNED_TAG} - COMPILE_LIBRARY ON - ENABLE_MNMG_DEPENDENCIES OFF - ENABLE_NVTX OFF -) diff --git a/cpp/template/src/cagra_example.cu b/cpp/template/src/cagra_example.cu deleted file mode 100644 index 3c1be8b4f8..0000000000 --- a/cpp/template/src/cagra_example.cu +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "common.cuh" - -#include -#include -#include -#include - -#include -#include - -#include - -void cagra_build_search_simple(raft::device_resources const& dev_resources, - raft::device_matrix_view dataset, - raft::device_matrix_view queries) -{ - using namespace raft::neighbors; - - int64_t topk = 12; - int64_t n_queries = queries.extent(0); - - // create output arrays - auto neighbors = raft::make_device_matrix(dev_resources, n_queries, topk); - auto distances = raft::make_device_matrix(dev_resources, n_queries, topk); - - // use default index parameters - cagra::index_params index_params; - - std::cout << "Building CAGRA index (search graph)" << std::endl; - auto index = cagra::build(dev_resources, index_params, dataset); - - std::cout << "CAGRA index has " << index.size() << " vectors" << std::endl; - std::cout << "CAGRA graph has degree " << index.graph_degree() << ", graph size [" - << index.graph().extent(0) << ", " << index.graph().extent(1) << "]" << std::endl; - - // use default search parameters - cagra::search_params search_params; - // search K nearest neighbors - cagra::search( - dev_resources, search_params, index, queries, neighbors.view(), distances.view()); - - // The call to ivf_flat::search is asynchronous. Before accessing the data, sync by calling - // raft::resource::sync_stream(dev_resources); - - print_results(dev_resources, neighbors.view(), distances.view()); -} - -int main() -{ - raft::device_resources dev_resources; - - // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr( - rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); - rmm::mr::set_current_device_resource(&pool_mr); - - // Alternatively, one could define a pool allocator for temporary arrays (used within RAFT - // algorithms). In that case only the internal arrays would use the pool, any other allocation - // uses the default RMM memory resource. Here is how to change the workspace memory resource to - // a pool with 2 GiB upper limit. - // raft::resource::set_workspace_to_pool_resource(dev_resources, 2 * 1024 * 1024 * 1024ull); - - // Create input arrays. - int64_t n_samples = 10000; - int64_t n_dim = 90; - int64_t n_queries = 10; - auto dataset = raft::make_device_matrix(dev_resources, n_samples, n_dim); - auto queries = raft::make_device_matrix(dev_resources, n_queries, n_dim); - generate_dataset(dev_resources, dataset.view(), queries.view()); - - // Simple build and search example. - cagra_build_search_simple(dev_resources, - raft::make_const_mdspan(dataset.view()), - raft::make_const_mdspan(queries.view())); -} diff --git a/cpp/template/src/common.cuh b/cpp/template/src/common.cuh deleted file mode 100644 index 3057257537..0000000000 --- a/cpp/template/src/common.cuh +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -// Fill dataset and queries with synthetic data. -void generate_dataset(raft::device_resources const& dev_resources, - raft::device_matrix_view dataset, - raft::device_matrix_view queries) -{ - auto labels = raft::make_device_vector(dev_resources, dataset.extent(0)); - raft::random::make_blobs(dev_resources, dataset, labels.view()); - raft::random::RngState r(1234ULL); - raft::random::uniform(dev_resources, - r, - raft::make_device_vector_view(queries.data_handle(), queries.size()), - -1.0f, - 1.0f); -} - -// Copy the results to host and print them -template -void print_results(raft::device_resources const& dev_resources, - raft::device_matrix_view neighbors, - raft::device_matrix_view distances) -{ - int64_t topk = neighbors.extent(1); - auto neighbors_host = raft::make_host_matrix(neighbors.extent(0), topk); - auto distances_host = raft::make_host_matrix(distances.extent(0), topk); - - cudaStream_t stream = raft::resource::get_cuda_stream(dev_resources); - - raft::copy(neighbors_host.data_handle(), neighbors.data_handle(), neighbors.size(), stream); - raft::copy(distances_host.data_handle(), distances.data_handle(), distances.size(), stream); - - // The calls to RAFT algorithms and raft::copy is asynchronous. - // We need to sync the stream before accessing the data. - raft::resource::sync_stream(dev_resources, stream); - - for (int query_id = 0; query_id < neighbors.extent(0); query_id++) { - std::cout << "Query " << query_id << " neighbor indices: "; - raft::print_host_vector("", &neighbors_host(query_id, 0), topk, std::cout); - std::cout << "Query " << query_id << " neighbor distances: "; - raft::print_host_vector("", &distances_host(query_id, 0), topk, std::cout); - } -} - -/** Subsample the dataset to create a training set*/ -raft::device_matrix subsample( - raft::device_resources const& dev_resources, - raft::device_matrix_view dataset, - raft::device_vector_view data_indices, - float fraction) -{ - int64_t n_samples = dataset.extent(0); - int64_t n_dim = dataset.extent(1); - int64_t n_train = n_samples * fraction; - auto trainset = raft::make_device_matrix(dev_resources, n_train, n_dim); - - int seed = 137; - raft::random::RngState rng(seed); - auto train_indices = raft::make_device_vector(dev_resources, n_train); - - raft::random::sample_without_replacement( - dev_resources, rng, data_indices, std::nullopt, train_indices.view(), std::nullopt); - - raft::matrix::copy_rows( - dev_resources, dataset, trainset.view(), raft::make_const_mdspan(train_indices.view())); - - return trainset; -} diff --git a/cpp/template/src/ivf_flat_example.cu b/cpp/template/src/ivf_flat_example.cu deleted file mode 100644 index 60694aea0f..0000000000 --- a/cpp/template/src/ivf_flat_example.cu +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "common.cuh" - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -void ivf_flat_build_search_simple(raft::device_resources const& dev_resources, - raft::device_matrix_view dataset, - raft::device_matrix_view queries) -{ - using namespace raft::neighbors; - - ivf_flat::index_params index_params; - index_params.n_lists = 1024; - index_params.kmeans_trainset_fraction = 0.1; - index_params.metric = raft::distance::DistanceType::L2Expanded; - - std::cout << "Building IVF-Flat index" << std::endl; - auto index = ivf_flat::build(dev_resources, index_params, dataset); - - std::cout << "Number of clusters " << index.n_lists() << ", number of vectors added to index " - << index.size() << std::endl; - - // Create output arrays. - int64_t topk = 10; - int64_t n_queries = queries.extent(0); - auto neighbors = raft::make_device_matrix(dev_resources, n_queries, topk); - auto distances = raft::make_device_matrix(dev_resources, n_queries, topk); - - // Set search parameters. - ivf_flat::search_params search_params; - search_params.n_probes = 50; - - // Search K nearest neighbors for each of the queries. - ivf_flat::search( - dev_resources, search_params, index, queries, neighbors.view(), distances.view()); - - // The call to ivf_flat::search is asynchronous. Before accessing the data, sync by calling - // raft::resource::sync_stream(dev_resources); - - print_results(dev_resources, neighbors.view(), distances.view()); -} - -void ivf_flat_build_extend_search(raft::device_resources const& dev_resources, - raft::device_matrix_view dataset, - raft::device_matrix_view queries) -{ - using namespace raft::neighbors; - - // Define dataset indices. - auto data_indices = raft::make_device_vector(dev_resources, dataset.extent(0)); - thrust::counting_iterator first(0); - thrust::device_ptr ptr(data_indices.data_handle()); - thrust::copy( - raft::resource::get_thrust_policy(dev_resources), first, first + dataset.extent(0), ptr); - - // Sub-sample the dataset to create a training set. - auto trainset = - subsample(dev_resources, dataset, raft::make_const_mdspan(data_indices.view()), 0.1); - - ivf_flat::index_params index_params; - index_params.n_lists = 100; - index_params.metric = raft::distance::DistanceType::L2Expanded; - index_params.add_data_on_build = false; - - std::cout << "\nRun k-means clustering using the training set" << std::endl; - auto index = - ivf_flat::build(dev_resources, index_params, raft::make_const_mdspan(trainset.view())); - - std::cout << "Number of clusters " << index.n_lists() << ", number of vectors added to index " - << index.size() << std::endl; - - std::cout << "Filling index with the dataset vectors" << std::endl; - index = ivf_flat::extend(dev_resources, - dataset, - std::make_optional(raft::make_const_mdspan(data_indices.view())), - index); - - std::cout << "Index size after addin dataset vectors " << index.size() << std::endl; - - // Set search parameters. - ivf_flat::search_params search_params; - search_params.n_probes = 10; - - // Create output arrays. - int64_t topk = 10; - int64_t n_queries = queries.extent(0); - auto neighbors = raft::make_device_matrix(dev_resources, n_queries, topk); - auto distances = raft::make_device_matrix(dev_resources, n_queries, topk); - - // Search K nearest neighbors for each queries. - ivf_flat::search( - dev_resources, search_params, index, queries, neighbors.view(), distances.view()); - - // The call to ivf_flat::search is asynchronous. Before accessing the data, sync using: - // raft::resource::sync_stream(dev_resources); - - print_results(dev_resources, neighbors.view(), distances.view()); -} - -int main() -{ - raft::device_resources dev_resources; - - // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr( - rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); - rmm::mr::set_current_device_resource(&pool_mr); - - // Alternatively, one could define a pool allocator for temporary arrays (used within RAFT - // algorithms). In that case only the internal arrays would use the pool, any other allocation - // uses the default RMM memory resource. Here is how to change the workspace memory resource to - // a pool with 2 GiB upper limit. - // raft::resource::set_workspace_to_pool_resource(dev_resources, 2 * 1024 * 1024 * 1024ull); - - // Create input arrays. - int64_t n_samples = 10000; - int64_t n_dim = 3; - int64_t n_queries = 10; - auto dataset = raft::make_device_matrix(dev_resources, n_samples, n_dim); - auto queries = raft::make_device_matrix(dev_resources, n_queries, n_dim); - generate_dataset(dev_resources, dataset.view(), queries.view()); - - // Simple build and search example. - ivf_flat_build_search_simple(dev_resources, - raft::make_const_mdspan(dataset.view()), - raft::make_const_mdspan(queries.view())); - - // Build and extend example. - ivf_flat_build_extend_search(dev_resources, - raft::make_const_mdspan(dataset.view()), - raft::make_const_mdspan(queries.view())); -} diff --git a/cpp/template/src/ivf_pq_example.cu b/cpp/template/src/ivf_pq_example.cu deleted file mode 100644 index 4bc0ba4348..0000000000 --- a/cpp/template/src/ivf_pq_example.cu +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "common.cuh" - -#include -#include -#include -#include - -#include -#include - -#include - -void ivf_pq_build_search(raft::device_resources const& dev_resources, - raft::device_matrix_view dataset, - raft::device_matrix_view queries) -{ - using namespace raft::neighbors; // NOLINT - - ivf_pq::index_params index_params; - index_params.n_lists = 1024; - index_params.kmeans_trainset_fraction = 0.1; - index_params.metric = raft::distance::DistanceType::L2Expanded; - index_params.pq_bits = 8; - index_params.pq_dim = 2; - - std::cout << "Building IVF-PQ index" << std::endl; - auto index = ivf_pq::build(dev_resources, index_params, dataset); - - std::cout << "Number of clusters " << index.n_lists() << ", number of vectors added to index " - << index.size() << std::endl; - - // Set search parameters. - ivf_pq::search_params search_params; - search_params.n_probes = 50; - // Set the internal search precision to 16-bit floats; - // usually, this improves the performance at a slight cost to the recall. - search_params.internal_distance_dtype = CUDA_R_16F; - search_params.lut_dtype = CUDA_R_16F; - - // Create output arrays. - int64_t topk = 10; - int64_t n_queries = queries.extent(0); - auto neighbors = raft::make_device_matrix(dev_resources, n_queries, topk); - auto distances = raft::make_device_matrix(dev_resources, n_queries, topk); - - // Search K nearest neighbors for each of the queries. - ivf_pq::search( - dev_resources, search_params, index, queries, neighbors.view(), distances.view()); - - // Re-ranking operation: refine the initial search results by computing exact distances - int64_t topk_refined = 7; - auto neighbors_refined = - raft::make_device_matrix(dev_resources, n_queries, topk_refined); - auto distances_refined = raft::make_device_matrix(dev_resources, n_queries, topk_refined); - - // Note, refinement requires the original dataset and the queries. - // Don't forget to specify the same distance metric as used by the index. - raft::neighbors::refine(dev_resources, - dataset, - queries, - raft::make_const_mdspan(neighbors.view()), - neighbors_refined.view(), - distances_refined.view(), - index.metric()); - - // Show both the original and the refined results - std::cout << std::endl << "Original results:" << std::endl; - print_results(dev_resources, neighbors.view(), distances.view()); - std::cout << std::endl << "Refined results:" << std::endl; - print_results(dev_resources, neighbors_refined.view(), distances_refined.view()); -} - -int main() -{ - raft::device_resources dev_resources; - - // Set pool memory resource with 1 GiB initial pool size. All allocations use the same pool. - rmm::mr::pool_memory_resource pool_mr( - rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull); - rmm::mr::set_current_device_resource(&pool_mr); - - // Alternatively, one could define a pool allocator for temporary arrays (used within RAFT - // algorithms). In that case only the internal arrays would use the pool, any other allocation - // uses the default RMM memory resource. Here is how to change the workspace memory resource to - // a pool with 2 GiB upper limit. - // raft::resource::set_workspace_to_pool_resource(dev_resources, 2 * 1024 * 1024 * 1024ull); - - // Create input arrays. - int64_t n_samples = 10000; - int64_t n_dim = 3; - int64_t n_queries = 10; - auto dataset = raft::make_device_matrix(dev_resources, n_samples, n_dim); - auto queries = raft::make_device_matrix(dev_resources, n_queries, n_dim); - generate_dataset(dev_resources, dataset.view(), queries.view()); - - // Simple build and search example. - ivf_pq_build_search(dev_resources, - raft::make_const_mdspan(dataset.view()), - raft::make_const_mdspan(queries.view())); -} diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 5d504d2100..ffc5e3273f 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -91,415 +91,240 @@ endfunction() # test sources ################################################################################## # ################################################################################################## -# ################################################################################################## -# * distance tests ------------------------------------------------------------------------- - -if(BUILD_TESTS) - ConfigureTest( - NAME CLUSTER_TEST PATH cluster/kmeans.cu cluster/kmeans_balanced.cu cluster/kmeans_find_k.cu - cluster/cluster_solvers.cu cluster/linkage.cu cluster/spectral.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - CORE_TEST - PATH - core/bitmap.cu - core/bitset.cu - core/device_resources_manager.cpp - core/device_setter.cpp - core/logger.cpp - core/math_device.cu - core/math_host.cpp - core/operators_device.cu - core/operators_host.cpp - core/handle.cpp - core/interruptible.cu - core/nvtx.cpp - core/mdarray.cu - core/mdbuffer.cu - core/mdspan_copy.cpp - core/mdspan_copy.cu - core/mdspan_utils.cu - core/numpy_serializer.cu - core/memory_type.cpp - core/sparse_matrix.cu - core/sparse_matrix.cpp - core/span.cpp - core/span.cu - core/stream_view.cpp - core/temporary_device_buffer.cu - test.cpp - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME CORE_TEST PATH core/stream_view.cpp core/mdspan_copy.cpp LIB EXPLICIT_INSTANTIATE_ONLY - NOCUDA - ) - - ConfigureTest( - NAME - DISTANCE_TEST - PATH - distance/dist_adj.cu - distance/dist_adj_distance_instance.cu - distance/dist_canberra.cu - distance/dist_correlation.cu - distance/dist_cos.cu - distance/dist_dice.cu - distance/dist_hamming.cu - distance/dist_hellinger.cu - distance/dist_inner_product.cu - distance/dist_jensen_shannon.cu - distance/dist_kl_divergence.cu - distance/dist_l1.cu - distance/dist_l2_exp.cu - distance/dist_l2_unexp.cu - distance/dist_l2_sqrt_exp.cu - distance/dist_l_inf.cu - distance/dist_lp_unexp.cu - distance/dist_russell_rao.cu - distance/masked_nn.cu - distance/masked_nn_compress_to_bits.cu - distance/fused_l2_nn.cu - distance/fused_cosine_nn.cu - distance/gram.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - list( - APPEND - EXT_HEADER_TEST_SOURCES - ext_headers/raft_neighbors_brute_force.cu - ext_headers/raft_distance_distance.cu - ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - ext_headers/raft_matrix_detail_select_k.cu - ext_headers/raft_neighbors_ball_cover.cu - ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - ext_headers/raft_distance_fused_l2_nn.cu - ext_headers/raft_neighbors_ivf_pq.cu - ext_headers/raft_neighbors_ivf_flat.cu - ext_headers/raft_core_logger.cpp - ext_headers/raft_neighbors_refine.cu - ext_headers/raft_neighbors_detail_ivf_flat_search.cu - ext_headers/raft_linalg_detail_coalesced_reduction.cu - ext_headers/raft_sparse_matrix_detail_select_k.cu - ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu - ) - - # Test that the split headers compile in isolation with: - # - # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined - # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined - # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. - ConfigureTest( - NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB - EXPLICIT_INSTANTIATE_ONLY - ) - ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) - ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - ConfigureTest(NAME LABEL_TEST PATH label/label.cu label/merge_labels.cu) - - ConfigureTest( - NAME - LINALG_TEST - PATH - linalg/add.cu - linalg/axpy.cu - linalg/binary_op.cu - linalg/cholesky_r1.cu - linalg/coalesced_reduction.cu - linalg/divide.cu - linalg/dot.cu - linalg/eig.cu - linalg/eig_sel.cu - linalg/gemm_layout.cu - linalg/gemv.cu - linalg/map.cu - linalg/map_then_reduce.cu - linalg/matrix_vector.cu - linalg/matrix_vector_op.cu - linalg/mean_squared_error.cu - linalg/multiply.cu - linalg/norm.cu - linalg/normalize.cu - linalg/power.cu - linalg/randomized_svd.cu - linalg/reduce.cu - linalg/reduce_cols_by_key.cu - linalg/reduce_rows_by_key.cu - linalg/rsvd.cu - linalg/sqrt.cu - linalg/strided_reduction.cu - linalg/subtract.cu - linalg/svd.cu - linalg/ternary_op.cu - linalg/transpose.cu - linalg/unary_op.cu - ) - - ConfigureTest( - NAME - MATRIX_TEST - PATH - matrix/argmax.cu - matrix/argmin.cu - matrix/columnSort.cu - matrix/diagonal.cu - matrix/gather.cu - matrix/scatter.cu - matrix/eye.cu - matrix/linewise_op.cu - matrix/math.cu - matrix/matrix.cu - matrix/norm.cu - matrix/reverse.cu - matrix/sample_rows.cu - matrix/slice.cu - matrix/triangular.cu - sparse/spectral_matrix.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest(NAME MATRIX_SELECT_TEST PATH matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) - - ConfigureTest( - NAME MATRIX_SELECT_LARGE_TEST PATH matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - RANDOM_TEST - PATH - random/make_blobs.cu - random/make_regression.cu - random/multi_variable_gaussian.cu - random/rng_pcg_host_api.cu - random/permute.cu - random/rng.cu - random/rng_discrete.cu - random/rng_int.cu - random/rmat_rectangular_generator.cu - random/sample_without_replacement.cu - random/excess_sampling.cu - ) - - ConfigureTest( - NAME SOLVERS_TEST PATH cluster/cluster_solvers_deprecated.cu linalg/eigen_solvers.cu lap/lap.cu - sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_TEST - PATH - sparse/add.cu - sparse/convert_coo.cu - sparse/convert_csr.cu - sparse/csr_row_slice.cu - sparse/csr_to_dense.cu - sparse/csr_transpose.cu - sparse/degree.cu - sparse/filter.cu - sparse/masked_matmul.cu - sparse/norm.cu - sparse/normalize.cu - sparse/reduce.cu - sparse/row_op.cu - sparse/sddmm.cu - sparse/select_k_csr.cu - sparse/sort.cu - sparse/spgemmi.cu - sparse/spmm.cu - sparse/symmetrize.cu - ) - - ConfigureTest( - NAME SPARSE_DIST_TEST PATH sparse/dist_coo_spmv.cu sparse/distance.cu sparse/gram.cu LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME SPARSE_NEIGHBORS_TEST PATH sparse/neighbors/cross_component_nn.cu - sparse/neighbors/brute_force.cu sparse/neighbors/knn_graph.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - NEIGHBORS_TEST - PATH - neighbors/knn.cu - neighbors/fused_l2_knn.cu - neighbors/tiled_knn.cu - neighbors/haversine.cu - neighbors/ball_cover.cu - neighbors/epsilon_neighborhood.cu - neighbors/refine.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME NEIGHBORS_ANN_BRUTE_FORCE_TEST PATH neighbors/ann_brute_force/test_float.cu LIB - EXPLICIT_INSTANTIATE_ONLY GPUS 1 PERCENT 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_CAGRA_TEST - PATH - neighbors/ann_cagra/test_float_uint32_t.cu - neighbors/ann_cagra/test_half_uint32_t.cu - neighbors/ann_cagra/test_int8_t_uint32_t.cu - neighbors/ann_cagra/test_uint8_t_uint32_t.cu - neighbors/ann_cagra/test_float_int64_t.cu - neighbors/ann_cagra/test_half_int64_t.cu - neighbors/ann_cagra_vpq/test_float_int64_t.cu - neighbors/ann_cagra_vpq/test_float_uint32_t.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_IVF_TEST - PATH - neighbors/ann_ivf_flat/test_filter_float_int64_t.cu - neighbors/ann_ivf_flat/test_float_int64_t.cu - neighbors/ann_ivf_flat/test_int8_t_int64_t.cu - neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu - neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu - neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_float_filt32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_filt32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_filt32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_half_filt32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_filt32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_filt32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_half_filt32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset32.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu - ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu - neighbors/ann_ivf_pq/test_float_uint32_t.cu - neighbors/ann_ivf_pq/test_float_int64_t.cu - neighbors/ann_ivf_pq/test_int8_t_int64_t.cu - neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu - neighbors/ann_ivf_pq/test_filter_float_int64_t.cu - neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - NEIGHBORS_ANN_NN_DESCENT_TEST - PATH - neighbors/ann_nn_descent/test_float_uint32_t.cu - neighbors/ann_nn_descent/test_int8_t_uint32_t.cu - neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - # TODO: Investigate why this test is failing Reference issue - # https://github.com/rapidsai/raft/issues/2450 - # neighbors/ann_nn_descent/test_batch_float_uint32_t.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME - STATS_TEST - PATH - stats/accuracy.cu - stats/adjusted_rand_index.cu - stats/completeness_score.cu - stats/contingencyMatrix.cu - stats/cov.cu - stats/dispersion.cu - stats/entropy.cu - stats/histogram.cu - stats/homogeneity_score.cu - stats/information_criterion.cu - stats/kl_divergence.cu - stats/mean.cu - stats/meanvar.cu - stats/mean_center.cu - stats/minmax.cu - stats/mutual_info_score.cu - stats/neighborhood_recall.cu - stats/r2_score.cu - stats/rand_index.cu - stats/regression_metrics.cu - stats/silhouette_score.cu - stats/stddev.cu - stats/sum.cu - stats/trustworthiness.cu - stats/weighted_mean.cu - stats/v_measure.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - UTILS_TEST - PATH - core/seive.cu - util/bitonic_sort.cu - util/cudart_utils.cpp - util/device_atomics.cu - util/integer_utils.cpp - util/integer_utils.cu - util/memory_type_dispatcher.cu - util/popc.cu - util/pow2_utils.cu - util/reduction.cu - ) +ConfigureTest( + NAME + CORE_TEST + PATH + core/bitmap.cu + core/bitset.cu + core/device_resources_manager.cpp + core/device_setter.cpp + core/logger.cpp + core/math_device.cu + core/math_host.cpp + core/operators_device.cu + core/operators_host.cpp + core/handle.cpp + core/interruptible.cu + core/nvtx.cpp + core/mdarray.cu + core/mdbuffer.cu + core/mdspan_copy.cpp + core/mdspan_copy.cu + core/mdspan_utils.cu + core/numpy_serializer.cu + core/memory_type.cpp + core/sparse_matrix.cu + core/sparse_matrix.cpp + core/span.cpp + core/span.cu + core/stream_view.cpp + core/temporary_device_buffer.cu + test.cpp + LIB + EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest( + NAME CORE_TEST PATH core/stream_view.cpp core/mdspan_copy.cpp LIB EXPLICIT_INSTANTIATE_ONLY + NOCUDA +) + +list(APPEND EXT_HEADER_TEST_SOURCES ext_headers/raft_core_logger.cpp) + +# Test that the split headers compile in isolation with: +# +# * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined +# * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined +# * EXT_HEADERS_TEST_IMPLICIT: no macros defined. +ConfigureTest( + NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB + EXPLICIT_INSTANTIATE_ONLY +) +ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) +ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) + +ConfigureTest(NAME LABEL_TEST PATH label/label.cu label/merge_labels.cu) + +ConfigureTest( + NAME + LINALG_TEST + PATH + linalg/add.cu + linalg/axpy.cu + linalg/binary_op.cu + linalg/cholesky_r1.cu + linalg/coalesced_reduction.cu + linalg/divide.cu + linalg/dot.cu + linalg/eig.cu + linalg/eig_sel.cu + linalg/gemm_layout.cu + linalg/gemv.cu + linalg/map.cu + linalg/map_then_reduce.cu + linalg/matrix_vector.cu + linalg/matrix_vector_op.cu + linalg/mean_squared_error.cu + linalg/multiply.cu + linalg/norm.cu + linalg/normalize.cu + linalg/power.cu + linalg/randomized_svd.cu + linalg/reduce.cu + linalg/reduce_cols_by_key.cu + linalg/reduce_rows_by_key.cu + linalg/rsvd.cu + linalg/sqrt.cu + linalg/strided_reduction.cu + linalg/subtract.cu + linalg/svd.cu + linalg/ternary_op.cu + linalg/transpose.cu + linalg/unary_op.cu +) + +ConfigureTest( + NAME + MATRIX_TEST + PATH + matrix/argmax.cu + matrix/argmin.cu + matrix/columnSort.cu + matrix/diagonal.cu + matrix/gather.cu + matrix/scatter.cu + matrix/eye.cu + matrix/linewise_op.cu + matrix/math.cu + matrix/matrix.cu + matrix/norm.cu + matrix/reverse.cu + matrix/sample_rows.cu + matrix/slice.cu + matrix/triangular.cu + sparse/spectral_matrix.cu + LIB + EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest(NAME MATRIX_SELECT_TEST PATH matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) + +ConfigureTest( + NAME MATRIX_SELECT_LARGE_TEST PATH matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest( + NAME + RANDOM_TEST + PATH + random/make_blobs.cu + random/make_regression.cu + random/multi_variable_gaussian.cu + random/rng_pcg_host_api.cu + random/permute.cu + random/rng.cu + random/rng_discrete.cu + random/rng_int.cu + random/rmat_rectangular_generator.cu + random/sample_without_replacement.cu + random/excess_sampling.cu +) + +ConfigureTest( + NAME SOLVERS_TEST PATH cluster/cluster_solvers_deprecated.cu linalg/eigen_solvers.cu lap/lap.cu + sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest( + NAME + SPARSE_TEST + PATH + sparse/add.cu + sparse/convert_coo.cu + sparse/convert_csr.cu + sparse/csr_row_slice.cu + sparse/csr_to_dense.cu + sparse/csr_transpose.cu + sparse/degree.cu + sparse/filter.cu + sparse/masked_matmul.cu + sparse/norm.cu + sparse/normalize.cu + sparse/reduce.cu + sparse/row_op.cu + sparse/sddmm.cu + sparse/select_k_csr.cu + sparse/sort.cu + sparse/spgemmi.cu + sparse/spmm.cu + sparse/symmetrize.cu +) + +ConfigureTest( + NAME SPARSE_DIST_TEST PATH sparse/dist_coo_spmv.cu sparse/distance.cu sparse/gram.cu LIB + EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest( + NAME SPARSE_NEIGHBORS_TEST PATH sparse/neighbors/cross_component_nn.cu + sparse/neighbors/brute_force.cu sparse/neighbors/knn_graph.cu LIB EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest( + NAME NEIGHBORS_TEST PATH neighbors/haversine.cu neighbors/ball_cover.cu + neighbors/epsilon_neighborhood.cu LIB EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest( + NAME + STATS_TEST + PATH + stats/accuracy.cu + stats/adjusted_rand_index.cu + stats/completeness_score.cu + stats/contingencyMatrix.cu + stats/cov.cu + stats/dispersion.cu + stats/entropy.cu + stats/histogram.cu + stats/homogeneity_score.cu + stats/information_criterion.cu + stats/kl_divergence.cu + stats/mean.cu + stats/meanvar.cu + stats/mean_center.cu + stats/minmax.cu + stats/mutual_info_score.cu + stats/neighborhood_recall.cu + stats/r2_score.cu + stats/rand_index.cu + stats/regression_metrics.cu + stats/silhouette_score.cu + stats/stddev.cu + stats/sum.cu + stats/trustworthiness.cu + stats/weighted_mean.cu + stats/v_measure.cu + LIB + EXPLICIT_INSTANTIATE_ONLY +) + +ConfigureTest( + NAME + UTILS_TEST + PATH + core/seive.cu + util/bitonic_sort.cu + util/cudart_utils.cpp + util/device_atomics.cu + util/integer_utils.cpp + util/integer_utils.cu + util/memory_type_dispatcher.cu + util/popc.cu + util/pow2_utils.cu + util/reduction.cu +) endif() # ################################################################################################## diff --git a/cpp/test/neighbors/ann_brute_force.cuh b/cpp/test/neighbors/ann_brute_force.cuh deleted file mode 100644 index 6370c5ee83..0000000000 --- a/cpp/test/neighbors/ann_brute_force.cuh +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../test_utils.cuh" -#include "ann_utils.cuh" -#include "knn_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include - -namespace raft::neighbors::brute_force { - -template -struct AnnBruteForceInputs { - IdxT num_queries; - IdxT num_db_vecs; - IdxT dim; - IdxT k; - raft::distance::DistanceType metric; - bool host_dataset; -}; - -template -::std::ostream& operator<<(::std::ostream& os, const AnnBruteForceInputs& p) -{ - os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", " - << static_cast(p.metric) << ", " << p.host_dataset << '}' << std::endl; - return os; -} - -template -class AnnBruteForceTest : public ::testing::TestWithParam> { - public: - AnnBruteForceTest() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam>::GetParam()), - database(0, stream_), - search_queries(0, stream_) - { - } - - void testBruteForce() - { - size_t queries_size = ps.num_queries * ps.k; - - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database.data(), - ps.num_queries, - ps.num_db_vecs, - ps.dim, - ps.k, - ps.metric); - resource::sync_stream(handle_); - - { - // Require exact result for brute force - rmm::device_uvector distances_bruteforce_dev(queries_size, stream_); - rmm::device_uvector indices_bruteforce_dev(queries_size, stream_); - brute_force::index_params index_params{}; - brute_force::search_params search_params{}; - index_params.metric = ps.metric; - index_params.metric_arg = 0; - - auto device_dataset = std::optional>{}; - auto idx = [this, &index_params]() { - if (ps.host_dataset) { - auto host_database = raft::make_host_matrix(ps.num_db_vecs, ps.dim); - raft::copy( - host_database.data_handle(), database.data(), ps.num_db_vecs * ps.dim, stream_); - return brute_force::build( - handle_, index_params, raft::make_const_mdspan(host_database.view())); - } else { - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.num_db_vecs, ps.dim); - return brute_force::build(handle_, index_params, database_view); - } - }(); - - auto search_queries_view = raft::make_device_matrix_view( - search_queries.data(), ps.num_queries, ps.dim); - auto indices_out_view = raft::make_device_matrix_view( - indices_bruteforce_dev.data(), ps.num_queries, ps.k); - auto dists_out_view = raft::make_device_matrix_view( - distances_bruteforce_dev.data(), ps.num_queries, ps.k); - brute_force::serialize(handle_, std::string{"brute_force_index"}, idx); - - auto index_loaded = - brute_force::deserialize(handle_, std::string{"brute_force_index"}); - ASSERT_EQ(idx.size(), index_loaded.size()); - - brute_force::search(handle_, - search_params, - index_loaded, - search_queries_view, - indices_out_view, - dists_out_view); - - resource::sync_stream(handle_); - - ASSERT_TRUE(raft::spatial::knn::devArrMatchKnnPair(indices_naive_dev.data(), - indices_bruteforce_dev.data(), - distances_naive_dev.data(), - distances_bruteforce_dev.data(), - ps.num_queries, - ps.k, - 0.001f, - stream_, - true)); - brute_force::serialize(handle_, std::string{"brute_force_index"}, idx, false); - index_loaded = brute_force::deserialize(handle_, std::string{"brute_force_index"}); - index_loaded.update_dataset(handle_, idx.dataset()); - ASSERT_EQ(idx.size(), index_loaded.size()); - - brute_force::search(handle_, - search_params, - index_loaded, - search_queries_view, - indices_out_view, - dists_out_view); - - resource::sync_stream(handle_); - - ASSERT_TRUE(raft::spatial::knn::devArrMatchKnnPair(indices_naive_dev.data(), - indices_bruteforce_dev.data(), - distances_naive_dev.data(), - distances_bruteforce_dev.data(), - ps.num_queries, - ps.k, - 0.001f, - stream_, - true)); - } - } - - void SetUp() override - { - database.resize(ps.num_db_vecs * ps.dim, stream_); - search_queries.resize(ps.num_queries * ps.dim, stream_); - - raft::random::RngState r(1234ULL); - if constexpr (std::is_same{}) { - raft::random::uniform( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0)); - raft::random::uniform( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0)); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(1), DataT(20)); - raft::random::uniformInt( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(1), DataT(20)); - } - resource::sync_stream(handle_); - } - - void TearDown() override - { - resource::sync_stream(handle_); - database.resize(0, stream_); - search_queries.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - AnnBruteForceInputs ps; - rmm::device_uvector database; - rmm::device_uvector search_queries; -}; - -const std::vector> inputs = { - // test various dims (aligned and not aligned to vector sizes) - {1000, 10000, 1, 16, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 2, 16, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 3, 16, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 4, 16, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 5, 16, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 8, 16, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 5, 16, raft::distance::DistanceType::L2SqrtExpanded, true}, - {1000, 10000, 8, 16, raft::distance::DistanceType::L2SqrtExpanded, true}, - - // test dims that do not fit into kernel shared memory limits - {1000, 10000, 2048, 16, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 2049, 16, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 2050, 16, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 2051, 16, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 2052, 16, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 2053, 16, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 2056, 16, raft::distance::DistanceType::L2Expanded, true}, - - // host input data - {1000, 10000, 16, 10, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 16, 10, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 16, 10, raft::distance::DistanceType::L2Expanded, false}, - {100, 10000, 16, 10, raft::distance::DistanceType::L2Expanded, false}, - {20, 100000, 16, 10, raft::distance::DistanceType::L2Expanded, false}, - {1000, 100000, 16, 10, raft::distance::DistanceType::L2Expanded, false}, - {10000, 131072, 8, 10, raft::distance::DistanceType::L2Expanded, false}, - - {1000, 10000, 16, 10, raft::distance::DistanceType::InnerProduct, false}}; -} // namespace raft::neighbors::brute_force diff --git a/cpp/test/neighbors/ann_brute_force/test_float.cu b/cpp/test/neighbors/ann_brute_force/test_float.cu deleted file mode 100644 index f157b5f65c..0000000000 --- a/cpp/test/neighbors/ann_brute_force/test_float.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_brute_force.cuh" - -#include - -namespace raft::neighbors::brute_force { - -using AnnBruteForceTest_float = AnnBruteForceTest; -TEST_P(AnnBruteForceTest_float, AnnBruteForce) { this->testBruteForce(); } - -INSTANTIATE_TEST_CASE_P(AnnBruteForceTest, AnnBruteForceTest_float, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::brute_force diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh deleted file mode 100644 index cc787d3e57..0000000000 --- a/cpp/test/neighbors/ann_cagra.cuh +++ /dev/null @@ -1,949 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Search with filter instantiation - -#include "../test_utils.cuh" -#include "ann_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -#include - -#include -#include -#include -#include - -namespace raft::neighbors::cagra { -namespace { - -/* A filter that excludes all indices below `offset`. */ -struct test_cagra_sample_filter { - static constexpr unsigned offset = 300; - inline _RAFT_HOST_DEVICE auto operator()( - // query index - const uint32_t query_ix, - // the index of the current sample inside the current inverted list - const uint32_t sample_ix) const - { - return sample_ix >= offset; - } -}; - -// For sort_knn_graph test -template -void RandomSuffle(raft::host_matrix_view index) -{ - for (IdxT i = 0; i < index.extent(0); i++) { - uint64_t rand = i; - IdxT* const row_ptr = index.data_handle() + i * index.extent(1); - for (unsigned j = 0; j < index.extent(1); j++) { - // Swap two indices at random - rand = raft::neighbors::cagra::detail::device::xorshift64(rand); - const auto i0 = rand % index.extent(1); - rand = raft::neighbors::cagra::detail::device::xorshift64(rand); - const auto i1 = rand % index.extent(1); - - const auto tmp = row_ptr[i0]; - row_ptr[i0] = row_ptr[i1]; - row_ptr[i1] = tmp; - } - } -} - -template -testing::AssertionResult CheckOrder(raft::host_matrix_view index_test, - raft::host_matrix_view dataset, - raft::distance::DistanceType metric) -{ - for (IdxT i = 0; i < index_test.extent(0); i++) { - const DatatT* const base_vec = dataset.data_handle() + i * dataset.extent(1); - const IdxT* const index_row = index_test.data_handle() + i * index_test.extent(1); - DistanceT prev_distance = metric == raft::distance::DistanceType::L2Expanded - ? 0 - : std::numeric_limits::max(); - for (unsigned j = 0; j < index_test.extent(1) - 1; j++) { - const DatatT* const target_vec = dataset.data_handle() + index_row[j] * dataset.extent(1); - DistanceT distance = 0; - switch (metric) { - case raft::distance::DistanceType::L2Expanded: - for (unsigned l = 0; l < dataset.extent(1); l++) { - const auto diff = - static_cast(target_vec[l]) - static_cast(base_vec[l]); - distance += diff * diff; - } - if (prev_distance > distance) { - return testing::AssertionFailure() - << "Wrong index order (row = " << i << ", neighbor_id = " << j - << "). (distance[neighbor_id-1] = " << prev_distance - << "should be lesser than distance[neighbor_id] = " << distance << ")"; - } - break; - case raft::distance::DistanceType::InnerProduct: - for (unsigned l = 0; l < dataset.extent(1); l++) { - const auto prod = - static_cast(target_vec[l]) * static_cast(base_vec[l]); - distance += prod; - } - if (prev_distance < distance) { - return testing::AssertionFailure() - << "Wrong index order (row = " << i << ", neighbor_id = " << j - << "). (distance[neighbor_id-1] = " << prev_distance - << "should be greater than distance[neighbor_id] = " << distance << ")"; - } - break; - default: - return testing::AssertionFailure() - << "Distance metric " << metric - << " not supported. Only L2Expanded and InnerProduct are supported"; - } - prev_distance = distance; - } - } - return testing::AssertionSuccess(); -} - -template -struct fpi_mapper {}; - -template <> -struct fpi_mapper { - using type = int64_t; - static constexpr int kBitshiftBase = 53; -}; - -template <> -struct fpi_mapper { - using type = int32_t; - static constexpr int kBitshiftBase = 24; -}; - -template <> -struct fpi_mapper { - using type = int16_t; - static constexpr int kBitshiftBase = 11; -}; - -// Generate dataset to ensure no rounding error occurs in the norm computation of any two vectors. -// When testing the CAGRA index sorting function, rounding errors can affect the norm and alter the -// order of the index. To ensure the accuracy of the test, we utilize the dataset. The generation -// method is based on the error-free transformation (EFT) method. -template -RAFT_KERNEL GenerateRoundingErrorFreeDataset_kernel(T* const ptr, - const uint32_t size, - const typename fpi_mapper::type resolution) -{ - const auto tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid >= size) { return; } - - const float u32 = *reinterpret_cast::type*>(ptr + tid); - ptr[tid] = u32 / resolution; -} - -template -void GenerateRoundingErrorFreeDataset( - const raft::resources& handle, - T* const ptr, - const uint32_t n_row, - const uint32_t dim, - raft::random::RngState& rng, - const bool diff_flag // true if compute the norm between two vectors -) -{ - using mapper_type = fpi_mapper; - using int_type = typename mapper_type::type; - auto cuda_stream = resource::get_cuda_stream(handle); - const uint32_t size = n_row * dim; - const uint32_t block_size = 256; - const uint32_t grid_size = (size + block_size - 1) / block_size; - - const auto bitshift = (mapper_type::kBitshiftBase - std::log2(dim) - (diff_flag ? 1 : 0)) / 2; - // Skip the test when `dim` is too big for type `T` to allow rounding error-free test. - if (bitshift <= 1) { GTEST_SKIP(); } - const int_type resolution = int_type{1} << static_cast(std::floor(bitshift)); - raft::random::uniformInt( - handle, rng, reinterpret_cast(ptr), size, -resolution, resolution - 1); - - GenerateRoundingErrorFreeDataset_kernel - <<>>(ptr, size, resolution); -} - -template -void InitDataset(const raft::resources& handle, - DataT* const datatset_ptr, - std::uint32_t size, - std::uint32_t dim, - raft::distance::DistanceType metric, - raft::random::RngState& r) -{ - if constexpr (std::is_same_v || std::is_same_v) { - GenerateRoundingErrorFreeDataset(handle, datatset_ptr, size, dim, r, true); - - if (metric == raft::distance::InnerProduct) { - auto dataset_view = raft::make_device_matrix_view(datatset_ptr, size, dim); - raft::linalg::row_normalize( - handle, raft::make_const_mdspan(dataset_view), dataset_view, raft::linalg::L2Norm); - } - } else if constexpr (std::is_same_v || std::is_same_v) { - if constexpr (std::is_same_v) { - raft::random::uniformInt(handle, r, datatset_ptr, size * dim, DataT(-10), DataT(10)); - } else { - raft::random::uniformInt(handle, r, datatset_ptr, size * dim, DataT(1), DataT(20)); - } - - if (metric == raft::distance::InnerProduct) { - // TODO (enp1s0): Change this once row_normalize supports (u)int8 matrices. - // https://github.com/rapidsai/raft/issues/2291 - - using ComputeT = float; - auto dataset_view = raft::make_device_matrix_view(datatset_ptr, size, dim); - auto dev_row_norm = raft::make_device_vector(handle, size); - const auto normalized_norm = - (std::is_same_v ? 40 : 20) * std::sqrt(static_cast(dim)); - - raft::linalg::reduce(dev_row_norm.data_handle(), - datatset_ptr, - dim, - size, - 0.f, - true, - true, - resource::get_cuda_stream(handle), - false, - raft::sq_op(), - raft::add_op(), - raft::sqrt_op()); - raft::linalg::matrix_vector_op( - handle, - raft::make_const_mdspan(dataset_view), - raft::make_const_mdspan(dev_row_norm.view()), - dataset_view, - raft::linalg::Apply::ALONG_COLUMNS, - [normalized_norm] __device__(DataT elm, ComputeT norm) { - const ComputeT v = elm / norm * normalized_norm; - const ComputeT max_v_range = std::numeric_limits::max(); - const ComputeT min_v_range = std::numeric_limits::min(); - return static_cast(std::min(max_v_range, std::max(min_v_range, v))); - }); - } - } -} -} // namespace - -struct AnnCagraInputs { - int n_queries; - int n_rows; - int dim; - int k; - graph_build_algo build_algo; - search_algo algo; - int max_queries; - int team_size; - int itopk_size; - int search_width; - raft::distance::DistanceType metric; - bool host_dataset; - bool include_serialized_dataset; - // std::optional - double min_recall; // = std::nullopt; -}; - -inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraInputs& p) -{ - std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; - std::vector build_algo = {"IVF_PQ", "NN_DESCENT"}; - os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim - << ", k=" << p.k << ", " << algo.at((int)p.algo) << ", max_queries=" << p.max_queries - << ", itopk_size=" << p.itopk_size << ", search_width=" << p.search_width - << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") - << ", build_algo=" << build_algo.at((int)p.build_algo) << '}' << std::endl; - return os; -} - -template -class AnnCagraTest : public ::testing::TestWithParam { - public: - AnnCagraTest() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam::GetParam()), - database(0, stream_), - search_queries(0, stream_) - { - } - - protected: - void testCagra() - { - // TODO (tarang-jain): remove when NN Descent index building support InnerProduct. Reference - // issue: https://github.com/rapidsai/raft/issues/2276 - if (ps.metric == distance::InnerProduct && ps.build_algo == graph_build_algo::NN_DESCENT) - GTEST_SKIP(); - - size_t queries_size = ps.n_queries * ps.k; - std::vector indices_Cagra(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_Cagra(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database.data(), - ps.n_queries, - ps.n_rows, - ps.dim, - ps.k, - ps.metric); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - { - rmm::device_uvector distances_dev(queries_size, stream_); - rmm::device_uvector indices_dev(queries_size, stream_); - - { - cagra::index_params index_params; - index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is - // not used for knn_graph building. - index_params.build_algo = ps.build_algo; - cagra::search_params search_params; - search_params.algo = ps.algo; - search_params.max_queries = ps.max_queries; - search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.n_rows, ps.dim); - - { - cagra::index index(handle_); - if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - index = cagra::build(handle_, index_params, database_host_view); - } else { - index = cagra::build(handle_, index_params, database_view); - }; - cagra::serialize(handle_, "cagra_index", index, ps.include_serialized_dataset); - } - - auto index = cagra::deserialize(handle_, "cagra_index"); - if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } - - auto search_queries_view = raft::make_device_matrix_view( - search_queries.data(), ps.n_queries, ps.dim); - auto indices_out_view = - raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); - auto dists_out_view = raft::make_device_matrix_view( - distances_dev.data(), ps.n_queries, ps.k); - - cagra::search( - handle_, search_params, index, search_queries_view, indices_out_view, dists_out_view); - update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); - update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - // for (int i = 0; i < min(ps.n_queries, 10); i++) { - // // std::cout << "query " << i << std::end; - // print_vector("T", indices_naive.data() + i * ps.k, ps.k, std::cout); - // print_vector("C", indices_Cagra.data() + i * ps.k, ps.k, std::cout); - // print_vector("T", distances_naive.data() + i * ps.k, ps.k, std::cout); - // print_vector("C", distances_Cagra.data() + i * ps.k, ps.k, std::cout); - // } - - double min_recall = ps.min_recall; - EXPECT_TRUE(eval_neighbours(indices_naive, - indices_Cagra, - distances_naive, - distances_Cagra, - ps.n_queries, - ps.k, - 0.003, - min_recall)); - EXPECT_TRUE(eval_distances(handle_, - database.data(), - search_queries.data(), - indices_dev.data(), - distances_dev.data(), - ps.n_rows, - ps.dim, - ps.n_queries, - ps.k, - ps.metric, - 1.0e-4)); - } - } - - void SetUp() override - { - database.resize(((size_t)ps.n_rows) * ps.dim, stream_); - search_queries.resize(ps.n_queries * ps.dim, stream_); - raft::random::RngState r(1234ULL); - InitDataset(handle_, database.data(), ps.n_rows, ps.dim, ps.metric, r); - InitDataset(handle_, search_queries.data(), ps.n_queries, ps.dim, ps.metric, r); - resource::sync_stream(handle_); - } - - void TearDown() override - { - resource::sync_stream(handle_); - database.resize(0, stream_); - search_queries.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - AnnCagraInputs ps; - rmm::device_uvector database; - rmm::device_uvector search_queries; -}; - -template -class AnnCagraSortTest : public ::testing::TestWithParam { - public: - AnnCagraSortTest() - : ps(::testing::TestWithParam::GetParam()), database(0, handle_.get_stream()) - { - } - - protected: - void testCagraSort() - { - if (ps.metric == distance::InnerProduct && ps.build_algo == graph_build_algo::NN_DESCENT) - GTEST_SKIP(); - - { - // Step 1: Build a sorted KNN graph by CAGRA knn build - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.n_rows, ps.dim); - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy( - database_host.data_handle(), database.data(), database.size(), handle_.get_stream()); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - - cagra::index_params index_params; - auto knn_graph = - raft::make_host_matrix(ps.n_rows, index_params.intermediate_graph_degree); - - if (ps.build_algo == graph_build_algo::IVF_PQ) { - auto build_params = ivf_pq::index_params::from_dataset(database_view, ps.metric); - if (ps.host_dataset) { - cagra::build_knn_graph( - handle_, database_host_view, knn_graph.view(), 2, build_params); - } else { - cagra::build_knn_graph( - handle_, database_view, knn_graph.view(), 2, build_params); - } - } else { - auto nn_descent_idx_params = experimental::nn_descent::index_params{}; - nn_descent_idx_params.graph_degree = index_params.intermediate_graph_degree; - nn_descent_idx_params.intermediate_graph_degree = index_params.intermediate_graph_degree; - - if (ps.host_dataset) { - cagra::build_knn_graph( - handle_, database_host_view, knn_graph.view(), nn_descent_idx_params); - } else { - cagra::build_knn_graph( - handle_, database_host_view, knn_graph.view(), nn_descent_idx_params); - } - } - - handle_.sync_stream(); - ASSERT_TRUE(CheckOrder(knn_graph.view(), database_host.view(), ps.metric)); - - if (ps.metric != raft::distance::DistanceType::InnerProduct) { - RandomSuffle(knn_graph.view()); - - cagra::sort_knn_graph(handle_, database_view, knn_graph.view()); - handle_.sync_stream(); - - ASSERT_TRUE(CheckOrder(knn_graph.view(), database_host.view(), ps.metric)); - } - } - } - - void SetUp() override - { - database.resize(((size_t)ps.n_rows) * ps.dim, handle_.get_stream()); - raft::random::RngState r(1234ULL); - if constexpr (std::is_same_v || std::is_same_v) { - GenerateRoundingErrorFreeDataset(handle_, database.data(), ps.n_rows, ps.dim, r, false); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); - } - handle_.sync_stream(); - } - - void TearDown() override - { - handle_.sync_stream(); - database.resize(0, handle_.get_stream()); - } - - private: - raft::device_resources handle_; - AnnCagraInputs ps; - rmm::device_uvector database; -}; - -template -class AnnCagraFilterTest : public ::testing::TestWithParam { - public: - AnnCagraFilterTest() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam::GetParam()), - database(0, stream_), - search_queries(0, stream_) - { - } - - protected: - void testCagraFilter() - { - if (ps.metric == distance::InnerProduct && ps.build_algo == graph_build_algo::NN_DESCENT) - GTEST_SKIP(); - - size_t queries_size = ps.n_queries * ps.k; - std::vector indices_Cagra(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_Cagra(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - auto* database_filtered_ptr = database.data() + test_cagra_sample_filter::offset * ps.dim; - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database_filtered_ptr, - ps.n_queries, - ps.n_rows - test_cagra_sample_filter::offset, - ps.dim, - ps.k, - ps.metric); - raft::linalg::addScalar(indices_naive_dev.data(), - indices_naive_dev.data(), - IdxT(test_cagra_sample_filter::offset), - queries_size, - stream_); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - { - rmm::device_uvector distances_dev(queries_size, stream_); - rmm::device_uvector indices_dev(queries_size, stream_); - - { - cagra::index_params index_params; - index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is - // not used for knn_graph building. - index_params.nn_descent_niter = 50; - cagra::search_params search_params; - search_params.algo = ps.algo; - search_params.max_queries = ps.max_queries; - search_params.team_size = ps.team_size; - search_params.hashmap_mode = cagra::hash_mode::HASH; - - // TODO: setting search_params.itopk_size here breaks the filter tests, but is required for - // k>1024 skip these tests until fixed - if (ps.k >= 1024) { GTEST_SKIP(); } - // search_params.itopk_size = ps.itopk_size; - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.n_rows, ps.dim); - - cagra::index index(handle_); - if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - index = cagra::build(handle_, index_params, database_host_view); - } else { - index = cagra::build(handle_, index_params, database_view); - } - - if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } - - auto search_queries_view = raft::make_device_matrix_view( - search_queries.data(), ps.n_queries, ps.dim); - auto indices_out_view = - raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); - auto dists_out_view = raft::make_device_matrix_view( - distances_dev.data(), ps.n_queries, ps.k); - - cagra::search_with_filtering(handle_, - search_params, - index, - search_queries_view, - indices_out_view, - dists_out_view, - test_cagra_sample_filter()); - update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); - update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - // Test filter - bool unacceptable_node = false; - for (int q = 0; q < ps.n_queries; q++) { - for (int i = 0; i < ps.k; i++) { - const auto n = indices_Cagra[q * ps.k + i]; - unacceptable_node = unacceptable_node | !test_cagra_sample_filter()(q, n); - } - } - EXPECT_FALSE(unacceptable_node); - - double min_recall = ps.min_recall; - // TODO(mfoerster): re-enable uniquenes test - EXPECT_TRUE(eval_neighbours(indices_naive, - indices_Cagra, - distances_naive, - distances_Cagra, - ps.n_queries, - ps.k, - 0.003, - min_recall, - false)); - EXPECT_TRUE(eval_distances(handle_, - database.data(), - search_queries.data(), - indices_dev.data(), - distances_dev.data(), - ps.n_rows, - ps.dim, - ps.n_queries, - ps.k, - ps.metric, - 1.0e-4)); - } - } - - void testCagraRemoved() - { - if (ps.metric == distance::InnerProduct && ps.build_algo == graph_build_algo::NN_DESCENT) - GTEST_SKIP(); - - size_t queries_size = ps.n_queries * ps.k; - std::vector indices_Cagra(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_Cagra(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - auto* database_filtered_ptr = database.data() + test_cagra_sample_filter::offset * ps.dim; - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database_filtered_ptr, - ps.n_queries, - ps.n_rows - test_cagra_sample_filter::offset, - ps.dim, - ps.k, - ps.metric); - raft::linalg::addScalar(indices_naive_dev.data(), - indices_naive_dev.data(), - IdxT(test_cagra_sample_filter::offset), - queries_size, - stream_); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - { - rmm::device_uvector distances_dev(queries_size, stream_); - rmm::device_uvector indices_dev(queries_size, stream_); - - { - cagra::index_params index_params; - index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is - // not used for knn_graph building. - index_params.nn_descent_niter = 50; - cagra::search_params search_params; - search_params.algo = ps.algo; - search_params.max_queries = ps.max_queries; - search_params.team_size = ps.team_size; - search_params.hashmap_mode = cagra::hash_mode::HASH; - - // TODO: setting search_params.itopk_size here breaks the filter tests, but is required for - // k>1024 skip these tests until fixed - if (ps.k >= 1024) { GTEST_SKIP(); } - // search_params.itopk_size = ps.itopk_size; - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.n_rows, ps.dim); - - cagra::index index(handle_); - if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - index = cagra::build(handle_, index_params, database_host_view); - } else { - index = cagra::build(handle_, index_params, database_view); - } - - if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } - - auto search_queries_view = raft::make_device_matrix_view( - search_queries.data(), ps.n_queries, ps.dim); - auto indices_out_view = - raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); - auto dists_out_view = raft::make_device_matrix_view( - distances_dev.data(), ps.n_queries, ps.k); - auto removed_indices = - raft::make_device_vector(handle_, test_cagra_sample_filter::offset); - thrust::sequence( - resource::get_thrust_policy(handle_), - thrust::device_pointer_cast(removed_indices.data_handle()), - thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0))); - resource::sync_stream(handle_); - raft::core::bitset removed_indices_bitset( - handle_, removed_indices.view(), ps.n_rows); - cagra::search_with_filtering( - handle_, - search_params, - index, - search_queries_view, - indices_out_view, - dists_out_view, - raft::neighbors::filtering::bitset_filter(removed_indices_bitset.view())); - update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); - update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - double min_recall = ps.min_recall; - // TODO(mfoerster): re-enable uniquenes test - EXPECT_TRUE(eval_neighbours(indices_naive, - indices_Cagra, - distances_naive, - distances_Cagra, - ps.n_queries, - ps.k, - 0.003, - min_recall, - false)); - EXPECT_TRUE(eval_distances(handle_, - database.data(), - search_queries.data(), - indices_dev.data(), - distances_dev.data(), - ps.n_rows, - ps.dim, - ps.n_queries, - ps.k, - ps.metric, - 1.0e-4)); - } - } - - void SetUp() override - { - database.resize(((size_t)ps.n_rows) * ps.dim, stream_); - search_queries.resize(ps.n_queries * ps.dim, stream_); - raft::random::RngState r(1234ULL); - InitDataset(handle_, database.data(), ps.n_rows, ps.dim, ps.metric, r); - InitDataset(handle_, search_queries.data(), ps.n_queries, ps.dim, ps.metric, r); - resource::sync_stream(handle_); - } - - void TearDown() override - { - resource::sync_stream(handle_); - database.resize(0, stream_); - search_queries.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - AnnCagraInputs ps; - rmm::device_uvector database; - rmm::device_uvector search_queries; -}; - -inline std::vector generate_inputs() -{ - // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries - std::vector inputs = raft::util::itertools::product( - {100}, - {1000}, - {1, 8, 17, 1599}, - {16}, // k - {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, - {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL}, - {0, 1, 10, 100}, // query size - {0}, - {256}, - {1}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false}, - {true}, - {0.995}); - - auto inputs2 = raft::util::itertools::product( - {100}, - {1000}, - {1, 8, 17, 1599}, - {1}, // k - {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, - {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL}, - {0, 1, 10, 100}, // query size - {0}, - {256}, - {1}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false}, - {true}, - {99. / 100} - // smaller threshould than the other test cases because it is too strict for Top-1 search - ); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - - inputs2 = raft::util::itertools::product( - {100}, - {1000}, - {1, 3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024}, // dim - {16}, // k - {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0}, - {64}, - {1}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false}, - {true}, - {0.995}); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - inputs2 = raft::util::itertools::product( - {100}, - {1000}, - {64}, - {16}, - {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0, 4, 8, 16, 32}, // team_size - {64}, - {1}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false}, - {false}, - {0.995}); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - - inputs2 = raft::util::itertools::product( - {100}, - {1000}, - {64}, - {16}, - {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0}, // team_size - {32, 64, 128, 256, 512, 768}, - {1}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false}, - {true}, - {0.995}); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - - inputs2 = raft::util::itertools::product( - {100}, - {10000, 20000}, - {32}, - {10}, - {graph_build_algo::IVF_PQ, graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0}, // team_size - {64}, - {1}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false, true}, - {false}, - {0.995}); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - - inputs2 = raft::util::itertools::product( - {100}, - {20000}, - {32}, - {2048}, // k - {graph_build_algo::NN_DESCENT}, - {search_algo::AUTO}, - {10}, - {0}, - {4096}, // itopk_size - {1}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false}, - {false}, - {0.995}); - inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); - - return inputs; -} - -const std::vector inputs = generate_inputs(); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh deleted file mode 100644 index 412e71bff1..0000000000 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include // none_cagra_sample_filter -#include // RAFT_EXPLICIT - -namespace raft::neighbors::cagra::detail { - -namespace multi_cta_search { -#define instantiate_kernel_selection( \ - DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - raft::distance::DistanceType metric, \ - cudaStream_t stream); - -instantiate_kernel_selection(standard_dataset_descriptor_t, - 32, - 1024, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection(standard_dataset_descriptor_t, - 8, - 128, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection(standard_dataset_descriptor_t, - 16, - 256, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection(standard_dataset_descriptor_t, - 32, - 512, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection -} // namespace multi_cta_search - -namespace single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - raft::distance::DistanceType metric, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, - 32, - 1024, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, - 8, - 128, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, - 16, - 256, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, - 32, - 512, - float, - uint64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -} // namespace single_cta_search -} // namespace raft::neighbors::cagra::detail diff --git a/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu deleted file mode 100644 index ff7e839abf..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_cagra.cuh" -#include "search_kernel_uint64_t.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestF_I64; -TEST_P(AnnCagraTestF_I64, AnnCagra) { this->testCagra(); } - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_I64, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu deleted file mode 100644 index 7d29ce4f99..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_cagra.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestF_U32; -TEST_P(AnnCagraTestF_U32, AnnCagra) { this->testCagra(); } - -typedef AnnCagraSortTest AnnCagraSortTestF_U32; -TEST_P(AnnCagraSortTestF_U32, AnnCagraSort) { this->testCagraSort(); } - -typedef AnnCagraFilterTest AnnCagraFilterTestF_U32; -TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) -{ - this->testCagraFilter(); - this->testCagraRemoved(); -} - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestF_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_half_int64_t.cu b/cpp/test/neighbors/ann_cagra/test_half_int64_t.cu deleted file mode 100644 index bcdd95bece..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_half_int64_t.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_cagra.cuh" -#include "search_kernel_uint64_t.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestH_I64; -TEST_P(AnnCagraTestH_I64, AnnCagra) { this->testCagra(); } - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestH_I64, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu deleted file mode 100644 index ec7144f8d0..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_half_uint32_t.cu +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_cagra.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestH_U32; -TEST_P(AnnCagraTestH_U32, AnnCagra) { this->testCagra(); } - -typedef AnnCagraSortTest AnnCagraSortTestH_U32; -TEST_P(AnnCagraSortTestH_U32, AnnCagraSort) { this->testCagraSort(); } - -typedef AnnCagraFilterTest AnnCagraFilterTestH_U32; -TEST_P(AnnCagraFilterTestH_U32, AnnCagraFilter) -{ - this->testCagraFilter(); - this->testCagraRemoved(); -} - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestH_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestH_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestH_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu deleted file mode 100644 index b2242d89b1..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_cagra.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestI8_U32; -TEST_P(AnnCagraTestI8_U32, AnnCagra) { this->testCagra(); } -typedef AnnCagraSortTest AnnCagraSortTestI8_U32; -TEST_P(AnnCagraSortTestI8_U32, AnnCagraSort) { this->testCagraSort(); } -typedef AnnCagraFilterTest AnnCagraFilterTestI8_U32; -TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) -{ - this->testCagraFilter(); - this->testCagraRemoved(); -} - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestI8_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu deleted file mode 100644 index 302b2bec18..0000000000 --- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_cagra.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraTest AnnCagraTestU8_U32; -TEST_P(AnnCagraTestU8_U32, AnnCagra) { this->testCagra(); } - -typedef AnnCagraSortTest AnnCagraSortTestU8_U32; -TEST_P(AnnCagraSortTestU8_U32, AnnCagraSort) { this->testCagraSort(); } - -typedef AnnCagraFilterTest AnnCagraFilterTestU8_U32; -TEST_P(AnnCagraFilterTestU8_U32, AnnCagraSort) -{ - this->testCagraFilter(); - this->testCagraRemoved(); -} - -INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8_U32, ::testing::ValuesIn(inputs)); -INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestU8_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh deleted file mode 100644 index 6b24bca921..0000000000 --- a/cpp/test/neighbors/ann_cagra_vpq.cuh +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../test_utils.cuh" -#include "ann_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace { -template -void GenerateDataset(T* const dataset_ptr, - T* const query_ptr, - const std::size_t dataset_size, - const std::size_t query_size, - const std::size_t dim, - const std::size_t num_centers, - cudaStream_t cuda_stream) -{ - auto center_list = raft::make_host_matrix(num_centers, dim); - auto host_dataset = raft::make_host_matrix(std::max(dataset_size, query_size), dim); - - std::normal_distribution dist(0, 1); - std::mt19937 mt(0); - for (std::size_t i = 0; i < center_list.size(); i++) { - center_list.data_handle()[i] = dist(mt); - } - - std::uniform_int_distribution i_dist(0, num_centers - 1); - for (std::size_t i = 0; i < dataset_size; i++) { - const auto center_index = i_dist(mt); - for (std::size_t j = 0; j < dim; j++) { - host_dataset.data_handle()[i * dim + j] = - center_list.data_handle()[center_index + j] + dist(mt) * 1e-1; - } - } - raft::copy(dataset_ptr, host_dataset.data_handle(), dataset_size * dim, cuda_stream); - - for (std::size_t i = 0; i < query_size; i++) { - const auto center_index = i_dist(mt); - for (std::size_t j = 0; j < dim; j++) { - host_dataset.data_handle()[i * dim + j] = - center_list.data_handle()[center_index + j] + dist(mt) * 1e-1; - } - } - raft::copy(query_ptr, host_dataset.data_handle(), query_size * dim, cuda_stream); -} -} // namespace - -namespace raft::neighbors::cagra { -struct AnnCagraVpqInputs { - int n_queries; - int n_rows; - int dim; - int k; - int pq_len; - int pq_bits; - graph_build_algo build_algo; - search_algo algo; - int max_queries; - int team_size; - int itopk_size; - int search_width; - raft::distance::DistanceType metric; - bool host_dataset; - bool include_serialized_dataset; - // std::optional - double min_recall; // = std::nullopt; -}; - -inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraVpqInputs& p) -{ - std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; - std::vector build_algo = {"IVF_PQ", "NN_DESCENT"}; - os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim - << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", pq_len=" << p.pq_len << ", " - << algo.at((int)p.algo) << ", max_queries=" << p.max_queries << ", itopk_size=" << p.itopk_size - << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric) - << (p.host_dataset ? ", host" : ", device") - << ", build_algo=" << build_algo.at((int)p.build_algo) << '}' << std::endl; - return os; -} - -template -class AnnCagraVpqTest : public ::testing::TestWithParam { - public: - AnnCagraVpqTest() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam::GetParam()), - database(0, stream_), - search_queries(0, stream_) - { - } - - protected: - void testCagra() - { - size_t queries_size = ps.n_queries * ps.k; - std::vector indices_Cagra(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_Cagra(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database.data(), - ps.n_queries, - ps.n_rows, - ps.dim, - ps.k, - ps.metric); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - const auto vpq_k = ps.k * 4; - { - rmm::device_uvector distances_dev(vpq_k * ps.n_queries, stream_); - rmm::device_uvector indices_dev(vpq_k * ps.n_queries, stream_); - - { - if ((ps.dim % ps.pq_len) != 0) { - // TODO: remove this requirement in the algorithm. - GTEST_SKIP() << "(TODO) At the moment dim, (" << ps.dim - << ") must be a multiple of pq_len (" << ps.pq_len << ")"; - } - cagra::index_params index_params; - index_params.compression = vpq_params{.pq_bits = static_cast(ps.pq_bits), - .pq_dim = static_cast(ps.dim / ps.pq_len)}; - index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is - // not used for knn_graph building. - index_params.build_algo = ps.build_algo; - cagra::search_params search_params; - search_params.algo = ps.algo; - search_params.max_queries = ps.max_queries; - search_params.team_size = ps.team_size; - search_params.itopk_size = ps.itopk_size; - - auto database_view = - raft::make_device_matrix_view(database.data(), ps.n_rows, ps.dim); - - { - cagra::index index(handle_); - if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - database_host.data_handle(), ps.n_rows, ps.dim); - index = cagra::build(handle_, index_params, database_host_view); - } else { - index = cagra::build(handle_, index_params, database_view); - }; - cagra::serialize(handle_, "cagra_index", index, ps.include_serialized_dataset); - } - - auto index = cagra::deserialize(handle_, "cagra_index"); - if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } - - // CAGRA-Q sanity check: we've built the right index type - auto* vpq_dataset = - dynamic_cast*>(&index.data()); - EXPECT_NE(vpq_dataset, nullptr) - << "Expected VPQ dataset, because we're testing CAGRA-Q here."; - - auto search_queries_view = raft::make_device_matrix_view( - search_queries.data(), ps.n_queries, ps.dim); - auto indices_out_view = - raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, vpq_k); - auto dists_out_view = raft::make_device_matrix_view( - distances_dev.data(), ps.n_queries, vpq_k); - - cagra::search( - handle_, search_params, index, search_queries_view, indices_out_view, dists_out_view); - - { - auto host_dataset = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy( - host_dataset.data_handle(), (const DataT*)database.data(), ps.n_rows * ps.dim, stream_); - - auto host_queries = raft::make_host_matrix(ps.n_queries, ps.dim); - raft::copy(host_queries.data_handle(), - (const DataT*)search_queries_view.data_handle(), - ps.n_queries * ps.dim, - stream_); - - auto host_index_candidate = raft::make_host_matrix(ps.n_queries, vpq_k); - raft::copy(host_index_candidate.data_handle(), - indices_out_view.data_handle(), - ps.n_queries * vpq_k, - stream_); - - auto host_indices_Cagra_view = - raft::make_host_matrix_view(indices_Cagra.data(), ps.n_queries, ps.k); - - auto host_dists_Cagra_view = - raft::make_host_matrix_view(distances_Cagra.data(), ps.n_queries, ps.k); - - resource::sync_stream(handle_); - - raft::neighbors::refine(handle_, - raft::make_const_mdspan(host_dataset.view()), - raft::make_const_mdspan(host_queries.view()), - raft::make_const_mdspan(host_index_candidate.view()), - host_indices_Cagra_view, - host_dists_Cagra_view, - ps.metric); - - raft::copy(indices_dev.data(), - host_indices_Cagra_view.data_handle(), - ps.k * ps.n_queries, - stream_); - raft::copy(distances_dev.data(), - host_dists_Cagra_view.data_handle(), - ps.k * ps.n_queries, - stream_); - resource::sync_stream(handle_); - } - } - - double min_recall = ps.min_recall; - EXPECT_TRUE(eval_neighbours(indices_naive, - indices_Cagra, - distances_naive, - distances_Cagra, - ps.n_queries, - ps.k, - 0.003, - min_recall)); - EXPECT_TRUE(eval_distances(handle_, - database.data(), - search_queries.data(), - indices_dev.data(), - distances_dev.data(), - ps.n_rows, - ps.dim, - ps.n_queries, - ps.k, - ps.metric, - 1.0e-4)); - } - } - - void SetUp() override - { - database.resize(((size_t)ps.n_rows) * ps.dim, stream_); - search_queries.resize(ps.n_queries * ps.dim, stream_); - GenerateDataset(database.data(), - search_queries.data(), - ps.n_rows, - ps.n_queries, - ps.dim, - static_cast(std::sqrt(ps.n_rows)), - stream_); - resource::sync_stream(handle_); - } - - void TearDown() override - { - resource::sync_stream(handle_); - database.resize(0, stream_); - search_queries.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - AnnCagraVpqInputs ps; - rmm::device_uvector database; - rmm::device_uvector search_queries; -}; - -const std::vector vpq_inputs = raft::util::itertools::product( - {100}, // n_queries - {1000, 10000}, // n_rows - {128, 132, 192, 256, 512, 768}, // dim - {8, 12}, // k - {2, 4}, // pq_len - {8}, // pq_bits - {graph_build_algo::NN_DESCENT}, // build_algo - {search_algo::SINGLE_CTA, search_algo::MULTI_CTA}, // algo - {0}, // max_queries - {0}, // team_size - {512}, // itopk_size - {1}, // search_width - {raft::distance::DistanceType::L2Expanded}, // metric - {false}, // host_dataset - {true}, // include_serialized_dataset - {0.8} // min_recall -); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu deleted file mode 100644 index f60edb5ed6..0000000000 --- a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY -#include "../ann_cagra_vpq.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraVpqTest AnnCagraVpqTestF_I64; -TEST_P(AnnCagraVpqTestF_I64, AnnCagraVpq) { this->testCagra(); } - -INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_I64, ::testing::ValuesIn(vpq_inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu deleted file mode 100644 index 19d3f32250..0000000000 --- a/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_cagra_vpq.cuh" - -#include - -namespace raft::neighbors::cagra { - -typedef AnnCagraVpqTest AnnCagraVpqTestF_U32; -TEST_P(AnnCagraVpqTestF_U32, AnnCagraVpq) { this->testCagra(); } - -INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_U32, ::testing::ValuesIn(vpq_inputs)); - -} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_ivf_flat.cuh b/cpp/test/neighbors/ann_ivf_flat.cuh deleted file mode 100644 index de6af589fa..0000000000 --- a/cpp/test/neighbors/ann_ivf_flat.cuh +++ /dev/null @@ -1,675 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../test_utils.cuh" -#include "ann_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include - -namespace raft::neighbors::ivf_flat { - -struct test_ivf_sample_filter { - static constexpr unsigned offset = 300; -}; - -template -struct AnnIvfFlatInputs { - IdxT num_queries; - IdxT num_db_vecs; - IdxT dim; - IdxT k; - IdxT nprobe; - IdxT nlist; - raft::distance::DistanceType metric; - bool adaptive_centers; - bool host_dataset; -}; - -template -::std::ostream& operator<<(::std::ostream& os, const AnnIvfFlatInputs& p) -{ - os << "{ " << p.num_queries << ", " << p.num_db_vecs << ", " << p.dim << ", " << p.k << ", " - << p.nprobe << ", " << p.nlist << ", " << static_cast(p.metric) << ", " - << p.adaptive_centers << ", " << p.host_dataset << '}' << std::endl; - return os; -} - -template -class AnnIVFFlatTest : public ::testing::TestWithParam> { - public: - AnnIVFFlatTest() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam>::GetParam()), - database(0, stream_), - search_queries(0, stream_) - { - } - - void testIVFFlat() - { - size_t queries_size = ps.num_queries * ps.k; - std::vector indices_ivfflat(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_ivfflat(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database.data(), - ps.num_queries, - ps.num_db_vecs, - ps.dim, - ps.k, - ps.metric); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - { - // unless something is really wrong with clustering, this could serve as a lower bound on - // recall - double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); - - rmm::device_uvector distances_ivfflat_dev(queries_size, stream_); - rmm::device_uvector indices_ivfflat_dev(queries_size, stream_); - - { - // legacy interface - raft::spatial::knn::IVFFlatParam ivfParams; - ivfParams.nprobe = ps.nprobe; - ivfParams.nlist = ps.nlist; - raft::spatial::knn::knnIndex index; - - approx_knn_build_index(handle_, - &index, - dynamic_cast(&ivfParams), - ps.metric, - (IdxT)0, - database.data(), - ps.num_db_vecs, - ps.dim); - - resource::sync_stream(handle_); - approx_knn_search(handle_, - distances_ivfflat_dev.data(), - indices_ivfflat_dev.data(), - &index, - ps.k, - search_queries.data(), - ps.num_queries); - - update_host(distances_ivfflat.data(), distances_ivfflat_dev.data(), queries_size, stream_); - update_host(indices_ivfflat.data(), indices_ivfflat_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - ASSERT_TRUE(eval_neighbours(indices_naive, - indices_ivfflat, - distances_naive, - distances_ivfflat, - ps.num_queries, - ps.k, - 0.001, - min_recall)); - { - ivf_flat::index_params index_params; - ivf_flat::search_params search_params; - index_params.n_lists = ps.nlist; - index_params.metric = ps.metric; - index_params.adaptive_centers = ps.adaptive_centers; - search_params.n_probes = ps.nprobe; - - index_params.add_data_on_build = false; - index_params.kmeans_trainset_fraction = 0.5; - index_params.metric_arg = 0; - - ivf_flat::index idx(handle_, index_params, ps.dim); - ivf_flat::index index_2(handle_, index_params, ps.dim); - - if (!ps.host_dataset) { - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.num_db_vecs, ps.dim); - idx = ivf_flat::build(handle_, index_params, database_view); - rmm::device_uvector vector_indices(ps.num_db_vecs, stream_); - thrust::sequence(resource::get_thrust_policy(handle_), - thrust::device_pointer_cast(vector_indices.data()), - thrust::device_pointer_cast(vector_indices.data() + ps.num_db_vecs)); - resource::sync_stream(handle_); - - IdxT half_of_data = ps.num_db_vecs / 2; - - auto half_of_data_view = raft::make_device_matrix_view( - (const DataT*)database.data(), half_of_data, ps.dim); - - const std::optional> no_opt = std::nullopt; - index_2 = ivf_flat::extend(handle_, half_of_data_view, no_opt, idx); - - auto new_half_of_data_view = raft::make_device_matrix_view( - database.data() + half_of_data * ps.dim, IdxT(ps.num_db_vecs) - half_of_data, ps.dim); - - auto new_half_of_data_indices_view = raft::make_device_vector_view( - vector_indices.data() + half_of_data, IdxT(ps.num_db_vecs) - half_of_data); - - ivf_flat::extend(handle_, - new_half_of_data_view, - std::make_optional>( - new_half_of_data_indices_view), - &index_2); - - } else { - auto host_database = raft::make_host_matrix(ps.num_db_vecs, ps.dim); - raft::copy( - host_database.data_handle(), database.data(), ps.num_db_vecs * ps.dim, stream_); - idx = - ivf_flat::build(handle_, index_params, raft::make_const_mdspan(host_database.view())); - - auto vector_indices = raft::make_host_vector(handle_, ps.num_db_vecs); - std::iota(vector_indices.data_handle(), vector_indices.data_handle() + ps.num_db_vecs, 0); - - IdxT half_of_data = ps.num_db_vecs / 2; - - auto half_of_data_view = raft::make_host_matrix_view( - (const DataT*)host_database.data_handle(), half_of_data, ps.dim); - - const std::optional> no_opt = std::nullopt; - index_2 = ivf_flat::extend(handle_, half_of_data_view, no_opt, idx); - - auto new_half_of_data_view = raft::make_host_matrix_view( - host_database.data_handle() + half_of_data * ps.dim, - IdxT(ps.num_db_vecs) - half_of_data, - ps.dim); - auto new_half_of_data_indices_view = raft::make_host_vector_view( - vector_indices.data_handle() + half_of_data, IdxT(ps.num_db_vecs) - half_of_data); - ivf_flat::extend(handle_, - new_half_of_data_view, - std::make_optional>( - new_half_of_data_indices_view), - &index_2); - } - - auto search_queries_view = raft::make_device_matrix_view( - search_queries.data(), ps.num_queries, ps.dim); - auto indices_out_view = raft::make_device_matrix_view( - indices_ivfflat_dev.data(), ps.num_queries, ps.k); - auto dists_out_view = raft::make_device_matrix_view( - distances_ivfflat_dev.data(), ps.num_queries, ps.k); - ivf_flat::detail::serialize(handle_, "ivf_flat_index", index_2); - - auto index_loaded = ivf_flat::detail::deserialize(handle_, "ivf_flat_index"); - ASSERT_EQ(index_2.size(), index_loaded.size()); - - ivf_flat::search(handle_, - search_params, - index_loaded, - search_queries_view, - indices_out_view, - dists_out_view); - - update_host(distances_ivfflat.data(), distances_ivfflat_dev.data(), queries_size, stream_); - update_host(indices_ivfflat.data(), indices_ivfflat_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - - // Test the centroid invariants - if (index_2.adaptive_centers()) { - // The centers must be up-to-date with the corresponding data - std::vector list_sizes(index_2.n_lists()); - std::vector list_indices(index_2.n_lists()); - rmm::device_uvector centroid(ps.dim, stream_); - raft::copy( - list_sizes.data(), index_2.list_sizes().data_handle(), index_2.n_lists(), stream_); - raft::copy( - list_indices.data(), index_2.inds_ptrs().data_handle(), index_2.n_lists(), stream_); - resource::sync_stream(handle_); - for (uint32_t l = 0; l < index_2.n_lists(); l++) { - if (list_sizes[l] == 0) continue; - rmm::device_uvector cluster_data(list_sizes[l] * ps.dim, stream_); - raft::spatial::knn::detail::utils::copy_selected((IdxT)list_sizes[l], - (IdxT)ps.dim, - database.data(), - list_indices[l], - (IdxT)ps.dim, - cluster_data.data(), - (IdxT)ps.dim, - stream_); - raft::stats::mean( - centroid.data(), cluster_data.data(), ps.dim, list_sizes[l], false, true, stream_); - ASSERT_TRUE(raft::devArrMatch(index_2.centers().data_handle() + ps.dim * l, - centroid.data(), - ps.dim, - raft::CompareApprox(0.001), - stream_)); - } - } else { - // The centers must be immutable - ASSERT_TRUE(raft::devArrMatch(index_2.centers().data_handle(), - idx.centers().data_handle(), - index_2.centers().size(), - raft::Compare(), - stream_)); - } - } - ASSERT_TRUE(eval_neighbours(indices_naive, - indices_ivfflat, - distances_naive, - distances_ivfflat, - ps.num_queries, - ps.k, - 0.001, - min_recall)); - } - } - - void testPacker() - { - ivf_flat::index_params index_params; - ivf_flat::search_params search_params; - index_params.n_lists = ps.nlist; - index_params.metric = ps.metric; - index_params.adaptive_centers = false; - search_params.n_probes = ps.nprobe; - - index_params.add_data_on_build = false; - index_params.kmeans_trainset_fraction = 1.0; - index_params.metric_arg = 0; - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.num_db_vecs, ps.dim); - - auto idx = ivf_flat::build(handle_, index_params, database_view); - - const std::optional> no_opt = std::nullopt; - index extend_index = ivf_flat::extend(handle_, database_view, no_opt, idx); - - auto list_sizes = raft::make_host_vector(idx.n_lists()); - update_host(list_sizes.data_handle(), - extend_index.list_sizes().data_handle(), - extend_index.n_lists(), - stream_); - resource::sync_stream(handle_); - - auto& lists = idx.lists(); - - // conservative memory allocation for codepacking - auto list_device_spec = list_spec{idx.dim(), false}; - - for (uint32_t label = 0; label < idx.n_lists(); label++) { - uint32_t list_size = list_sizes.data_handle()[label]; - - ivf::resize_list(handle_, lists[label], list_device_spec, list_size, 0); - } - - helpers::recompute_internal_state(handle_, &idx); - - using interleaved_group = Pow2; - - for (uint32_t label = 0; label < idx.n_lists(); label++) { - uint32_t list_size = list_sizes.data_handle()[label]; - - if (list_size > 0) { - uint32_t padded_list_size = interleaved_group::roundUp(list_size); - uint32_t n_elems = padded_list_size * idx.dim(); - auto list_data = lists[label]->data; - auto list_inds = extend_index.lists()[label]->indices; - - // fetch the flat codes - auto flat_codes = make_device_matrix(handle_, list_size, idx.dim()); - - matrix::gather( - handle_, - make_device_matrix_view( - (const DataT*)database.data(), static_cast(ps.num_db_vecs), idx.dim()), - make_device_vector_view((const IdxT*)list_inds.data_handle(), - list_size), - flat_codes.view()); - - helpers::codepacker::pack( - handle_, make_const_mdspan(flat_codes.view()), idx.veclen(), 0, list_data.view()); - - { - auto mask = make_device_vector(handle_, n_elems); - - linalg::map_offset(handle_, - mask.view(), - [dim = idx.dim(), - list_size, - padded_list_size, - chunk_size = util::FastIntDiv(idx.veclen())] __device__(auto i) { - uint32_t max_group_offset = interleaved_group::roundDown(list_size); - if (i < max_group_offset * dim) { return true; } - uint32_t surplus = (i - max_group_offset * dim); - uint32_t ingroup_id = interleaved_group::mod(surplus / chunk_size); - return ingroup_id < (list_size - max_group_offset); - }); - - // ensure that the correct number of indices are masked out - ASSERT_TRUE(thrust::reduce(resource::get_thrust_policy(handle_), - mask.data_handle(), - mask.data_handle() + n_elems, - 0) == list_size * ps.dim); - - auto packed_list_data = make_device_vector(handle_, n_elems); - - linalg::map_offset(handle_, - packed_list_data.view(), - [mask = mask.data_handle(), - list_data = list_data.data_handle()] __device__(uint32_t i) { - if (mask[i]) return list_data[i]; - return DataT{0}; - }); - - auto extend_data = extend_index.lists()[label]->data; - auto extend_data_filtered = make_device_vector(handle_, n_elems); - linalg::map_offset(handle_, - extend_data_filtered.view(), - [mask = mask.data_handle(), - extend_data = extend_data.data_handle()] __device__(uint32_t i) { - if (mask[i]) return extend_data[i]; - return DataT{0}; - }); - - ASSERT_TRUE(raft::devArrMatch(packed_list_data.data_handle(), - extend_data_filtered.data_handle(), - n_elems, - raft::Compare(), - stream_)); - } - - auto unpacked_flat_codes = - make_device_matrix(handle_, list_size, idx.dim()); - - helpers::codepacker::unpack( - handle_, list_data.view(), idx.veclen(), 0, unpacked_flat_codes.view()); - - ASSERT_TRUE(raft::devArrMatch(flat_codes.data_handle(), - unpacked_flat_codes.data_handle(), - list_size * ps.dim, - raft::Compare(), - stream_)); - } - } - } - - void testFilter() - { - size_t queries_size = ps.num_queries * ps.k; - std::vector indices_ivfflat(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_ivfflat(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - auto* database_filtered_ptr = database.data() + test_ivf_sample_filter::offset * ps.dim; - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database_filtered_ptr, - ps.num_queries, - ps.num_db_vecs - test_ivf_sample_filter::offset, - ps.dim, - ps.k, - ps.metric); - raft::linalg::addScalar(indices_naive_dev.data(), - indices_naive_dev.data(), - IdxT(test_ivf_sample_filter::offset), - queries_size, - stream_); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - { - // unless something is really wrong with clustering, this could serve as a lower bound on - // recall - double min_recall = static_cast(ps.nprobe) / static_cast(ps.nlist); - - auto distances_ivfflat_dev = raft::make_device_matrix(handle_, ps.num_queries, ps.k); - auto indices_ivfflat_dev = - raft::make_device_matrix(handle_, ps.num_queries, ps.k); - - { - ivf_flat::index_params index_params; - ivf_flat::search_params search_params; - index_params.n_lists = ps.nlist; - index_params.metric = ps.metric; - index_params.adaptive_centers = ps.adaptive_centers; - search_params.n_probes = ps.nprobe; - - index_params.add_data_on_build = true; - index_params.kmeans_trainset_fraction = 0.5; - index_params.metric_arg = 0; - - // Create IVF Flat index - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.num_db_vecs, ps.dim); - auto index = ivf_flat::build(handle_, index_params, database_view); - - // Create Bitset filter - auto removed_indices = - raft::make_device_vector(handle_, test_ivf_sample_filter::offset); - thrust::sequence(resource::get_thrust_policy(handle_), - thrust::device_pointer_cast(removed_indices.data_handle()), - thrust::device_pointer_cast(removed_indices.data_handle() + - test_ivf_sample_filter::offset)); - resource::sync_stream(handle_); - - raft::core::bitset removed_indices_bitset( - handle_, removed_indices.view(), ps.num_db_vecs); - - // Search with the filter - auto search_queries_view = raft::make_device_matrix_view( - search_queries.data(), ps.num_queries, ps.dim); - ivf_flat::search_with_filtering( - handle_, - search_params, - index, - search_queries_view, - indices_ivfflat_dev.view(), - distances_ivfflat_dev.view(), - raft::neighbors::filtering::bitset_filter(removed_indices_bitset.view())); - - update_host( - distances_ivfflat.data(), distances_ivfflat_dev.data_handle(), queries_size, stream_); - update_host( - indices_ivfflat.data(), indices_ivfflat_dev.data_handle(), queries_size, stream_); - resource::sync_stream(handle_); - } - ASSERT_TRUE(eval_neighbours(indices_naive, - indices_ivfflat, - distances_naive, - distances_ivfflat, - ps.num_queries, - ps.k, - 0.001, - min_recall)); - } - } - - void SetUp() override - { - database.resize(ps.num_db_vecs * ps.dim, stream_); - search_queries.resize(ps.num_queries * ps.dim, stream_); - - raft::random::RngState r(1234ULL); - if constexpr (std::is_same{}) { - raft::random::uniform( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0)); - raft::random::uniform( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0)); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(1), DataT(20)); - raft::random::uniformInt( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(1), DataT(20)); - } - resource::sync_stream(handle_); - } - - void TearDown() override - { - resource::sync_stream(handle_); - database.resize(0, stream_); - search_queries.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - AnnIvfFlatInputs ps; - rmm::device_uvector database; - rmm::device_uvector search_queries; -}; - -const std::vector> inputs = { - // test various dims (aligned and not aligned to vector sizes) - {1000, 10000, 1, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 2, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 3, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 4, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 5, 16, 40, 1024, raft::distance::DistanceType::InnerProduct, false}, - {1000, 10000, 8, 16, 40, 1024, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 5, 16, 40, 1024, raft::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 10000, 8, 16, 40, 1024, raft::distance::DistanceType::L2SqrtExpanded, true}, - - // test dims that do not fit into kernel shared memory limits - {1000, 10000, 2048, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 2049, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 2050, 16, 40, 1024, raft::distance::DistanceType::InnerProduct, false}, - {1000, 10000, 2051, 16, 40, 1024, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 2052, 16, 40, 1024, raft::distance::DistanceType::InnerProduct, false}, - {1000, 10000, 2053, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, true}, - {1000, 10000, 2056, 16, 40, 1024, raft::distance::DistanceType::L2Expanded, true}, - - // various random combinations - {1000, 10000, 16, 10, 40, 1024, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 16, 10, 50, 1024, raft::distance::DistanceType::L2Expanded, false}, - {1000, 10000, 16, 10, 70, 1024, raft::distance::DistanceType::L2Expanded, false}, - {100, 10000, 16, 10, 20, 512, raft::distance::DistanceType::L2Expanded, false}, - {20, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, true}, - {1000, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, true}, - {10000, 131072, 8, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false}, - - // various combinations with k>raft::matrix::detail::select::warpsort::kMaxCapacity - {1000, 10000, 16, 1024, 40, 1024, raft::distance::DistanceType::L2SqrtExpanded, true}, - {1000, 10000, 2053, 512, 50, 1024, raft::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 10000, 2049, 2048, 70, 1024, raft::distance::DistanceType::L2SqrtExpanded, false}, - {1000, 10000, 16, 4000, 100, 2048, raft::distance::DistanceType::L2SqrtExpanded, false}, - {10, 10000, 16, 4000, 100, 2048, raft::distance::DistanceType::L2SqrtExpanded, false}, - {10, 10000, 16, 4000, 120, 2048, raft::distance::DistanceType::L2SqrtExpanded, true}, - {20, 100000, 16, 257, 20, 1024, raft::distance::DistanceType::L2SqrtExpanded, true}, - {1000, 100000, 16, 259, 20, 1024, raft::distance::DistanceType::L2Expanded, true, true}, - {10000, 131072, 8, 280, 20, 1024, raft::distance::DistanceType::InnerProduct, false}, - {100000, 1024, 32, 257, 64, 64, raft::distance::DistanceType::L2Expanded, false}, - {100000, 1024, 32, 257, 64, 64, raft::distance::DistanceType::L2SqrtExpanded, false}, - {100000, 1024, 32, 257, 64, 64, raft::distance::DistanceType::InnerProduct, false}, - {100000, 1024, 16, 300, 20, 60, raft::distance::DistanceType::L2Expanded, false}, - {100000, 1024, 16, 500, 20, 60, raft::distance::DistanceType::L2SqrtExpanded, false}, - {100000, 1024, 16, 700, 20, 60, raft::distance::DistanceType::InnerProduct, false}, - - // host input data - {1000, 10000, 16, 10, 40, 1024, raft::distance::DistanceType::L2Expanded, false, true}, - {1000, 10000, 16, 10, 50, 1024, raft::distance::DistanceType::L2Expanded, false, true}, - {1000, 10000, 16, 10, 70, 1024, raft::distance::DistanceType::L2Expanded, false, true}, - {100, 10000, 16, 10, 20, 512, raft::distance::DistanceType::L2Expanded, false, true}, - {20, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false, true}, - {1000, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false, true}, - {10000, 131072, 8, 10, 20, 1024, raft::distance::DistanceType::L2Expanded, false, true}, - - {1000, 10000, 16, 10, 40, 1024, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 16, 10, 50, 1024, raft::distance::DistanceType::InnerProduct, true}, - {1000, 10000, 16, 10, 70, 1024, raft::distance::DistanceType::InnerProduct, false}, - {100, 10000, 16, 10, 20, 512, raft::distance::DistanceType::InnerProduct, true}, - {20, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::InnerProduct, true}, - {1000, 100000, 16, 10, 20, 1024, raft::distance::DistanceType::InnerProduct, false}, - {10000, 131072, 8, 10, 50, 1024, raft::distance::DistanceType::InnerProduct, true}, - - {1000, 10000, 4096, 20, 50, 1024, raft::distance::DistanceType::InnerProduct, false}, - - // test splitting the big query batches (> max gridDim.y) into smaller batches - {100000, 1024, 32, 10, 64, 64, raft::distance::DistanceType::InnerProduct, false}, - {1000000, 1024, 32, 10, 256, 256, raft::distance::DistanceType::InnerProduct, false}, - {98306, 1024, 32, 10, 64, 64, raft::distance::DistanceType::InnerProduct, true}, - - // test radix_sort for getting the cluster selection - {1000, - 10000, - 16, - 10, - raft::matrix::detail::select::warpsort::kMaxCapacity * 2, - raft::matrix::detail::select::warpsort::kMaxCapacity * 4, - raft::distance::DistanceType::L2Expanded, - false}, - {1000, - 10000, - 16, - 10, - raft::matrix::detail::select::warpsort::kMaxCapacity * 4, - raft::matrix::detail::select::warpsort::kMaxCapacity * 4, - raft::distance::DistanceType::InnerProduct, - false}, - - // The following two test cases should show very similar recall. - // num_queries, num_db_vecs, dim, k, nprobe, nlist, metric, adaptive_centers - {20000, 8712, 3, 10, 51, 66, raft::distance::DistanceType::L2Expanded, false}, - {100000, 8712, 3, 10, 51, 66, raft::distance::DistanceType::L2Expanded, false}}; - -} // namespace raft::neighbors::ivf_flat diff --git a/cpp/test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu b/cpp/test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu deleted file mode 100644 index 0e1036e566..0000000000 --- a/cpp/test/neighbors/ann_ivf_flat/test_filter_float_int64_t.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Enable instantiation of search with filter -#include "../ann_ivf_flat.cuh" - -namespace raft::neighbors::ivf_flat { - -typedef AnnIVFFlatTest AnnIVFFlatFilterTestF; -TEST_P(AnnIVFFlatFilterTestF, AnnIVFFlatFilter) { this->testFilter(); } - -INSTANTIATE_TEST_CASE_P(AnnIVFFlatTest, AnnIVFFlatFilterTestF, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::ivf_flat diff --git a/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu b/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu deleted file mode 100644 index 2ff17b8536..0000000000 --- a/cpp/test/neighbors/ann_ivf_flat/test_float_int64_t.cu +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_flat.cuh" - -#include - -namespace raft::neighbors::ivf_flat { - -typedef AnnIVFFlatTest AnnIVFFlatTestF; -TEST_P(AnnIVFFlatTestF, AnnIVFFlat) -{ - this->testIVFFlat(); - this->testPacker(); -} - -INSTANTIATE_TEST_CASE_P(AnnIVFFlatTest, AnnIVFFlatTestF, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::ivf_flat diff --git a/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu deleted file mode 100644 index 6fe12506aa..0000000000 --- a/cpp/test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_flat.cuh" - -#include - -namespace raft::neighbors::ivf_flat { - -typedef AnnIVFFlatTest AnnIVFFlatTestF_int8; -TEST_P(AnnIVFFlatTestF_int8, AnnIVFFlat) { this->testIVFFlat(); } - -INSTANTIATE_TEST_CASE_P(AnnIVFFlatTest, AnnIVFFlatTestF_int8, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::ivf_flat diff --git a/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu deleted file mode 100644 index ab6001c71b..0000000000 --- a/cpp/test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_flat.cuh" - -#include - -namespace raft::neighbors::ivf_flat { - -typedef AnnIVFFlatTest AnnIVFFlatTestF_uint8; -TEST_P(AnnIVFFlatTestF_uint8, AnnIVFFlat) { this->testIVFFlat(); } - -INSTANTIATE_TEST_CASE_P(AnnIVFFlatTest, AnnIVFFlatTestF_uint8, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::ivf_flat diff --git a/cpp/test/neighbors/ann_ivf_pq.cuh b/cpp/test/neighbors/ann_ivf_pq.cuh deleted file mode 100644 index 4ebe02027f..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq.cuh +++ /dev/null @@ -1,1095 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../test_utils.cuh" -#include "ann_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace raft::neighbors::ivf_pq { - -struct test_ivf_sample_filter { - static constexpr unsigned offset = 1500; -}; - -struct ivf_pq_inputs { - uint32_t num_db_vecs = 4096; - uint32_t num_queries = 1024; - uint32_t dim = 64; - uint32_t k = 32; - std::optional min_recall = std::nullopt; - - ivf_pq::index_params index_params; - ivf_pq::search_params search_params; - - // Set some default parameters for tests - ivf_pq_inputs() - { - index_params.n_lists = max(32u, min(1024u, num_db_vecs / 128u)); - index_params.kmeans_trainset_fraction = 1.0; - } -}; - -inline auto operator<<(std::ostream& os, const ivf_pq::codebook_gen& p) -> std::ostream& -{ - switch (p) { - case ivf_pq::codebook_gen::PER_CLUSTER: os << "codebook_gen::PER_CLUSTER"; break; - case ivf_pq::codebook_gen::PER_SUBSPACE: os << "codebook_gen::PER_SUBSPACE"; break; - default: RAFT_FAIL("unreachable code"); - } - return os; -} - -inline auto operator<<(std::ostream& os, const ivf_pq_inputs& p) -> std::ostream& -{ - ivf_pq_inputs dflt; - bool need_comma = false; -#define PRINT_DIFF_V(spec, val) \ - do { \ - if (dflt spec != p spec) { \ - if (need_comma) { os << ", "; } \ - os << #spec << " = " << val; \ - need_comma = true; \ - } \ - } while (0) -#define PRINT_DIFF(spec) PRINT_DIFF_V(spec, p spec) - - os << "ivf_pq_inputs {"; - PRINT_DIFF(.num_db_vecs); - PRINT_DIFF(.num_queries); - PRINT_DIFF(.dim); - PRINT_DIFF(.k); - PRINT_DIFF_V(.min_recall, p.min_recall.value_or(0)); - PRINT_DIFF_V(.index_params.metric, print_metric{p.index_params.metric}); - PRINT_DIFF(.index_params.metric_arg); - PRINT_DIFF(.index_params.add_data_on_build); - PRINT_DIFF(.index_params.n_lists); - PRINT_DIFF(.index_params.kmeans_n_iters); - PRINT_DIFF(.index_params.kmeans_trainset_fraction); - PRINT_DIFF(.index_params.pq_bits); - PRINT_DIFF(.index_params.pq_dim); - PRINT_DIFF(.index_params.codebook_kind); - PRINT_DIFF(.index_params.force_random_rotation); - PRINT_DIFF(.search_params.n_probes); - PRINT_DIFF_V(.search_params.lut_dtype, print_dtype{p.search_params.lut_dtype}); - PRINT_DIFF_V(.search_params.internal_distance_dtype, - print_dtype{p.search_params.internal_distance_dtype}); - os << "}"; - return os; -} - -template -void compare_vectors_l2( - const raft::resources& res, T a, T b, uint32_t label, double compression_ratio, double eps) -{ - auto n_rows = a.extent(0); - auto dim = a.extent(1); - rmm::mr::managed_memory_resource managed_memory; - auto dist = make_device_mdarray(res, &managed_memory, make_extents(n_rows)); - linalg::map_offset(res, dist.view(), [a, b, dim] __device__(uint32_t i) { - spatial::knn::detail::utils::mapping f{}; - double d = 0.0f; - for (uint32_t j = 0; j < dim; j++) { - double t = f(a(i, j)) - f(b(i, j)); - d += t * t; - } - return sqrt(d / double(dim)); - }); - resource::sync_stream(res); - for (uint32_t i = 0; i < n_rows; i++) { - double d = dist(i); - // The theoretical estimate of the error is hard to come up with, - // the estimate below is based on experimentation + curse of dimensionality - ASSERT_LE(d, 1.2 * eps * std::pow(2.0, compression_ratio)) - << " (label = " << label << ", ix = " << i << ", eps = " << eps << ")"; - } -} - -template -auto min_output_size(const raft::resources& handle, - const ivf_pq::index& index, - uint32_t n_probes) -> IdxT -{ - auto acc_sizes = index.accum_sorted_sizes(); - uint32_t last_nonzero = index.n_lists(); - while (last_nonzero > 0 && acc_sizes(last_nonzero - 1) == acc_sizes(last_nonzero)) { - last_nonzero--; - } - return acc_sizes(last_nonzero) - acc_sizes(last_nonzero - std::min(last_nonzero, n_probes)); -} - -template -class ivf_pq_test : public ::testing::TestWithParam { - public: - ivf_pq_test() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam::GetParam()), - database(0, stream_), - search_queries(0, stream_) - { - } - - void gen_data() - { - database.resize(size_t{ps.num_db_vecs} * size_t{ps.dim}, stream_); - search_queries.resize(size_t{ps.num_queries} * size_t{ps.dim}, stream_); - - raft::random::RngState r(1234ULL); - if constexpr (std::is_same{}) { - raft::random::uniform( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0)); - raft::random::uniform( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0)); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(1), DataT(20)); - raft::random::uniformInt( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(1), DataT(20)); - } - resource::sync_stream(handle_); - } - - void calc_ref() - { - size_t queries_size = size_t{ps.num_queries} * size_t{ps.k}; - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database.data(), - ps.num_queries, - ps.num_db_vecs, - ps.dim, - ps.k, - ps.index_params.metric); - distances_ref.resize(queries_size); - update_host(distances_ref.data(), distances_naive_dev.data(), queries_size, stream_); - indices_ref.resize(queries_size); - update_host(indices_ref.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - auto build_only() - { - auto ipams = ps.index_params; - ipams.add_data_on_build = true; - - auto index_view = - raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); - return ivf_pq::build(handle_, ipams, index_view); - } - - auto build_2_extends() - { - auto db_indices = make_device_vector(handle_, ps.num_db_vecs); - linalg::map_offset(handle_, db_indices.view(), identity_op{}); - resource::sync_stream(handle_); - auto size_1 = IdxT(ps.num_db_vecs) / 2; - auto size_2 = IdxT(ps.num_db_vecs) - size_1; - auto vecs_1 = database.data(); - auto vecs_2 = database.data() + size_t(size_1) * size_t(ps.dim); - auto inds_1 = db_indices.data_handle(); - auto inds_2 = db_indices.data_handle() + size_t(size_1); - - auto ipams = ps.index_params; - ipams.add_data_on_build = false; - - auto database_view = - raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); - auto idx = ivf_pq::build(handle_, ipams, database_view); - - auto vecs_2_view = raft::make_device_matrix_view(vecs_2, size_2, ps.dim); - auto inds_2_view = raft::make_device_vector_view(inds_2, size_2); - ivf_pq::extend(handle_, vecs_2_view, inds_2_view, &idx); - - auto vecs_1_view = - raft::make_device_matrix_view(vecs_1, size_1, ps.dim); - auto inds_1_view = raft::make_device_vector_view(inds_1, size_1); - ivf_pq::extend(handle_, vecs_1_view, inds_1_view, &idx); - return idx; - } - - auto build_serialize() - { - ivf_pq::serialize(handle_, "ivf_pq_index", build_only()); - return ivf_pq::deserialize(handle_, "ivf_pq_index"); - } - - void check_reconstruction(const index& index, - double compression_ratio, - uint32_t label, - uint32_t n_take, - uint32_t n_skip) - { - auto& rec_list = index.lists()[label]; - auto dim = index.dim(); - n_take = std::min(n_take, rec_list->size.load()); - n_skip = std::min(n_skip, rec_list->size.load() - n_take); - - if (n_take == 0) { return; } - - auto rec_data = make_device_matrix(handle_, n_take, dim); - auto orig_data = make_device_matrix(handle_, n_take, dim); - - ivf_pq::helpers::reconstruct_list_data(handle_, index, rec_data.view(), label, n_skip); - - matrix::gather(database.data(), - IdxT{dim}, - IdxT{n_take}, - rec_list->indices.data_handle() + n_skip, - IdxT{n_take}, - orig_data.data_handle(), - stream_); - - compare_vectors_l2(handle_, rec_data.view(), orig_data.view(), label, compression_ratio, 0.06); - } - - void check_reconstruct_extend(index* index, double compression_ratio, uint32_t label) - { - // NB: this is not reference, the list is retained; the index will have to create a new list on - // `erase_list` op. - auto old_list = index->lists()[label]; - auto n_rows = old_list->size.load(); - if (n_rows == 0) { return; } - - auto vectors_1 = make_device_matrix(handle_, n_rows, index->dim()); - auto indices = make_device_vector(handle_, n_rows); - copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_); - - ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_1.view(), label, 0); - ivf_pq::helpers::erase_list(handle_, index, label); - // NB: passing the type parameter because const->non-const implicit conversion of the mdspans - // breaks type inference - ivf_pq::helpers::extend_list( - handle_, index, vectors_1.view(), indices.view(), label); - - auto& new_list = index->lists()[label]; - ASSERT_NE(old_list.get(), new_list.get()) - << "The old list should have been shared and retained after ivf_pq index has erased the " - "corresponding cluster."; - - auto vectors_2 = make_device_matrix(handle_, n_rows, index->dim()); - ivf_pq::helpers::reconstruct_list_data(handle_, *index, vectors_2.view(), label, 0); - // The code search is unstable, and there's high chance of repeating values of the lvl-2 codes. - // Hence, encoding-decoding chain often leads to altering both the PQ codes and the - // reconstructed data. - compare_vectors_l2( - handle_, vectors_1.view(), vectors_2.view(), label, compression_ratio, 0.04); // 0.025); - } - - void check_packing(index* index, uint32_t label) - { - auto old_list = index->lists()[label]; - auto n_rows = old_list->size.load(); - - if (n_rows == 0) { return; } - - auto codes = make_device_matrix(handle_, n_rows, index->pq_dim()); - auto indices = make_device_vector(handle_, n_rows); - copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_); - - ivf_pq::helpers::unpack_list_data(handle_, *index, codes.view(), label, 0); - ivf_pq::helpers::erase_list(handle_, index, label); - ivf_pq::helpers::extend_list_with_codes( - handle_, index, codes.view(), indices.view(), label); - - auto& new_list = index->lists()[label]; - ASSERT_NE(old_list.get(), new_list.get()) - << "The old list should have been shared and retained after ivf_pq index has erased the " - "corresponding cluster."; - auto list_data_size = (n_rows / ivf_pq::kIndexGroupSize) * new_list->data.extent(1) * - new_list->data.extent(2) * new_list->data.extent(3); - - ASSERT_TRUE(old_list->data.size() >= list_data_size); - ASSERT_TRUE(new_list->data.size() >= list_data_size); - ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), - new_list->data.data_handle(), - list_data_size, - Compare{})); - - // Pack a few vectors back to the list. - int row_offset = 9; - int n_vec = 3; - ASSERT_TRUE(row_offset + n_vec < n_rows); - size_t offset = row_offset * index->pq_dim(); - auto codes_to_pack = make_device_matrix_view( - codes.data_handle() + offset, n_vec, index->pq_dim()); - ivf_pq::helpers::pack_list_data(handle_, index, codes_to_pack, label, row_offset); - ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), - new_list->data.data_handle(), - list_data_size, - Compare{})); - - // Another test with the API that take list_data directly - auto list_data = index->lists()[label]->data.view(); - uint32_t n_take = 4; - ASSERT_TRUE(row_offset + n_take < n_rows); - auto codes2 = raft::make_device_matrix(handle_, n_take, index->pq_dim()); - ivf_pq::helpers::codepacker::unpack( - handle_, list_data, index->pq_bits(), row_offset, codes2.view()); - - // Write it back - ivf_pq::helpers::codepacker::pack( - handle_, make_const_mdspan(codes2.view()), index->pq_bits(), row_offset, list_data); - ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), - new_list->data.data_handle(), - list_data_size, - Compare{})); - } - void check_packing_contiguous(index* index, uint32_t label) - { - auto old_list = index->lists()[label]; - auto n_rows = old_list->size.load(); - - if (n_rows == 0) { return; } - - auto codes = make_device_matrix(handle_, n_rows, index->pq_dim()); - auto indices = make_device_vector(handle_, n_rows); - copy(indices.data_handle(), old_list->indices.data_handle(), n_rows, stream_); - - uint32_t code_size = ceildiv(index->pq_dim() * index->pq_bits(), 8); - - auto codes_compressed = make_device_matrix(handle_, n_rows, code_size); - - ivf_pq::helpers::unpack_contiguous_list_data( - handle_, *index, codes_compressed.data_handle(), n_rows, label, 0); - ivf_pq::helpers::erase_list(handle_, index, label); - ivf_pq::detail::extend_list_prepare(handle_, index, make_const_mdspan(indices.view()), label); - ivf_pq::helpers::pack_contiguous_list_data( - handle_, index, codes_compressed.data_handle(), n_rows, label, 0); - ivf_pq::helpers::recompute_internal_state(handle_, index); - - auto& new_list = index->lists()[label]; - ASSERT_NE(old_list.get(), new_list.get()) - << "The old list should have been shared and retained after ivf_pq index has erased the " - "corresponding cluster."; - auto list_data_size = (n_rows / ivf_pq::kIndexGroupSize) * new_list->data.extent(1) * - new_list->data.extent(2) * new_list->data.extent(3); - - ASSERT_TRUE(old_list->data.size() >= list_data_size); - ASSERT_TRUE(new_list->data.size() >= list_data_size); - ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), - new_list->data.data_handle(), - list_data_size, - Compare{})); - - // Pack a few vectors back to the list. - uint32_t row_offset = 9; - uint32_t n_vec = 3; - ASSERT_TRUE(row_offset + n_vec < n_rows); - size_t offset = row_offset * code_size; - auto codes_to_pack = make_device_matrix_view( - codes_compressed.data_handle() + offset, n_vec, index->pq_dim()); - ivf_pq::helpers::pack_contiguous_list_data( - handle_, index, codes_to_pack.data_handle(), n_vec, label, row_offset); - ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), - new_list->data.data_handle(), - list_data_size, - Compare{})); - - // // Another test with the API that take list_data directly - auto list_data = index->lists()[label]->data.view(); - uint32_t n_take = 4; - ASSERT_TRUE(row_offset + n_take < n_rows); - auto codes2 = raft::make_device_matrix(handle_, n_take, code_size); - ivf_pq::helpers::codepacker::unpack_contiguous(handle_, - list_data, - index->pq_bits(), - row_offset, - n_take, - index->pq_dim(), - codes2.data_handle()); - - // Write it back - ivf_pq::helpers::codepacker::pack_contiguous(handle_, - codes2.data_handle(), - n_vec, - index->pq_dim(), - index->pq_bits(), - row_offset, - list_data); - ASSERT_TRUE(devArrMatch(old_list->data.data_handle(), - new_list->data.data_handle(), - list_data_size, - Compare{})); - } - - template - void run(BuildIndex build_index) - { - index index = build_index(); - - double compression_ratio = - static_cast(ps.dim * 8) / static_cast(index.pq_dim() * index.pq_bits()); - - for (uint32_t label = 0; label < index.n_lists(); label++) { - switch (label % 3) { - case 0: { - // Reconstruct and re-write vectors for one label - check_reconstruct_extend(&index, compression_ratio, label); - } break; - case 1: { - // Dump and re-write codes for one label - check_packing(&index, label); - check_packing_contiguous(&index, label); - } break; - default: { - // check a small subset of data in a randomly chosen cluster to see if the data - // reconstruction works well. - check_reconstruction(index, compression_ratio, label, 100, 7); - } - } - } - - size_t queries_size = ps.num_queries * ps.k; - std::vector indices_ivf_pq(queries_size); - std::vector distances_ivf_pq(queries_size); - - rmm::device_uvector distances_ivf_pq_dev(queries_size, stream_); - rmm::device_uvector indices_ivf_pq_dev(queries_size, stream_); - - auto query_view = - raft::make_device_matrix_view(search_queries.data(), ps.num_queries, ps.dim); - auto inds_view = raft::make_device_matrix_view( - indices_ivf_pq_dev.data(), ps.num_queries, ps.k); - auto dists_view = raft::make_device_matrix_view( - distances_ivf_pq_dev.data(), ps.num_queries, ps.k); - - ivf_pq::search( - handle_, ps.search_params, index, query_view, inds_view, dists_view); - - update_host(distances_ivf_pq.data(), distances_ivf_pq_dev.data(), queries_size, stream_); - update_host(indices_ivf_pq.data(), indices_ivf_pq_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - - // A very conservative lower bound on recall - double min_recall = - static_cast(ps.search_params.n_probes) / static_cast(ps.index_params.n_lists); - // Using a heuristic to lower the required recall due to code-packing errors - min_recall = - std::min(std::erfc(0.05 * compression_ratio / std::max(min_recall, 0.5)), min_recall); - // Use explicit per-test min recall value if provided. - min_recall = ps.min_recall.value_or(min_recall); - - ASSERT_TRUE(eval_neighbours(indices_ref, - indices_ivf_pq, - distances_ref, - distances_ivf_pq, - ps.num_queries, - ps.k, - 0.0001 * compression_ratio, - min_recall)) - << ps; - - // Test a few extra invariants - IdxT min_results = min_output_size(handle_, index, ps.search_params.n_probes); - IdxT max_oob = ps.k <= min_results ? 0 : ps.k - min_results; - IdxT found_oob = 0; - for (uint32_t query_ix = 0; query_ix < ps.num_queries; query_ix++) { - for (uint32_t k = 0; k < ps.k; k++) { - auto flat_i = query_ix * ps.k + k; - auto found_ix = indices_ivf_pq[flat_i]; - if (found_ix == ivf_pq::kOutOfBoundsRecord) { - found_oob++; - continue; - } - ASSERT_NE(found_ix, ivf::kInvalidRecord) - << "got an invalid record at query_ix = " << query_ix << ", k = " << k - << " (distance = " << distances_ivf_pq[flat_i] << ")"; - ASSERT_LT(found_ix, ps.num_db_vecs) - << "got an impossible index = " << found_ix << " at query_ix = " << query_ix - << ", k = " << k << " (distance = " << distances_ivf_pq[flat_i] << ")"; - } - } - ASSERT_LE(found_oob, max_oob) - << "got too many records out-of-bounds (see ivf_pq::kOutOfBoundsRecord)."; - if (found_oob > 0) { - RAFT_LOG_WARN( - "Got %zu results out-of-bounds because of large top-k (%zu) and small n_probes (%u) and " - "small DB size/n_lists ratio (%zu / %u)", - size_t(found_oob), - size_t(ps.k), - ps.search_params.n_probes, - size_t(ps.num_db_vecs), - ps.index_params.n_lists); - } - } - - void SetUp() override // NOLINT - { - gen_data(); - calc_ref(); - } - - void TearDown() override // NOLINT - { - cudaGetLastError(); - resource::sync_stream(handle_); - database.resize(0, stream_); - search_queries.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - ivf_pq_inputs ps; // NOLINT - rmm::device_uvector database; // NOLINT - rmm::device_uvector search_queries; // NOLINT - std::vector indices_ref; // NOLINT - std::vector distances_ref; // NOLINT -}; - -template -class ivf_pq_filter_test : public ::testing::TestWithParam { - public: - ivf_pq_filter_test() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam::GetParam()), - database(0, stream_), - search_queries(0, stream_) - { - } - - void gen_data() - { - database.resize(size_t{ps.num_db_vecs} * size_t{ps.dim}, stream_); - search_queries.resize(size_t{ps.num_queries} * size_t{ps.dim}, stream_); - - raft::random::RngState r(1234ULL); - if constexpr (std::is_same{}) { - raft::random::uniform( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(0.1), DataT(2.0)); - raft::random::uniform( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(0.1), DataT(2.0)); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.num_db_vecs * ps.dim, DataT(1), DataT(20)); - raft::random::uniformInt( - handle_, r, search_queries.data(), ps.num_queries * ps.dim, DataT(1), DataT(20)); - } - resource::sync_stream(handle_); - } - - void calc_ref() - { - size_t queries_size = size_t{ps.num_queries} * size_t{ps.k}; - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - search_queries.data(), - database.data() + test_ivf_sample_filter::offset * ps.dim, - ps.num_queries, - ps.num_db_vecs - test_ivf_sample_filter::offset, - ps.dim, - ps.k, - ps.index_params.metric); - raft::linalg::addScalar(indices_naive_dev.data(), - indices_naive_dev.data(), - IdxT(test_ivf_sample_filter::offset), - queries_size, - stream_); - distances_ref.resize(queries_size); - update_host(distances_ref.data(), distances_naive_dev.data(), queries_size, stream_); - indices_ref.resize(queries_size); - update_host(indices_ref.data(), indices_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - auto build_only() - { - auto ipams = ps.index_params; - ipams.add_data_on_build = true; - - auto index_view = - raft::make_device_matrix_view(database.data(), ps.num_db_vecs, ps.dim); - return ivf_pq::build(handle_, ipams, index_view); - } - - template - void run(BuildIndex build_index) - { - index index = build_index(); - - double compression_ratio = - static_cast(ps.dim * 8) / static_cast(index.pq_dim() * index.pq_bits()); - size_t queries_size = ps.num_queries * ps.k; - std::vector indices_ivf_pq(queries_size); - std::vector distances_ivf_pq(queries_size); - - rmm::device_uvector distances_ivf_pq_dev(queries_size, stream_); - rmm::device_uvector indices_ivf_pq_dev(queries_size, stream_); - - auto query_view = - raft::make_device_matrix_view(search_queries.data(), ps.num_queries, ps.dim); - auto inds_view = raft::make_device_matrix_view( - indices_ivf_pq_dev.data(), ps.num_queries, ps.k); - auto dists_view = raft::make_device_matrix_view( - distances_ivf_pq_dev.data(), ps.num_queries, ps.k); - - // Create Bitset filter - auto removed_indices = - raft::make_device_vector(handle_, test_ivf_sample_filter::offset); - thrust::sequence( - resource::get_thrust_policy(handle_), - thrust::device_pointer_cast(removed_indices.data_handle()), - thrust::device_pointer_cast(removed_indices.data_handle() + test_ivf_sample_filter::offset)); - resource::sync_stream(handle_); - - raft::core::bitset removed_indices_bitset( - handle_, removed_indices.view(), ps.num_db_vecs); - ivf_pq::search_with_filtering( - handle_, - ps.search_params, - index, - query_view, - inds_view, - dists_view, - raft::neighbors::filtering::bitset_filter(removed_indices_bitset.view())); - - update_host(distances_ivf_pq.data(), distances_ivf_pq_dev.data(), queries_size, stream_); - update_host(indices_ivf_pq.data(), indices_ivf_pq_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - - // A very conservative lower bound on recall - double min_recall = - static_cast(ps.search_params.n_probes) / static_cast(ps.index_params.n_lists); - // Using a heuristic to lower the required recall due to code-packing errors - min_recall = - std::min(std::erfc(0.05 * compression_ratio / std::max(min_recall, 0.5)), min_recall); - // Use explicit per-test min recall value if provided. - min_recall = ps.min_recall.value_or(min_recall); - - ASSERT_TRUE(eval_neighbours(indices_ref, - indices_ivf_pq, - distances_ref, - distances_ivf_pq, - ps.num_queries, - ps.k, - 0.0001 * compression_ratio, - min_recall)) - << ps; - } - - void SetUp() override // NOLINT - { - gen_data(); - calc_ref(); - } - - void TearDown() override // NOLINT - { - cudaGetLastError(); - resource::sync_stream(handle_); - database.resize(0, stream_); - search_queries.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - ivf_pq_inputs ps; // NOLINT - rmm::device_uvector database; // NOLINT - rmm::device_uvector search_queries; // NOLINT - std::vector indices_ref; // NOLINT - std::vector distances_ref; // NOLINT -}; - -/* Test cases */ -using test_cases_t = std::vector; - -// concatenate parameter sets for different type -template -auto operator+(const std::vector& a, const std::vector& b) -> std::vector -{ - std::vector res = a; - res.insert(res.end(), b.begin(), b.end()); - return res; -} - -inline auto defaults() -> test_cases_t { return {ivf_pq_inputs{}}; } - -template -auto map(const std::vector& xs, F f) -> std::vector -{ - std::vector ys(xs.size()); - std::transform(xs.begin(), xs.end(), ys.begin(), f); - return ys; -} - -inline auto with_dims(const std::vector& dims) -> test_cases_t -{ - return map(dims, [](uint32_t d) { - ivf_pq_inputs x; - x.dim = d; - return x; - }); -} - -/** These will surely trigger the fastest kernel available. */ -inline auto small_dims() -> test_cases_t { return with_dims({1, 2, 3, 4, 5, 8, 15, 16, 17}); } - -inline auto small_dims_per_cluster() -> test_cases_t -{ - return map(small_dims(), [](const ivf_pq_inputs& x) { - ivf_pq_inputs y(x); - y.index_params.codebook_kind = ivf_pq::codebook_gen::PER_CLUSTER; - return y; - }); -} - -inline auto big_dims() -> test_cases_t -{ - // with_dims({512, 513, 1023, 1024, 1025, 2048, 2049, 2050, 2053, 6144, 8192, 12288, 16384}); - auto xs = with_dims({512, 513, 1023, 1024, 1025, 2048, 2049, 2050, 2053, 6144}); - return map(xs, [](const ivf_pq_inputs& x) { - ivf_pq_inputs y(x); - uint32_t pq_len = 2; - y.index_params.pq_dim = div_rounding_up_safe(x.dim, pq_len); - // This comes from pure experimentation, also the recall depens a lot on pq_len. - y.min_recall = 0.48 + 0.028 * std::log2(x.dim); - return y; - }); -} - -/** These will surely trigger no-smem-lut kernel. */ -inline auto big_dims_moderate_lut() -> test_cases_t -{ - return map(big_dims(), [](const ivf_pq_inputs& x) { - ivf_pq_inputs y(x); - uint32_t pq_len = 2; - y.index_params.pq_dim = round_up_safe(div_rounding_up_safe(x.dim, pq_len), 4u); - y.index_params.pq_bits = 6; - y.search_params.lut_dtype = CUDA_R_16F; - y.min_recall = 0.69; - return y; - }); -} - -/** Some of these should trigger no-basediff kernel. */ -inline auto big_dims_small_lut() -> test_cases_t -{ - return map(big_dims(), [](const ivf_pq_inputs& x) { - ivf_pq_inputs y(x); - uint32_t pq_len = 8; - y.index_params.pq_dim = round_up_safe(div_rounding_up_safe(x.dim, pq_len), 4u); - y.index_params.pq_bits = 6; - y.search_params.lut_dtype = CUDA_R_8U; - y.min_recall = 0.21; - return y; - }); -} - -/** - * A minimal set of tests to check various enum-like parameters. - */ -inline auto enum_variety() -> test_cases_t -{ - test_cases_t xs; -#define ADD_CASE(f) \ - do { \ - xs.push_back({}); \ - ([](ivf_pq_inputs & x) f)(xs[xs.size() - 1]); \ - } while (0); - - ADD_CASE({ - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_CLUSTER; - x.min_recall = 0.86; - }); - ADD_CASE({ - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_SUBSPACE; - x.min_recall = 0.86; - }); - ADD_CASE({ - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_CLUSTER; - x.index_params.pq_bits = 4; - x.min_recall = 0.79; - }); - ADD_CASE({ - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_CLUSTER; - x.index_params.pq_bits = 5; - x.min_recall = 0.83; - }); - - ADD_CASE({ - x.index_params.pq_bits = 6; - x.min_recall = 0.84; - }); - ADD_CASE({ - x.index_params.pq_bits = 7; - x.min_recall = 0.85; - }); - ADD_CASE({ - x.index_params.pq_bits = 8; - x.min_recall = 0.86; - }); - - ADD_CASE({ - x.index_params.force_random_rotation = true; - x.min_recall = 0.86; - }); - ADD_CASE({ - x.index_params.force_random_rotation = false; - x.min_recall = 0.86; - }); - - ADD_CASE({ - x.search_params.lut_dtype = CUDA_R_32F; - x.min_recall = 0.86; - }); - ADD_CASE({ - x.search_params.lut_dtype = CUDA_R_16F; - x.min_recall = 0.86; - }); - ADD_CASE({ - x.search_params.lut_dtype = CUDA_R_8U; - x.min_recall = 0.84; - }); - - ADD_CASE({ - x.search_params.internal_distance_dtype = CUDA_R_32F; - x.min_recall = 0.86; - }); - ADD_CASE({ - x.search_params.internal_distance_dtype = CUDA_R_16F; - x.search_params.lut_dtype = CUDA_R_16F; - x.min_recall = 0.86; - }); - - return xs; -} - -inline auto enum_variety_l2() -> test_cases_t -{ - return map(enum_variety(), [](const ivf_pq_inputs& x) { - ivf_pq_inputs y(x); - y.index_params.metric = distance::DistanceType::L2Expanded; - return y; - }); -} - -inline auto enum_variety_ip() -> test_cases_t -{ - return map(enum_variety(), [](const ivf_pq_inputs& x) { - ivf_pq_inputs y(x); - if (y.min_recall.has_value()) { - if (y.search_params.lut_dtype == CUDA_R_8U) { - // InnerProduct score is signed, - // thus we're forced to used signed 8-bit representation, - // thus we have one bit less precision - y.min_recall = y.min_recall.value() * 0.90; - } else { - // In other cases it seems to perform a little bit better, still worse than L2 - y.min_recall = y.min_recall.value() * 0.94; - } - } - y.index_params.metric = distance::DistanceType::InnerProduct; - return y; - }); -} - -inline auto enum_variety_l2sqrt() -> test_cases_t -{ - return map(enum_variety(), [](const ivf_pq_inputs& x) { - ivf_pq_inputs y(x); - y.index_params.metric = distance::DistanceType::L2SqrtExpanded; - return y; - }); -} - -/** - * Try different number of n_probes, some of which may trigger the non-fused version of the search - * kernel. - */ -inline auto var_n_probes() -> test_cases_t -{ - ivf_pq_inputs dflt; - std::vector xs; - for (auto x = dflt.index_params.n_lists; x >= 1; x /= 2) { - xs.push_back(x); - } - return map(xs, [](uint32_t n_probes) { - ivf_pq_inputs x; - x.search_params.n_probes = n_probes; - return x; - }); -} - -/** - * Try different number of nearest neighbours. - * Values smaller than 32 test if the code behaves well when Capacity (== 32) does not change, - * but `k <= Capacity` changes. - * - * Values between `32 and ivf_pq::detail::kMaxCapacity` test various instantiations of the - * main kernel (Capacity-templated) - * - * Values above ivf_pq::detail::kMaxCapacity should trigger the non-fused version of the kernel - * (manage_local_topk = false). - * - * Also we test here various values that are close-but-not-power-of-two to catch any problems - * related to rounding/alignment. - * - * Note, we cannot control explicitly which instance of the search kernel to choose, hence it's - * important to try a variety of different values of `k` to make sure all paths are triggered. - * - * Set the log level to DEBUG (5) or above to inspect the selected kernel instances. - */ -inline auto var_k() -> test_cases_t -{ - return map( - {1, 2, 3, 5, 8, 15, 16, 32, 63, 65, 127, 128, 256, 257, 1023, 2048, 2049}, [](uint32_t k) { - ivf_pq_inputs x; - x.k = k; - // when there's not enough data, try more cluster probes - x.search_params.n_probes = max(x.search_params.n_probes, min(x.index_params.n_lists, k)); - return x; - }); -} - -/** - * Cases brought up from downstream projects. - */ -inline auto special_cases() -> test_cases_t -{ - test_cases_t xs; - -#define ADD_CASE(f) \ - do { \ - xs.push_back({}); \ - ([](ivf_pq_inputs & x) f)(xs[xs.size() - 1]); \ - } while (0); - - ADD_CASE({ - x.num_db_vecs = 1183514; - x.dim = 100; - x.num_queries = 10000; - x.k = 10; - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_SUBSPACE; - x.index_params.pq_dim = 10; - x.index_params.pq_bits = 8; - x.index_params.n_lists = 1024; - x.search_params.n_probes = 50; - }); - - ADD_CASE({ - x.num_db_vecs = 10000; - x.dim = 16; - x.num_queries = 500; - x.k = 128; - x.index_params.metric = distance::DistanceType::L2Expanded; - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_SUBSPACE; - x.index_params.pq_bits = 8; - x.index_params.n_lists = 100; - x.search_params.n_probes = 100; - }); - - ADD_CASE({ - x.num_db_vecs = 10000; - x.dim = 16; - x.num_queries = 500; - x.k = 129; - x.index_params.metric = distance::DistanceType::L2Expanded; - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_SUBSPACE; - x.index_params.pq_bits = 8; - x.index_params.n_lists = 100; - x.search_params.n_probes = 100; - }); - - ADD_CASE({ - x.num_db_vecs = 4335; - x.dim = 4; - x.num_queries = 100000; - x.k = 12; - x.index_params.metric = distance::DistanceType::L2Expanded; - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_SUBSPACE; - x.index_params.pq_dim = 2; - x.index_params.pq_bits = 8; - x.index_params.n_lists = 69; - x.search_params.n_probes = 69; - }); - - ADD_CASE({ - x.num_db_vecs = 4335; - x.dim = 4; - x.num_queries = 100000; - x.k = 12; - x.index_params.metric = distance::DistanceType::L2Expanded; - x.index_params.codebook_kind = ivf_pq::codebook_gen::PER_CLUSTER; - x.index_params.pq_dim = 2; - x.index_params.pq_bits = 8; - x.index_params.n_lists = 69; - x.search_params.n_probes = 69; - }); - - return xs; -} - -/* Test instantiations */ - -#define TEST_BUILD_SEARCH(type) \ - TEST_P(type, build_search) /* NOLINT */ \ - { \ - this->run([this]() { return this->build_only(); }); \ - } - -#define TEST_BUILD_EXTEND_SEARCH(type) \ - TEST_P(type, build_extend_search) /* NOLINT */ \ - { \ - this->run([this]() { return this->build_2_extends(); }); \ - } - -#define TEST_BUILD_SERIALIZE_SEARCH(type) \ - TEST_P(type, build_serialize_search) /* NOLINT */ \ - { \ - this->run([this]() { return this->build_serialize(); }); \ - } - -#define INSTANTIATE(type, vals) \ - INSTANTIATE_TEST_SUITE_P(IvfPq, type, ::testing::ValuesIn(vals)); /* NOLINT */ - -} // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu deleted file mode 100644 index 5ba21c3c2f..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include // raft::neighbors::ivf_pq::index -#include - -#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT) \ - template raft::neighbors::ivf_pq::index raft::neighbors::ivf_pq::build( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset); \ - \ - template auto raft::neighbors::ivf_pq::build( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - const T* dataset, \ - IdxT n_rows, \ - uint32_t dim) \ - ->raft::neighbors::ivf_pq::index; - -instantiate_raft_neighbors_ivf_pq_build(float, uint32_t); - -#undef instantiate_raft_neighbors_ivf_pq_build diff --git a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_test-ext.cuh b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_test-ext.cuh deleted file mode 100644 index cd5435ab2e..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_test-ext.cuh +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include // raft::neighbors::ivf_pq::index -#include - -#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT) \ - extern template raft::neighbors::ivf_pq::index raft::neighbors::ivf_pq::build( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - raft::device_matrix_view dataset); \ - \ - extern template auto raft::neighbors::ivf_pq::build( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::index_params& params, \ - const T* dataset, \ - IdxT n_rows, \ - uint32_t dim) \ - ->raft::neighbors::ivf_pq::index; - -instantiate_raft_neighbors_ivf_pq_build(float, uint32_t); - -#undef instantiate_raft_neighbors_ivf_pq_build diff --git a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu deleted file mode 100644 index 00baa59f58..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include // raft::neighbors::ivf_pq::index -#include - -#include - -#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT) \ - template void raft::neighbors::ivf_pq::search( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::search_params& params, \ - const raft::neighbors::ivf_pq::index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances); \ - \ - template void raft::neighbors::ivf_pq::search( \ - raft::resources const& handle, \ - const raft::neighbors::ivf_pq::search_params& params, \ - const raft::neighbors::ivf_pq::index& idx, \ - const T* queries, \ - uint32_t n_queries, \ - uint32_t k, \ - IdxT* neighbors, \ - float* distances) - -instantiate_raft_neighbors_ivf_pq_search(float, uint32_t); - -#undef instantiate_raft_neighbors_ivf_pq_search - -#define instantiate_raft_neighbors_ivf_pq_search_with_filtering(T, IdxT, FilterT) \ - template void raft::neighbors::ivf_pq::search_with_filtering( \ - raft::resources const& handle, \ - const search_params& params, \ - const index& idx, \ - raft::device_matrix_view queries, \ - raft::device_matrix_view neighbors, \ - raft::device_matrix_view distances, \ - FilterT sample_filter) - -#define COMMA , -instantiate_raft_neighbors_ivf_pq_search_with_filtering( - float, uint32_t, raft::neighbors::filtering::bitset_filter); - -instantiate_raft_neighbors_ivf_pq_search_with_filtering( - int8_t, int64_t, raft::neighbors::filtering::bitset_filter); - -instantiate_raft_neighbors_ivf_pq_search_with_filtering( - float, uint32_t, raft::neighbors::filtering::none_ivf_sample_filter); - -#undef COMMA -#undef instantiate_raft_neighbors_ivf_pq_search_with_filtering diff --git a/cpp/test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu deleted file mode 100644 index 70d5d8761f..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_pq.cuh" - -#include -#include - -namespace raft::neighbors::ivf_pq { - -using f32_f32_i64_filter = ivf_pq_filter_test; - -TEST_BUILD_SEARCH(f32_f32_i64_filter) -INSTANTIATE(f32_f32_i64_filter, defaults() + big_dims_moderate_lut()); -} // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu deleted file mode 100644 index ba96a8db0b..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_pq.cuh" - -#include -#include - -namespace raft::neighbors::ivf_pq { - -using f32_i08_i64_filter = ivf_pq_filter_test; - -TEST_BUILD_SEARCH(f32_i08_i64_filter) -INSTANTIATE(f32_i08_i64_filter, big_dims()); - -} // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu deleted file mode 100644 index 9859061d70..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/test_float_int64_t.cu +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_pq.cuh" - -namespace raft::neighbors::ivf_pq { - -using f32_f32_i64 = ivf_pq_test; - -TEST_BUILD_EXTEND_SEARCH(f32_f32_i64) -TEST_BUILD_SERIALIZE_SEARCH(f32_f32_i64) -INSTANTIATE(f32_f32_i64, defaults() + small_dims() + big_dims_moderate_lut()); - -} // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu deleted file mode 100644 index b8ada2249a..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_pq.cuh" -#include "ivf_pq_build_test-ext.cuh" - -#include -#include - -namespace raft::neighbors::ivf_pq { - -using f32_f32_u32 = ivf_pq_test; -using f32_f32_u32_filter = ivf_pq_filter_test; - -TEST_BUILD_SEARCH(f32_f32_u32) -TEST_BUILD_SERIALIZE_SEARCH(f32_f32_u32) -INSTANTIATE(f32_f32_u32, defaults() + var_n_probes() + var_k() + special_cases()); - -TEST_BUILD_SEARCH(f32_f32_u32_filter) -INSTANTIATE(f32_f32_u32_filter, defaults()); -} // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu deleted file mode 100644 index 970bdd6a12..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_pq.cuh" - -#include -namespace raft::neighbors::ivf_pq { - -using f32_i08_i64 = ivf_pq_test; - -TEST_BUILD_SEARCH(f32_i08_i64) -TEST_BUILD_SERIALIZE_SEARCH(f32_i08_i64) -INSTANTIATE(f32_i08_i64, defaults() + big_dims() + var_k()); - -} // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu deleted file mode 100644 index e949c2f7ed..0000000000 --- a/cpp/test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_ivf_pq.cuh" - -namespace raft::neighbors::ivf_pq { - -using f32_u08_i64 = ivf_pq_test; - -TEST_BUILD_SEARCH(f32_u08_i64) -TEST_BUILD_EXTEND_SEARCH(f32_u08_i64) -INSTANTIATE(f32_u08_i64, small_dims_per_cluster() + enum_variety()); - -} // namespace raft::neighbors::ivf_pq diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh deleted file mode 100644 index 5070d83b15..0000000000 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ /dev/null @@ -1,332 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "ann_utils.cuh" - -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include - -namespace raft::neighbors::experimental::nn_descent { - -struct AnnNNDescentInputs { - int n_rows; - int dim; - int graph_degree; - raft::distance::DistanceType metric; - bool host_dataset; - double min_recall; -}; - -struct AnnNNDescentBatchInputs { - std::pair recall_cluster; - int n_rows; - int dim; - int graph_degree; - raft::distance::DistanceType metric; - bool host_dataset; -}; - -inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentInputs& p) -{ - os << "dataset shape=" << p.n_rows << "x" << p.dim << ", graph_degree=" << p.graph_degree - << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") - << std::endl; - return os; -} - -inline ::std::ostream& operator<<(::std::ostream& os, const AnnNNDescentBatchInputs& p) -{ - os << "dataset shape=" << p.n_rows << "x" << p.dim << ", graph_degree=" << p.graph_degree - << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") - << ", clusters=" << p.recall_cluster.second << std::endl; - return os; -} - -template -class AnnNNDescentTest : public ::testing::TestWithParam { - public: - AnnNNDescentTest() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam::GetParam()), - database(0, stream_) - { - } - - protected: - void testNNDescent() - { - size_t queries_size = ps.n_rows * ps.graph_degree; - std::vector indices_NNDescent(queries_size); - std::vector distances_NNDescent(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - database.data(), - database.data(), - ps.n_rows, - ps.n_rows, - ps.dim, - ps.graph_degree, - ps.metric); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - { - { - nn_descent::index_params index_params; - index_params.metric = ps.metric; - index_params.graph_degree = ps.graph_degree; - index_params.intermediate_graph_degree = 2 * ps.graph_degree; - index_params.max_iterations = 100; - index_params.return_distances = true; - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.n_rows, ps.dim); - - { - if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - index index{handle_, ps.n_rows, static_cast(ps.graph_degree), true}; - nn_descent::build( - handle_, index_params, database_host_view, index, DistEpilogue()); - raft::copy( - indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); - if (index.distances().has_value()) { - raft::copy(distances_NNDescent.data(), - index.distances().value().data_handle(), - queries_size, - stream_); - } - - } else { - index index{handle_, ps.n_rows, static_cast(ps.graph_degree), true}; - nn_descent::build( - handle_, index_params, database_view, index, DistEpilogue()); - raft::copy( - indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); - if (index.distances().has_value()) { - raft::copy(distances_NNDescent.data(), - index.distances().value().data_handle(), - queries_size, - stream_); - } - }; - } - resource::sync_stream(handle_); - } - - double min_recall = ps.min_recall; - EXPECT_TRUE(eval_neighbours(indices_naive, - indices_NNDescent, - distances_naive, - distances_NNDescent, - ps.n_rows, - ps.graph_degree, - 0.001, - min_recall)); - } - } - - void SetUp() override - { - database.resize(((size_t)ps.n_rows) * ps.dim, stream_); - raft::random::RngState r(1234ULL); - if constexpr (std::is_same{}) { - raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); - } - resource::sync_stream(handle_); - } - - void TearDown() override - { - resource::sync_stream(handle_); - database.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - AnnNNDescentInputs ps; - rmm::device_uvector database; -}; - -template -class AnnNNDescentBatchTest : public ::testing::TestWithParam { - public: - AnnNNDescentBatchTest() - : stream_(resource::get_cuda_stream(handle_)), - ps(::testing::TestWithParam::GetParam()), - database(0, stream_) - { - } - - void testNNDescentBatch() - { - size_t queries_size = ps.n_rows * ps.graph_degree; - std::vector indices_NNDescent(queries_size); - std::vector distances_NNDescent(queries_size); - std::vector indices_naive(queries_size); - std::vector distances_naive(queries_size); - - { - rmm::device_uvector distances_naive_dev(queries_size, stream_); - rmm::device_uvector indices_naive_dev(queries_size, stream_); - naive_knn(handle_, - distances_naive_dev.data(), - indices_naive_dev.data(), - database.data(), - database.data(), - ps.n_rows, - ps.n_rows, - ps.dim, - ps.graph_degree, - ps.metric); - update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); - update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); - resource::sync_stream(handle_); - } - - { - { - nn_descent::index_params index_params; - index_params.metric = ps.metric; - index_params.graph_degree = ps.graph_degree; - index_params.intermediate_graph_degree = 2 * ps.graph_degree; - index_params.max_iterations = 10; - index_params.return_distances = true; - index_params.n_clusters = ps.recall_cluster.second; - - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.n_rows, ps.dim); - - { - if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); - raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); - auto index = nn_descent::build( - handle_, index_params, database_host_view, DistEpilogue()); - raft::copy( - indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); - if (index.distances().has_value()) { - raft::copy(distances_NNDescent.data(), - index.distances().value().data_handle(), - queries_size, - stream_); - } - - } else { - auto index = nn_descent::build( - handle_, index_params, database_view, DistEpilogue()); - raft::copy( - indices_NNDescent.data(), index.graph().data_handle(), queries_size, stream_); - if (index.distances().has_value()) { - raft::copy(distances_NNDescent.data(), - index.distances().value().data_handle(), - queries_size, - stream_); - } - }; - } - resource::sync_stream(handle_); - } - double min_recall = ps.recall_cluster.first; - EXPECT_TRUE(eval_neighbours(indices_naive, - indices_NNDescent, - distances_naive, - distances_NNDescent, - ps.n_rows, - ps.graph_degree, - 0.01, - min_recall, - true, - static_cast(ps.graph_degree * 0.1))); - } - } - - void SetUp() override - { - database.resize(((size_t)ps.n_rows) * ps.dim, stream_); - raft::random::RngState r(1234ULL); - if constexpr (std::is_same{}) { - raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); - } - resource::sync_stream(handle_); - } - - void TearDown() override - { - resource::sync_stream(handle_); - database.resize(0, stream_); - } - - private: - raft::resources handle_; - rmm::cuda_stream_view stream_; - AnnNNDescentBatchInputs ps; - rmm::device_uvector database; -}; - -const std::vector inputs = raft::util::itertools::product( - {1000, 2000}, // n_rows - {3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024}, // dim - {32, 64}, // graph_degree - {raft::distance::DistanceType::L2Expanded}, - {false, true}, - {0.90}); - -// TODO: Investigate why this test is failing -// Reference issue https://github.com/rapidsai/raft/issues/2450 -// const std::vector inputsBatch = -// raft::util::itertools::product( -// {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters -// {4000, 5000}, // n_rows -// {192, 512}, // dim -// {32, 64}, // graph_degree -// {raft::distance::DistanceType::L2Expanded}, -// {false, true}); - -} // namespace raft::neighbors::experimental::nn_descent diff --git a/cpp/test/neighbors/ann_nn_descent/test_batch_float_uint32_t.cu b/cpp/test/neighbors/ann_nn_descent/test_batch_float_uint32_t.cu deleted file mode 100644 index c6f56e8c39..0000000000 --- a/cpp/test/neighbors/ann_nn_descent/test_batch_float_uint32_t.cu +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_nn_descent.cuh" - -#include - -namespace raft::neighbors::experimental::nn_descent { - -typedef AnnNNDescentBatchTest AnnNNDescentBatchTestF_U32; -TEST_P(AnnNNDescentBatchTestF_U32, AnnNNDescentBatch) { this->testNNDescentBatch(); } - -INSTANTIATE_TEST_CASE_P(AnnNNDescentBatchTest, - AnnNNDescentBatchTestF_U32, - ::testing::ValuesIn(inputsBatch)); - -} // namespace raft::neighbors::experimental::nn_descent diff --git a/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu b/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu deleted file mode 100644 index ec6d04ad12..0000000000 --- a/cpp/test/neighbors/ann_nn_descent/test_float_uint32_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_nn_descent.cuh" - -#include - -namespace raft::neighbors::experimental::nn_descent { - -typedef AnnNNDescentTest AnnNNDescentTestF_U32; -TEST_P(AnnNNDescentTestF_U32, AnnNNDescent) { this->testNNDescent(); } - -INSTANTIATE_TEST_CASE_P(AnnNNDescentTest, AnnNNDescentTestF_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::experimental::nn_descent diff --git a/cpp/test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu deleted file mode 100644 index 27fa42d636..0000000000 --- a/cpp/test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_nn_descent.cuh" - -#include - -namespace raft::neighbors::experimental::nn_descent { - -typedef AnnNNDescentTest AnnNNDescentTestI8_U32; -TEST_P(AnnNNDescentTestI8_U32, AnnNNDescent) { this->testNNDescent(); } - -INSTANTIATE_TEST_CASE_P(AnnNNDescentTest, AnnNNDescentTestI8_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::experimental::nn_descent diff --git a/cpp/test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu deleted file mode 100644 index 3afe79dcc4..0000000000 --- a/cpp/test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../ann_nn_descent.cuh" - -#include - -namespace raft::neighbors::experimental::nn_descent { - -typedef AnnNNDescentTest AnnNNDescentTestUI8_U32; -TEST_P(AnnNNDescentTestUI8_U32, AnnNNDescent) { this->testNNDescent(); } - -INSTANTIATE_TEST_CASE_P(AnnNNDescentTest, AnnNNDescentTestUI8_U32, ::testing::ValuesIn(inputs)); - -} // namespace raft::neighbors::experimental::nn_descent diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index 82e3ace9da..bb6b6d68a6 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -332,4 +332,4 @@ auto eval_distances(raft::resources const& handle, } return testing::AssertionSuccess(); } -} // namespace raft::neighbors +} // namespace raft::neighbors \ No newline at end of file diff --git a/cpp/test/neighbors/fused_l2_knn.cu b/cpp/test/neighbors/fused_l2_knn.cu deleted file mode 100644 index e4d018aff0..0000000000 --- a/cpp/test/neighbors/fused_l2_knn.cu +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../test_utils.cuh" -#include "./knn_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include - -namespace raft { -namespace spatial { -namespace knn { -struct FusedL2KNNInputs { - int num_queries; - int num_db_vecs; - int dim; - int k; - raft::distance::DistanceType metric_; -}; - -template -class FusedL2KNNTest : public ::testing::TestWithParam { - public: - FusedL2KNNTest() - : stream_(resource::get_cuda_stream(handle_)), - params_(::testing::TestWithParam::GetParam()), - database(params_.num_db_vecs * params_.dim, stream_), - search_queries(params_.num_queries * params_.dim, stream_), - raft_indices_(params_.num_queries * params_.k, stream_), - raft_distances_(params_.num_queries * params_.k, stream_), - ref_indices_(params_.num_queries * params_.k, stream_), - ref_distances_(params_.num_queries * params_.k, stream_) - { - RAFT_CUDA_TRY(cudaMemsetAsync(database.data(), 0, database.size() * sizeof(T), stream_)); - RAFT_CUDA_TRY( - cudaMemsetAsync(search_queries.data(), 0, search_queries.size() * sizeof(T), stream_)); - RAFT_CUDA_TRY( - cudaMemsetAsync(raft_indices_.data(), 0, raft_indices_.size() * sizeof(int64_t), stream_)); - RAFT_CUDA_TRY( - cudaMemsetAsync(raft_distances_.data(), 0, raft_distances_.size() * sizeof(T), stream_)); - RAFT_CUDA_TRY( - cudaMemsetAsync(ref_indices_.data(), 0, ref_indices_.size() * sizeof(int64_t), stream_)); - RAFT_CUDA_TRY( - cudaMemsetAsync(ref_distances_.data(), 0, ref_distances_.size() * sizeof(T), stream_)); - } - - protected: - void testBruteForce() - { - // calculate the naive knn, by calculating the full pairwise distances and doing a k-select - rmm::device_uvector temp_distances(num_db_vecs * num_queries, stream_); - distance::pairwise_distance( - handle_, - raft::make_device_matrix_view(search_queries.data(), num_queries, dim), - raft::make_device_matrix_view(database.data(), num_db_vecs, dim), - raft::make_device_matrix_view(temp_distances.data(), num_queries, num_db_vecs), - metric); - - matrix::select_k( - handle_, - make_device_matrix_view(temp_distances.data(), num_queries, num_db_vecs), - std::nullopt, - make_device_matrix_view(ref_distances_.data(), num_queries, k_), - make_device_matrix_view(ref_indices_.data(), num_queries, k_), - true, - true); - - auto index_view = - raft::make_device_matrix_view(database.data(), num_db_vecs, dim); - auto query_view = - raft::make_device_matrix_view(search_queries.data(), num_queries, dim); - auto out_indices_view = - raft::make_device_matrix_view(raft_indices_.data(), num_queries, k_); - auto out_dists_view = - raft::make_device_matrix_view(raft_distances_.data(), num_queries, k_); - raft::neighbors::brute_force::fused_l2_knn( - handle_, index_view, query_view, out_indices_view, out_dists_view, metric); - - // verify. - ASSERT_TRUE(devArrMatchKnnPair(ref_indices_.data(), - raft_indices_.data(), - ref_distances_.data(), - raft_distances_.data(), - num_queries, - k_, - float(0.001), - stream_)); - } - - void SetUp() override - { - num_queries = params_.num_queries; - num_db_vecs = params_.num_db_vecs; - dim = params_.dim; - k_ = params_.k; - metric = params_.metric_; - - unsigned long long int seed = 1234ULL; - raft::random::RngState r(seed); - uniform(handle_, r, database.data(), num_db_vecs * dim, T(-1.0), T(1.0)); - uniform(handle_, r, search_queries.data(), num_queries * dim, T(-1.0), T(1.0)); - } - - private: - raft::resources handle_; - cudaStream_t stream_ = 0; - FusedL2KNNInputs params_; - int num_queries; - int num_db_vecs; - int dim; - rmm::device_uvector database; - rmm::device_uvector search_queries; - rmm::device_uvector raft_indices_; - rmm::device_uvector raft_distances_; - rmm::device_uvector ref_indices_; - rmm::device_uvector ref_distances_; - int k_; - raft::distance::DistanceType metric; -}; - -const std::vector inputs = { - {100, 1000, 16, 10, raft::distance::DistanceType::L2Expanded}, - {256, 256, 30, 10, raft::distance::DistanceType::L2Expanded}, - {1000, 10000, 16, 10, raft::distance::DistanceType::L2Expanded}, - {100, 1000, 16, 50, raft::distance::DistanceType::L2Expanded}, - {20, 10000, 16, 10, raft::distance::DistanceType::L2Expanded}, - {1000, 10000, 16, 50, raft::distance::DistanceType::L2Expanded}, - {1000, 10000, 32, 50, raft::distance::DistanceType::L2Expanded}, - {10000, 40000, 32, 30, raft::distance::DistanceType::L2Expanded}, - // L2 unexpanded - {100, 1000, 16, 10, raft::distance::DistanceType::L2Unexpanded}, - {1000, 10000, 16, 10, raft::distance::DistanceType::L2Unexpanded}, - {100, 1000, 16, 50, raft::distance::DistanceType::L2Unexpanded}, - {20, 10000, 16, 50, raft::distance::DistanceType::L2Unexpanded}, - {1000, 10000, 16, 50, raft::distance::DistanceType::L2Unexpanded}, - {1000, 10000, 32, 50, raft::distance::DistanceType::L2Unexpanded}, - {10000, 40000, 32, 30, raft::distance::DistanceType::L2Unexpanded}, -}; - -typedef FusedL2KNNTest FusedL2KNNTestF; -TEST_P(FusedL2KNNTestF, FusedBruteForce) { this->testBruteForce(); } - -INSTANTIATE_TEST_CASE_P(FusedL2KNNTest, FusedL2KNNTestF, ::testing::ValuesIn(inputs)); - -} // namespace knn -} // namespace spatial -} // namespace raft diff --git a/cpp/test/neighbors/refine.cu b/cpp/test/neighbors/refine.cu deleted file mode 100644 index 05e6048e56..0000000000 --- a/cpp/test/neighbors/refine.cu +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../test_utils.cuh" -#include "ann_utils.cuh" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include - -#include - -namespace raft::neighbors { - -template -class RefineTest : public ::testing::TestWithParam> { - public: - RefineTest() - : stream_(resource::get_cuda_stream(handle_)), - data(handle_, ::testing::TestWithParam>::GetParam()) - { - } - - protected: - public: // tamas remove - void testRefine() - { - std::vector indices(data.p.n_queries * data.p.k); - std::vector distances(data.p.n_queries * data.p.k); - - if (data.p.host_data) { - raft::neighbors::refine(handle_, - data.dataset_host.view(), - data.queries_host.view(), - data.candidates_host.view(), - data.refined_indices_host.view(), - data.refined_distances_host.view(), - data.p.metric); - raft::copy(indices.data(), - data.refined_indices_host.data_handle(), - data.refined_indices_host.size(), - stream_); - raft::copy(distances.data(), - data.refined_distances_host.data_handle(), - data.refined_distances_host.size(), - stream_); - - } else { - raft::neighbors::refine(handle_, - data.dataset.view(), - data.queries.view(), - data.candidates.view(), - data.refined_indices.view(), - data.refined_distances.view(), - data.p.metric); - update_host(distances.data(), - data.refined_distances.data_handle(), - data.refined_distances.size(), - stream_); - update_host( - indices.data(), data.refined_indices.data_handle(), data.refined_indices.size(), stream_); - } - resource::sync_stream(handle_); - - double min_recall = 1; - - ASSERT_TRUE(raft::neighbors::eval_neighbours(data.true_refined_indices_host, - indices, - data.true_refined_distances_host, - distances, - data.p.n_queries, - data.p.k, - 0.001, - min_recall)); - } - - public: - raft::resources handle_; - rmm::cuda_stream_view stream_; - RefineHelper data; -}; - -const std::vector> inputs = - raft::util::itertools::product>( - {static_cast(137)}, - {static_cast(1000)}, - {static_cast(16)}, - {static_cast(1), static_cast(10), static_cast(33)}, - {static_cast(33)}, - {raft::distance::DistanceType::L2Expanded, raft::distance::DistanceType::InnerProduct}, - {false, true}); - -typedef RefineTest RefineTestF; -TEST_P(RefineTestF, AnnRefine) { this->testRefine(); } - -INSTANTIATE_TEST_CASE_P(RefineTest, RefineTestF, ::testing::ValuesIn(inputs)); - -typedef RefineTest RefineTestF_uint8; -TEST_P(RefineTestF_uint8, AnnRefine) { this->testRefine(); } -INSTANTIATE_TEST_CASE_P(RefineTest, RefineTestF_uint8, ::testing::ValuesIn(inputs)); - -typedef RefineTest RefineTestF_int8; -TEST_P(RefineTestF_int8, AnnRefine) { this->testRefine(); } -INSTANTIATE_TEST_CASE_P(RefineTest, RefineTestF_int8, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index c286d3debf..9bde613720 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -53,7 +53,6 @@ if(NOT raft_FOUND) set(BUILD_TESTS OFF) set(BUILD_PRIMS_BENCH OFF) - set(BUILD_ANN_BENCH OFF) set(RAFT_COMPILE_LIBRARY ON) set(CUDA_STATIC_RUNTIME ON) set(CUDA_STATIC_MATH_LIBRARIES ON) @@ -66,12 +65,14 @@ if(NOT raft_FOUND) add_subdirectory(../../cpp raft-cpp EXCLUDE_FROM_ALL) if(NOT CUDA_STATIC_MATH_LIBRARIES AND USE_CUDA_MATH_WHEELS) - set_property(TARGET raft_lib PROPERTY INSTALL_RPATH - "$ORIGIN/../nvidia/cublas/lib" - "$ORIGIN/../nvidia/curand/lib" - "$ORIGIN/../nvidia/cusolver/lib" - "$ORIGIN/../nvidia/cusparse/lib" - "$ORIGIN/../nvidia/nvjitlink/lib" + set_property( + TARGET raft_lib + PROPERTY INSTALL_RPATH + "$ORIGIN/../nvidia/cublas/lib" + "$ORIGIN/../nvidia/curand/lib" + "$ORIGIN/../nvidia/cusolver/lib" + "$ORIGIN/../nvidia/cusparse/lib" + "$ORIGIN/../nvidia/nvjitlink/lib" ) endif() @@ -85,11 +86,7 @@ endif() rapids_cython_init() add_subdirectory(pylibraft/common) -add_subdirectory(pylibraft/distance) -add_subdirectory(pylibraft/matrix) -add_subdirectory(pylibraft/neighbors) add_subdirectory(pylibraft/random) -add_subdirectory(pylibraft/cluster) if(DEFINED cython_lib_dir) rapids_cython_add_rpath_entries(TARGET raft PATHS "${cython_lib_dir}") diff --git a/python/pylibraft/pylibraft/test/ann_utils.py b/python/pylibraft/pylibraft/test/ann_utils.py deleted file mode 100644 index 60db7f3273..0000000000 --- a/python/pylibraft/pylibraft/test/ann_utils.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# h ttp://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np - - -def generate_data(shape, dtype): - if dtype == np.byte: - x = np.random.randint(-127, 128, size=shape, dtype=np.byte) - elif dtype == np.ubyte: - x = np.random.randint(0, 255, size=shape, dtype=np.ubyte) - else: - x = np.random.random_sample(shape).astype(dtype) - - return x - - -def calc_recall(ann_idx, true_nn_idx): - assert ann_idx.shape == true_nn_idx.shape - n = 0 - for i in range(ann_idx.shape[0]): - n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size - recall = n / ann_idx.size - return recall diff --git a/python/pylibraft/pylibraft/test/test_brute_force.py b/python/pylibraft/pylibraft/test/test_brute_force.py deleted file mode 100644 index 42095c3b9f..0000000000 --- a/python/pylibraft/pylibraft/test/test_brute_force.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from scipy.spatial.distance import cdist - -from pylibraft.common import DeviceResources, Stream, device_ndarray -from pylibraft.neighbors.brute_force import knn - - -@pytest.mark.parametrize("n_index_rows", [32, 100]) -@pytest.mark.parametrize("n_query_rows", [32, 100]) -@pytest.mark.parametrize("n_cols", [40, 100]) -@pytest.mark.parametrize("k", [1, 5, 32]) -@pytest.mark.parametrize( - "metric", - [ - "euclidean", - "cityblock", - "chebyshev", - "canberra", - "correlation", - "russellrao", - "cosine", - "sqeuclidean", - # "inner_product", - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("dtype", [np.float32]) -def test_knn(n_index_rows, n_query_rows, n_cols, k, inplace, metric, dtype): - index = np.random.random_sample((n_index_rows, n_cols)).astype(dtype) - queries = np.random.random_sample((n_query_rows, n_cols)).astype(dtype) - - # RussellRao expects boolean arrays - if metric == "russellrao": - index[index < 0.5] = 0.0 - index[index >= 0.5] = 1.0 - queries[queries < 0.5] = 0.0 - queries[queries >= 0.5] = 1.0 - - indices = np.zeros((n_query_rows, k), dtype="int64") - distances = np.zeros((n_query_rows, k), dtype=dtype) - - index_device = device_ndarray(index) - - queries_device = device_ndarray(queries) - indices_device = device_ndarray(indices) - distances_device = device_ndarray(distances) - - s2 = Stream() - handle = DeviceResources(stream=s2) - ret_distances, ret_indices = knn( - index_device, - queries_device, - k, - indices=indices_device, - distances=distances_device, - metric=metric, - handle=handle, - ) - handle.sync() - - pw_dists = cdist(queries, index, metric=metric) - - distances_device = ret_distances if not inplace else distances_device - - actual_distances = distances_device.copy_to_host() - - actual_distances[actual_distances <= 1e-5] = 0.0 - argsort = np.argsort(pw_dists, axis=1) - - for i in range(pw_dists.shape[0]): - expected_indices = argsort[i] - gpu_dists = actual_distances[i] - - cpu_ordered = pw_dists[i, expected_indices] - np.testing.assert_allclose( - cpu_ordered[:k], gpu_dists, atol=1e-3, rtol=1e-3 - ) - - -def test_knn_check_col_major_inputs(): - # make sure that we get an exception if passed col-major inputs, - # instead of returning incorrect results - cp = pytest.importorskip("cupy") - n_index_rows, n_query_rows, n_cols = 128, 16, 32 - index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32") - queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32") - - with pytest.raises(ValueError): - knn(cp.asarray(index, order="F"), queries, k=4) - - with pytest.raises(ValueError): - knn(index, cp.asarray(queries, order="F"), k=4) - - # shouldn't throw an exception with c-contiguous inputs - knn(index, queries, k=4) diff --git a/python/pylibraft/pylibraft/test/test_cagra.py b/python/pylibraft/pylibraft/test/test_cagra.py deleted file mode 100644 index ef8e54917a..0000000000 --- a/python/pylibraft/pylibraft/test/test_cagra.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# h ttp://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from sklearn.neighbors import NearestNeighbors -from sklearn.preprocessing import normalize - -from pylibraft.common import device_ndarray -from pylibraft.neighbors import cagra -from pylibraft.test.ann_utils import calc_recall, generate_data - - -def run_cagra_build_search_test( - n_rows=10000, - n_cols=10, - n_queries=100, - k=10, - dtype=np.float32, - metric="sqeuclidean", - intermediate_graph_degree=128, - graph_degree=64, - build_algo="ivf_pq", - array_type="device", - compare=True, - inplace=True, - add_data_on_build=True, - search_params={}, -): - dataset = generate_data((n_rows, n_cols), dtype) - if metric == "inner_product": - dataset = normalize(dataset, norm="l2", axis=1) - dataset_device = device_ndarray(dataset) - - build_params = cagra.IndexParams( - metric=metric, - intermediate_graph_degree=intermediate_graph_degree, - graph_degree=graph_degree, - build_algo=build_algo, - ) - - if array_type == "device": - index = cagra.build(build_params, dataset_device) - else: - index = cagra.build(build_params, dataset) - - assert index.trained - - if not add_data_on_build: - dataset_1 = dataset[: n_rows // 2, :] - dataset_2 = dataset[n_rows // 2 :, :] - indices_1 = np.arange(n_rows // 2, dtype=np.uint32) - indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.uint32) - if array_type == "device": - dataset_1_device = device_ndarray(dataset_1) - dataset_2_device = device_ndarray(dataset_2) - indices_1_device = device_ndarray(indices_1) - indices_2_device = device_ndarray(indices_2) - index = cagra.extend(index, dataset_1_device, indices_1_device) - index = cagra.extend(index, dataset_2_device, indices_2_device) - else: - index = cagra.extend(index, dataset_1, indices_1) - index = cagra.extend(index, dataset_2, indices_2) - - queries = generate_data((n_queries, n_cols), dtype) - out_idx = np.zeros((n_queries, k), dtype=np.uint32) - out_dist = np.zeros((n_queries, k), dtype=np.float32) - - queries_device = device_ndarray(queries) - out_idx_device = device_ndarray(out_idx) if inplace else None - out_dist_device = device_ndarray(out_dist) if inplace else None - - search_params = cagra.SearchParams(**search_params) - - ret_output = cagra.search( - search_params, - index, - queries_device, - k, - neighbors=out_idx_device, - distances=out_dist_device, - ) - - if not inplace: - out_dist_device, out_idx_device = ret_output - - if not compare: - return - - out_idx = out_idx_device.copy_to_host() - out_dist = out_dist_device.copy_to_host() - - # Calculate reference values with sklearn - skl_metric = { - "sqeuclidean": "sqeuclidean", - "inner_product": "cosine", - "euclidean": "euclidean", - }[metric] - nn_skl = NearestNeighbors( - n_neighbors=k, algorithm="brute", metric=skl_metric - ) - nn_skl.fit(dataset) - skl_idx = nn_skl.kneighbors(queries, return_distance=False) - - recall = calc_recall(out_idx, skl_idx) - assert recall > 0.7 - - -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -@pytest.mark.parametrize("array_type", ["device", "host"]) -@pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) -def test_cagra_dataset_dtype_host_device( - dtype, array_type, inplace, build_algo -): - # Note that inner_product tests use normalized input which we cannot - # represent in int8, therefore we test only sqeuclidean metric here. - run_cagra_build_search_test( - dtype=dtype, - inplace=inplace, - array_type=array_type, - build_algo=build_algo, - ) - - -@pytest.mark.parametrize( - "params", - [ - { - "intermediate_graph_degree": 64, - "graph_degree": 32, - "add_data_on_build": True, - "k": 1, - "metric": "sqeuclidean", - "build_algo": "ivf_pq", - }, - { - "intermediate_graph_degree": 32, - "graph_degree": 16, - "add_data_on_build": False, - "k": 5, - "metric": "sqeuclidean", - "build_algo": "ivf_pq", - }, - { - "intermediate_graph_degree": 128, - "graph_degree": 32, - "add_data_on_build": True, - "k": 10, - "metric": "sqeuclidean", - "build_algo": "nn_descent", - }, - ], -) -def test_cagra_index_params(params): - # Note that inner_product tests use normalized input which we cannot - # represent in int8, therefore we test only sqeuclidean metric here. - run_cagra_build_search_test( - k=params["k"], - metric=params["metric"], - graph_degree=params["graph_degree"], - intermediate_graph_degree=params["intermediate_graph_degree"], - compare=False, - build_algo=params["build_algo"], - ) - - -@pytest.mark.parametrize( - "params", - [ - { - "max_queries": 100, - "itopk_size": 32, - "max_iterations": 100, - "algo": "single_cta", - "team_size": 0, - "search_width": 1, - "min_iterations": 1, - "thread_block_size": 64, - "hashmap_mode": "hash", - "hashmap_min_bitlen": 0.2, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 1, - }, - { - "max_queries": 10, - "itopk_size": 128, - "max_iterations": 0, - "algo": "multi_cta", - "team_size": 8, - "search_width": 2, - "min_iterations": 10, - "thread_block_size": 0, - "hashmap_mode": "auto", - "hashmap_min_bitlen": 0.9, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 10, - }, - { - "max_queries": 0, - "itopk_size": 64, - "max_iterations": 0, - "algo": "multi_kernel", - "team_size": 16, - "search_width": 1, - "min_iterations": 0, - "thread_block_size": 0, - "hashmap_mode": "auto", - "hashmap_min_bitlen": 0, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 1, - }, - { - "max_queries": 0, - "itopk_size": 64, - "max_iterations": 0, - "algo": "auto", - "team_size": 32, - "search_width": 4, - "min_iterations": 0, - "thread_block_size": 0, - "hashmap_mode": "auto", - "hashmap_min_bitlen": 0, - "hashmap_max_fill_rate": 0.5, - "num_random_samplings": 1, - }, - ], -) -def test_cagra_search_params(params): - # Note that inner_product tests use normalized input which we cannot - # represent in int8, therefore we test only sqeuclidean metric here. - run_cagra_build_search_test(search_params=params) - - -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.ubyte]) -@pytest.mark.parametrize("include_dataset", [True, False]) -def test_save_load(dtype, include_dataset): - n_rows = 10000 - n_cols = 50 - n_queries = 1000 - - dataset = generate_data((n_rows, n_cols), dtype) - dataset_device = device_ndarray(dataset) - - build_params = cagra.IndexParams() - index = cagra.build(build_params, dataset_device) - - assert index.trained - filename = "my_index.bin" - cagra.save(filename, index, include_dataset=include_dataset) - loaded_index = cagra.load(filename) - - # if we didn't save the dataset with the index, we need to update the - # index with an already loaded copy - if not include_dataset: - loaded_index.update_dataset(dataset) - - queries = generate_data((n_queries, n_cols), dtype) - - queries_device = device_ndarray(queries) - search_params = cagra.SearchParams() - k = 10 - - distance_dev, neighbors_dev = cagra.search( - search_params, index, queries_device, k - ) - - neighbors = neighbors_dev.copy_to_host() - dist = distance_dev.copy_to_host() - del index - - distance_dev, neighbors_dev = cagra.search( - search_params, loaded_index, queries_device, k - ) - - neighbors2 = neighbors_dev.copy_to_host() - dist2 = distance_dev.copy_to_host() - - assert np.all(neighbors == neighbors2) - assert np.allclose(dist, dist2, rtol=1e-6) diff --git a/python/pylibraft/pylibraft/test/test_distance.py b/python/pylibraft/pylibraft/test/test_distance.py deleted file mode 100644 index 34ed86db01..0000000000 --- a/python/pylibraft/pylibraft/test/test_distance.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from scipy.spatial.distance import cdist - -from pylibraft.common import DeviceResources, Stream, device_ndarray -from pylibraft.distance import pairwise_distance - - -@pytest.mark.parametrize("n_rows", [50, 100]) -@pytest.mark.parametrize("n_cols", [10, 50]) -@pytest.mark.parametrize( - "metric", - [ - "euclidean", - "cityblock", - "chebyshev", - "canberra", - "correlation", - "hamming", - "jensenshannon", - "russellrao", - "cosine", - "sqeuclidean", - "inner_product", - ], -) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("order", ["F", "C"]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_distance(n_rows, n_cols, inplace, metric, order, dtype): - input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1, order=order).astype(dtype) - - # RussellRao expects boolean arrays - if metric == "russellrao": - input1[input1 < 0.5] = 0 - input1[input1 >= 0.5] = 1 - - # JensenShannon expects probability arrays - elif metric == "jensenshannon": - norm = np.sum(input1, axis=1) - input1 = (input1.T / norm).T - - output = np.zeros((n_rows, n_rows), dtype=dtype) - - if metric == "inner_product": - expected = np.matmul(input1, input1.T) - else: - expected = cdist(input1, input1, metric) - - input1_device = device_ndarray(input1) - output_device = device_ndarray(output) if inplace else None - - s2 = Stream() - handle = DeviceResources(stream=s2) - ret_output = pairwise_distance( - input1_device, input1_device, output_device, metric, handle=handle - ) - handle.sync() - - output_device = ret_output if not inplace else output_device - - actual = output_device.copy_to_host() - - assert np.allclose(expected, actual, atol=1e-3, rtol=1e-3) diff --git a/python/pylibraft/pylibraft/test/test_doctests.py b/python/pylibraft/pylibraft/test/test_doctests.py index c75f565236..f3853d3c66 100644 --- a/python/pylibraft/pylibraft/test/test_doctests.py +++ b/python/pylibraft/pylibraft/test/test_doctests.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -92,16 +92,7 @@ def _find_doctests_in_obj(obj, finder=None, criteria=None): # since the root pylibraft module doesn't import submodules (or define an # __all__) we are explicitly adding all the submodules we want to run # doctests for here -DOC_STRINGS = list(_find_doctests_in_obj(pylibraft.cluster)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.common)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.distance)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.matrix.select_k)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.brute_force)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.cagra)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_flat)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.ivf_pq)) -DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.neighbors.refine)) +DOC_STRINGS = list(_find_doctests_in_obj(pylibraft.common)) DOC_STRINGS.extend(_find_doctests_in_obj(pylibraft.random)) diff --git a/python/pylibraft/pylibraft/test/test_eps_neighborhood.py b/python/pylibraft/pylibraft/test/test_eps_neighborhood.py deleted file mode 100644 index f2643de904..0000000000 --- a/python/pylibraft/pylibraft/test/test_eps_neighborhood.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from scipy.sparse import csr_array - -from pylibraft.common import DeviceResources, Stream -from pylibraft.neighbors.brute_force import eps_neighbors as eps_neighbors_bf -from pylibraft.neighbors.rbc import ( - build_rbc_index, - eps_neighbors as eps_neighbors_rbc, -) - - -def test_bf_eps_neighbors_check_col_major_inputs(): - # make sure that we get an exception if passed col-major inputs, - # instead of returning incorrect results - cp = pytest.importorskip("cupy") - n_index_rows, n_query_rows, n_cols = 128, 16, 32 - eps = 0.02 - index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32") - queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32") - - with pytest.raises(ValueError): - eps_neighbors_bf(cp.asarray(index, order="F"), queries, eps) - - with pytest.raises(ValueError): - eps_neighbors_bf(index, cp.asarray(queries, order="F"), eps) - - # shouldn't throw an exception with c-contiguous inputs - eps_neighbors_bf(index, queries, eps) - - -def test_rbc_eps_neighbors_check_col_major_inputs(): - # make sure that we get an exception if passed col-major inputs, - # instead of returning incorrect results - cp = pytest.importorskip("cupy") - n_index_rows, n_query_rows, n_cols = 128, 16, 32 - eps = 0.02 - index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32") - queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32") - - with pytest.raises(ValueError): - build_rbc_index(cp.asarray(index, order="F")) - - rbc_index = build_rbc_index(index) - - with pytest.raises(ValueError): - eps_neighbors_rbc(rbc_index, cp.asarray(queries, order="F"), eps) - - eps_neighbors_rbc(rbc_index, queries, eps) - - -@pytest.mark.parametrize("n_index_rows", [32, 100, 1000]) -@pytest.mark.parametrize("n_query_rows", [32, 100, 1000]) -@pytest.mark.parametrize("n_cols", [2, 3, 40, 100]) -def test_eps_neighbors(n_index_rows, n_query_rows, n_cols): - s2 = Stream() - handle = DeviceResources(stream=s2) - - cp = pytest.importorskip("cupy") - eps = 0.02 - index = cp.random.random_sample((n_index_rows, n_cols), dtype="float32") - queries = cp.random.random_sample((n_query_rows, n_cols), dtype="float32") - - # brute force - adj_bf, vd_bf = eps_neighbors_bf(index, queries, eps, handle=handle) - adj_bf = cp.asarray(adj_bf) - vd_bf = cp.asarray(vd_bf) - - rbc_index = build_rbc_index(index, handle=handle) - adj_rbc_ia, adj_rbc_ja, vd_rbc = eps_neighbors_rbc( - rbc_index, queries, eps, handle=handle - ) - adj_rbc_ia = cp.asarray(adj_rbc_ia) - adj_rbc_ja = cp.asarray(adj_rbc_ja) - vd_rbc = cp.asarray(vd_rbc) - - np.testing.assert_array_equal(vd_bf.get(), vd_rbc.get()) - - adj_rbc = csr_array( - ( - np.ones(adj_rbc_ia.get()[n_query_rows]), - adj_rbc_ja.get(), - adj_rbc_ia.get(), - ), - shape=(n_query_rows, n_index_rows), - ).toarray() - np.testing.assert_array_equal(adj_bf.get(), adj_rbc) diff --git a/python/pylibraft/pylibraft/test/test_fused_distance_argmin.py b/python/pylibraft/pylibraft/test/test_fused_distance_argmin.py deleted file mode 100755 index 6736128242..0000000000 --- a/python/pylibraft/pylibraft/test/test_fused_distance_argmin.py +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from scipy.spatial.distance import cdist - -from pylibraft.common import DeviceResources, device_ndarray -from pylibraft.distance import fused_distance_nn_argmin - - -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("n_rows", [10, 100]) -@pytest.mark.parametrize("n_clusters", [50, 100]) -@pytest.mark.parametrize("n_cols", [128, 31]) -@pytest.mark.parametrize("dtype", [np.float32]) -@pytest.mark.parametrize( - "metric", - [ - "euclidean", - "cosine", - "sqeuclidean", - ], -) -def test_fused_distance_nn_minarg( - n_rows, n_cols, n_clusters, dtype, inplace, metric -): - input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1, order="C").astype(dtype) - - input2 = np.random.random_sample((n_clusters, n_cols)) - input2 = np.asarray(input2, order="C").astype(dtype) - - output = np.zeros((n_rows), dtype="int32") - expected = cdist(input1, input2, metric) - - expected = expected.argmin(axis=1) - - input1_device = device_ndarray(input1) - input2_device = device_ndarray(input2) - output_device = device_ndarray(output) if inplace else None - - is_sqrt = True if metric == "sqeuclidean" else False - handle = DeviceResources() - ret_output = fused_distance_nn_argmin( - input1_device, - input2_device, - output_device, - is_sqrt, - metric, - handle=handle, - ) - handle.sync() - output_device = ret_output if not inplace else output_device - actual = output_device.copy_to_host() - - assert np.allclose(expected, actual, rtol=1e-4) diff --git a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py b/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py deleted file mode 100644 index 086bb26f17..0000000000 --- a/python/pylibraft/pylibraft/test/test_fused_l2_argmin.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from scipy.spatial.distance import cdist - -from pylibraft.common import DeviceResources, device_ndarray -from pylibraft.distance import fused_l2_nn_argmin - - -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("n_rows", [10, 100]) -@pytest.mark.parametrize("n_clusters", [5, 10]) -@pytest.mark.parametrize("n_cols", [3, 5]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_fused_l2_nn_minarg(n_rows, n_cols, n_clusters, dtype, inplace): - input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1, order="C").astype(dtype) - - input2 = np.random.random_sample((n_clusters, n_cols)) - input2 = np.asarray(input2, order="C").astype(dtype) - - output = np.zeros((n_rows), dtype="int32") - expected = cdist(input1, input2, metric="euclidean") - - expected = expected.argmin(axis=1) - - input1_device = device_ndarray(input1) - input2_device = device_ndarray(input2) - output_device = device_ndarray(output) if inplace else None - - handle = DeviceResources() - ret_output = fused_l2_nn_argmin( - input1_device, input2_device, output_device, True, handle=handle - ) - handle.sync() - output_device = ret_output if not inplace else output_device - actual = output_device.copy_to_host() - - assert np.allclose(expected, actual, rtol=1e-4) diff --git a/python/pylibraft/pylibraft/test/test_ivf_flat.py b/python/pylibraft/pylibraft/test/test_ivf_flat.py deleted file mode 100644 index 2e38dab7bc..0000000000 --- a/python/pylibraft/pylibraft/test/test_ivf_flat.py +++ /dev/null @@ -1,518 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# h ttp://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from sklearn.metrics import pairwise_distances -from sklearn.neighbors import NearestNeighbors -from sklearn.preprocessing import normalize - -from pylibraft.common import device_ndarray -from pylibraft.neighbors import ivf_flat - - -def generate_data(shape, dtype): - if dtype == np.byte: - x = np.random.randint(-127, 128, size=shape, dtype=np.byte) - elif dtype == np.ubyte: - x = np.random.randint(0, 255, size=shape, dtype=np.ubyte) - else: - x = np.random.random_sample(shape).astype(dtype) - - return x - - -def calc_recall(ann_idx, true_nn_idx): - assert ann_idx.shape == true_nn_idx.shape - n = 0 - for i in range(ann_idx.shape[0]): - n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size - recall = n / ann_idx.size - return recall - - -def check_distances(dataset, queries, metric, out_idx, out_dist, eps=None): - """ - Calculate the real distance between queries and dataset[out_idx], - and compare it to out_dist. - """ - if eps is None: - # Quantization leads to errors in the distance calculation. - # The aim of this test is not to test precision, but to catch obvious - # errors. - eps = 0.1 - - dist = np.empty(out_dist.shape, out_dist.dtype) - for i in range(queries.shape[0]): - X = queries[np.newaxis, i, :] - Y = dataset[out_idx[i, :], :] - if metric == "sqeuclidean": - dist[i, :] = pairwise_distances(X, Y, "sqeuclidean") - elif metric == "euclidean": - dist[i, :] = pairwise_distances(X, Y, "euclidean") - elif metric == "inner_product": - dist[i, :] = np.matmul(X, Y.T) - else: - raise ValueError("Invalid metric") - - dist_eps = abs(dist) - dist_eps[dist < 1e-3] = 1e-3 - diff = abs(out_dist - dist) / dist_eps - - assert np.mean(diff) < eps - - -def run_ivf_flat_build_search_test( - n_rows, - n_cols, - n_queries, - k, - n_lists, - metric, - dtype, - add_data_on_build=True, - n_probes=100, - kmeans_trainset_fraction=1, - kmeans_n_iters=20, - compare=True, - inplace=True, - array_type="device", -): - dataset = generate_data((n_rows, n_cols), dtype) - if metric == "inner_product": - dataset = normalize(dataset, norm="l2", axis=1) - dataset_device = device_ndarray(dataset) - - build_params = ivf_flat.IndexParams( - n_lists=n_lists, - metric=metric, - kmeans_n_iters=kmeans_n_iters, - kmeans_trainset_fraction=kmeans_trainset_fraction, - add_data_on_build=add_data_on_build, - ) - - if array_type == "device": - index = ivf_flat.build(build_params, dataset_device) - else: - index = ivf_flat.build(build_params, dataset) - - assert index.trained - - assert index.metric == build_params.metric - assert index.n_lists == build_params.n_lists - - if not add_data_on_build: - dataset_1 = dataset[: n_rows // 2, :] - dataset_2 = dataset[n_rows // 2 :, :] - indices_1 = np.arange(n_rows // 2, dtype=np.int64) - indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.int64) - if array_type == "device": - dataset_1_device = device_ndarray(dataset_1) - dataset_2_device = device_ndarray(dataset_2) - indices_1_device = device_ndarray(indices_1) - indices_2_device = device_ndarray(indices_2) - index = ivf_flat.extend(index, dataset_1_device, indices_1_device) - index = ivf_flat.extend(index, dataset_2_device, indices_2_device) - else: - index = ivf_flat.extend(index, dataset_1, indices_1) - index = ivf_flat.extend(index, dataset_2, indices_2) - - assert index.size >= n_rows - - queries = generate_data((n_queries, n_cols), dtype) - out_idx = np.zeros((n_queries, k), dtype=np.int64) - out_dist = np.zeros((n_queries, k), dtype=np.float32) - - queries_device = device_ndarray(queries) - out_idx_device = device_ndarray(out_idx) if inplace else None - out_dist_device = device_ndarray(out_dist) if inplace else None - - search_params = ivf_flat.SearchParams(n_probes=n_probes) - - ret_output = ivf_flat.search( - search_params, - index, - queries_device, - k, - neighbors=out_idx_device, - distances=out_dist_device, - ) - - if not inplace: - out_dist_device, out_idx_device = ret_output - - if not compare: - return - - out_idx = out_idx_device.copy_to_host() - out_dist = out_dist_device.copy_to_host() - - # Calculate reference values with sklearn - skl_metric = { - "sqeuclidean": "sqeuclidean", - "inner_product": "cosine", - "euclidean": "euclidean", - }[metric] - nn_skl = NearestNeighbors( - n_neighbors=k, algorithm="brute", metric=skl_metric - ) - nn_skl.fit(dataset) - skl_idx = nn_skl.kneighbors(queries, return_distance=False) - - recall = calc_recall(out_idx, skl_idx) - assert recall > 0.7 - - check_distances(dataset, queries, metric, out_idx, out_dist) - - -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("n_rows", [10000]) -@pytest.mark.parametrize("n_cols", [10]) -@pytest.mark.parametrize("n_queries", [100]) -@pytest.mark.parametrize("n_lists", [100]) -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -@pytest.mark.parametrize("array_type", ["device"]) -def test_ivf_pq_dtypes( - n_rows, n_cols, n_queries, n_lists, dtype, inplace, array_type -): - # Note that inner_product tests use normalized input which we cannot - # represent in int8, therefore we test only sqeuclidean metric here. - run_ivf_flat_build_search_test( - n_rows=n_rows, - n_cols=n_cols, - n_queries=n_queries, - k=10, - n_lists=n_lists, - metric="sqeuclidean", - dtype=dtype, - inplace=inplace, - array_type=array_type, - ) - - -@pytest.mark.parametrize( - "params", - [ - pytest.param( - { - "n_rows": 0, - "n_cols": 10, - "n_queries": 10, - "k": 1, - "n_lists": 10, - }, - marks=pytest.mark.xfail(reason="empty dataset"), - ), - {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 1}, - {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10}, - # {"n_rows": 999, "n_cols": 42, "n_queries": 453, "k": 137, - # "n_lists": 53}, - ], -) -def test_ivf_flat_n(params): - # We do not test recall, just confirm that we can handle edge cases for - # certain parameters - run_ivf_flat_build_search_test( - n_rows=params["n_rows"], - n_cols=params["n_cols"], - n_queries=params["n_queries"], - k=params["k"], - n_lists=params["n_lists"], - metric="sqeuclidean", - dtype=np.float32, - compare=False, - ) - - -@pytest.mark.parametrize( - "metric", ["sqeuclidean", "inner_product", "euclidean"] -) -@pytest.mark.parametrize("dtype", [np.float32]) -def test_ivf_flat_build_params(metric, dtype): - run_ivf_flat_build_search_test( - n_rows=10000, - n_cols=10, - n_queries=1000, - k=10, - n_lists=100, - metric=metric, - dtype=dtype, - add_data_on_build=True, - n_probes=100, - ) - - -@pytest.mark.parametrize( - "params", - [ - { - "n_lists": 100, - "trainset_fraction": 0.9, - "n_iters": 30, - }, - ], -) -def test_ivf_flat_params(params): - run_ivf_flat_build_search_test( - n_rows=10000, - n_cols=16, - n_queries=1000, - k=10, - n_lists=params["n_lists"], - metric="sqeuclidean", - dtype=np.float32, - kmeans_trainset_fraction=params.get("trainset_fraction", 1.0), - kmeans_n_iters=params.get("n_iters", 20), - ) - - -@pytest.mark.parametrize( - "params", - [ - { - "k": 10, - "n_probes": 100, - }, - { - "k": 10, - "n_probes": 99, - }, - { - "k": 10, - "n_probes": 100, - }, - { - "k": 129, - "n_probes": 100, - }, - { - "k": 257, - "n_probes": 100, - }, - { - "k": 4096, - "n_probes": 100, - }, - ], -) -def test_ivf_pq_search_params(params): - run_ivf_flat_build_search_test( - n_rows=10000, - n_cols=16, - n_queries=1000, - k=params["k"], - n_lists=100, - n_probes=params["n_probes"], - metric="sqeuclidean", - dtype=np.float32, - ) - - -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -@pytest.mark.parametrize("array_type", ["device"]) -def test_extend(dtype, array_type): - run_ivf_flat_build_search_test( - n_rows=10000, - n_cols=10, - n_queries=100, - k=10, - n_lists=100, - metric="sqeuclidean", - dtype=dtype, - add_data_on_build=False, - array_type=array_type, - ) - - -def test_build_assertions(): - with pytest.raises(TypeError): - run_ivf_flat_build_search_test( - n_rows=1000, - n_cols=10, - n_queries=100, - k=10, - n_lists=100, - metric="sqeuclidean", - dtype=np.float64, - ) - - n_rows = 1000 - n_cols = 100 - n_queries = 212 - k = 10 - dataset = generate_data((n_rows, n_cols), np.float32) - dataset_device = device_ndarray(dataset) - - index_params = ivf_flat.IndexParams( - n_lists=50, - metric="sqeuclidean", - kmeans_n_iters=20, - kmeans_trainset_fraction=1, - add_data_on_build=False, - ) - - index = ivf_flat.Index() - - queries = generate_data((n_queries, n_cols), np.float32) - out_idx = np.zeros((n_queries, k), dtype=np.int64) - out_dist = np.zeros((n_queries, k), dtype=np.float32) - - queries_device = device_ndarray(queries) - out_idx_device = device_ndarray(out_idx) - out_dist_device = device_ndarray(out_dist) - - search_params = ivf_flat.SearchParams(n_probes=50) - - with pytest.raises(ValueError): - # Index must be built before search - ivf_flat.search( - search_params, - index, - queries_device, - k, - out_idx_device, - out_dist_device, - ) - - index = ivf_flat.build(index_params, dataset_device) - assert index.trained - - indices = np.arange(n_rows + 1, dtype=np.int64) - indices_device = device_ndarray(indices) - - with pytest.raises(ValueError): - # Dataset dimension mismatch - ivf_flat.extend(index, queries_device, indices_device) - - with pytest.raises(ValueError): - # indices dimension mismatch - ivf_flat.extend(index, dataset_device, indices_device) - - -@pytest.mark.parametrize( - "params", - [ - {"q_dt": np.float64}, - {"q_order": "F"}, - {"q_cols": 101}, - {"idx_dt": np.uint32}, - {"idx_order": "F"}, - {"idx_rows": 42}, - {"idx_cols": 137}, - {"dist_dt": np.float64}, - {"dist_order": "F"}, - {"dist_rows": 42}, - {"dist_cols": 137}, - ], -) -def test_search_inputs(params): - """Test with invalid input dtype, order, or dimension.""" - n_rows = 1000 - n_cols = 100 - n_queries = 256 - k = 10 - dtype = np.float32 - - q_dt = params.get("q_dt", np.float32) - q_order = params.get("q_order", "C") - queries = generate_data( - (n_queries, params.get("q_cols", n_cols)), q_dt - ).astype(q_dt, order=q_order) - queries_device = device_ndarray(queries) - - idx_dt = params.get("idx_dt", np.int64) - idx_order = params.get("idx_order", "C") - out_idx = np.zeros( - (params.get("idx_rows", n_queries), params.get("idx_cols", k)), - dtype=idx_dt, - order=idx_order, - ) - out_idx_device = device_ndarray(out_idx) - - dist_dt = params.get("dist_dt", np.float32) - dist_order = params.get("dist_order", "C") - out_dist = np.zeros( - (params.get("dist_rows", n_queries), params.get("dist_cols", k)), - dtype=dist_dt, - order=dist_order, - ) - out_dist_device = device_ndarray(out_dist) - - index_params = ivf_flat.IndexParams( - n_lists=50, metric="sqeuclidean", add_data_on_build=True - ) - - dataset = generate_data((n_rows, n_cols), dtype) - dataset_device = device_ndarray(dataset) - index = ivf_flat.build(index_params, dataset_device) - assert index.trained - - with pytest.raises(Exception): - search_params = ivf_flat.SearchParams(n_probes=50) - ivf_flat.search( - search_params, - index, - queries_device, - k, - out_idx_device, - out_dist_device, - ) - - -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.ubyte]) -def test_save_load(dtype): - n_rows = 10000 - n_cols = 50 - n_queries = 1000 - - dataset = generate_data((n_rows, n_cols), dtype) - dataset_device = device_ndarray(dataset) - - build_params = ivf_flat.IndexParams(n_lists=100, metric="sqeuclidean") - index = ivf_flat.build(build_params, dataset_device) - - assert index.trained - filename = "my_index.bin" - ivf_flat.save(filename, index) - loaded_index = ivf_flat.load(filename) - - assert index.metric == loaded_index.metric - assert index.n_lists == loaded_index.n_lists - assert index.dim == loaded_index.dim - assert index.adaptive_centers == loaded_index.adaptive_centers - - queries = generate_data((n_queries, n_cols), dtype) - - queries_device = device_ndarray(queries) - search_params = ivf_flat.SearchParams(n_probes=100) - k = 10 - - distance_dev, neighbors_dev = ivf_flat.search( - search_params, index, queries_device, k - ) - - neighbors = neighbors_dev.copy_to_host() - dist = distance_dev.copy_to_host() - del index - - distance_dev, neighbors_dev = ivf_flat.search( - search_params, loaded_index, queries_device, k - ) - - neighbors2 = neighbors_dev.copy_to_host() - dist2 = distance_dev.copy_to_host() - - assert np.all(neighbors == neighbors2) - assert np.allclose(dist, dist2, rtol=1e-6) diff --git a/python/pylibraft/pylibraft/test/test_ivf_pq.py b/python/pylibraft/pylibraft/test/test_ivf_pq.py deleted file mode 100644 index aa58e2a8fc..0000000000 --- a/python/pylibraft/pylibraft/test/test_ivf_pq.py +++ /dev/null @@ -1,550 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# h ttp://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from sklearn.metrics import pairwise_distances -from sklearn.neighbors import NearestNeighbors -from sklearn.preprocessing import normalize - -from pylibraft.common import device_ndarray -from pylibraft.neighbors import ivf_pq - - -def generate_data(shape, dtype): - if dtype == np.byte: - x = np.random.randint(-127, 128, size=shape, dtype=np.byte) - elif dtype == np.ubyte: - x = np.random.randint(0, 255, size=shape, dtype=np.ubyte) - else: - x = np.random.random_sample(shape).astype(dtype) - - return x - - -def calc_recall(ann_idx, true_nn_idx): - assert ann_idx.shape == true_nn_idx.shape - n = 0 - for i in range(ann_idx.shape[0]): - n += np.intersect1d(ann_idx[i, :], true_nn_idx[i, :]).size - recall = n / ann_idx.size - return recall - - -def check_distances(dataset, queries, metric, out_idx, out_dist, eps=None): - """ - Calculate the real distance between queries and dataset[out_idx], - and compare it to out_dist. - """ - if eps is None: - # Quantization leads to errors in the distance calculation. - # The aim of this test is not to test precision, but to catch obvious - # errors. - eps = 0.1 - - dist = np.empty(out_dist.shape, out_dist.dtype) - for i in range(queries.shape[0]): - X = queries[np.newaxis, i, :] - Y = dataset[out_idx[i, :], :] - if metric == "sqeuclidean": - dist[i, :] = pairwise_distances(X, Y, "sqeuclidean") - elif metric == "euclidean": - dist[i, :] = pairwise_distances(X, Y, "euclidean") - elif metric == "inner_product": - dist[i, :] = np.matmul(X, Y.T) - else: - raise ValueError("Invalid metric") - - dist_eps = abs(dist) - dist_eps[dist < 1e-3] = 1e-3 - diff = abs(out_dist - dist) / dist_eps - - assert np.mean(diff) < eps - - -def run_ivf_pq_build_search_test( - n_rows, - n_cols, - n_queries, - k, - n_lists, - metric, - dtype, - pq_bits=8, - pq_dim=0, - codebook_kind="subspace", - add_data_on_build="True", - n_probes=100, - lut_dtype=np.float32, - internal_distance_dtype=np.float32, - force_random_rotation=False, - kmeans_trainset_fraction=1, - kmeans_n_iters=20, - compare=True, - inplace=True, - array_type="device", -): - dataset = generate_data((n_rows, n_cols), dtype) - if metric == "inner_product": - dataset = normalize(dataset, norm="l2", axis=1) - dataset_device = device_ndarray(dataset) - - build_params = ivf_pq.IndexParams( - n_lists=n_lists, - metric=metric, - kmeans_n_iters=kmeans_n_iters, - kmeans_trainset_fraction=kmeans_trainset_fraction, - pq_bits=pq_bits, - pq_dim=pq_dim, - codebook_kind=codebook_kind, - force_random_rotation=force_random_rotation, - add_data_on_build=add_data_on_build, - ) - - if array_type == "device": - index = ivf_pq.build(build_params, dataset_device) - else: - index = ivf_pq.build(build_params, dataset) - - assert index.trained - if pq_dim != 0: - assert index.pq_dim == build_params.pq_dim - assert index.pq_bits == build_params.pq_bits - assert index.metric == build_params.metric - assert index.n_lists == build_params.n_lists - - if not add_data_on_build: - dataset_1 = dataset[: n_rows // 2, :] - dataset_2 = dataset[n_rows // 2 :, :] - indices_1 = np.arange(n_rows // 2, dtype=np.int64) - indices_2 = np.arange(n_rows // 2, n_rows, dtype=np.int64) - if array_type == "device": - dataset_1_device = device_ndarray(dataset_1) - dataset_2_device = device_ndarray(dataset_2) - indices_1_device = device_ndarray(indices_1) - indices_2_device = device_ndarray(indices_2) - index = ivf_pq.extend(index, dataset_1_device, indices_1_device) - index = ivf_pq.extend(index, dataset_2_device, indices_2_device) - else: - index = ivf_pq.extend(index, dataset_1, indices_1) - index = ivf_pq.extend(index, dataset_2, indices_2) - - assert index.size >= n_rows - - queries = generate_data((n_queries, n_cols), dtype) - out_idx = np.zeros((n_queries, k), dtype=np.int64) - out_dist = np.zeros((n_queries, k), dtype=np.float32) - - queries_device = device_ndarray(queries) - out_idx_device = device_ndarray(out_idx) if inplace else None - out_dist_device = device_ndarray(out_dist) if inplace else None - - search_params = ivf_pq.SearchParams( - n_probes=n_probes, - lut_dtype=lut_dtype, - internal_distance_dtype=internal_distance_dtype, - ) - - ret_output = ivf_pq.search( - search_params, - index, - queries_device, - k, - neighbors=out_idx_device, - distances=out_dist_device, - ) - - if not inplace: - out_dist_device, out_idx_device = ret_output - - if not compare: - return - - out_idx = out_idx_device.copy_to_host() - out_dist = out_dist_device.copy_to_host() - - # Calculate reference values with sklearn - skl_metric = { - "sqeuclidean": "sqeuclidean", - "inner_product": "cosine", - "euclidean": "euclidean", - }[metric] - nn_skl = NearestNeighbors( - n_neighbors=k, algorithm="brute", metric=skl_metric - ) - nn_skl.fit(dataset) - skl_idx = nn_skl.kneighbors(queries, return_distance=False) - - recall = calc_recall(out_idx, skl_idx) - assert recall > 0.7 - - check_distances(dataset, queries, metric, out_idx, out_dist) - - -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("n_rows", [10000]) -@pytest.mark.parametrize("n_cols", [10]) -@pytest.mark.parametrize("n_queries", [100]) -@pytest.mark.parametrize("n_lists", [100]) -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -@pytest.mark.parametrize("array_type", ["host", "device"]) -def test_ivf_pq_dtypes( - n_rows, n_cols, n_queries, n_lists, dtype, inplace, array_type -): - # Note that inner_product tests use normalized input which we cannot - # represent in int8, therefore we test only sqeuclidean metric here. - run_ivf_pq_build_search_test( - n_rows=n_rows, - n_cols=n_cols, - n_queries=n_queries, - k=10, - n_lists=n_lists, - metric="sqeuclidean", - dtype=dtype, - inplace=inplace, - array_type=array_type, - ) - - -@pytest.mark.parametrize( - "params", - [ - pytest.param( - { - "n_rows": 0, - "n_cols": 10, - "n_queries": 10, - "k": 1, - "n_lists": 10, - }, - marks=pytest.mark.xfail(reason="empty dataset"), - ), - {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "n_lists": 1}, - {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "n_lists": 10}, - # {"n_rows": 999, "n_cols": 42, "n_queries": 453, "k": 137, - # "n_lists": 53}, - ], -) -def test_ivf_pq_n(params): - # We do not test recall, just confirm that we can handle edge cases for - # certain parameters - run_ivf_pq_build_search_test( - n_rows=params["n_rows"], - n_cols=params["n_cols"], - n_queries=params["n_queries"], - k=params["k"], - n_lists=params["n_lists"], - metric="sqeuclidean", - dtype=np.float32, - compare=False, - ) - - -@pytest.mark.parametrize( - "metric", ["sqeuclidean", "inner_product", "euclidean"] -) -@pytest.mark.parametrize("dtype", [np.float32]) -@pytest.mark.parametrize("codebook_kind", ["subspace", "cluster"]) -@pytest.mark.parametrize("rotation", [True, False]) -def test_ivf_pq_build_params(metric, dtype, codebook_kind, rotation): - run_ivf_pq_build_search_test( - n_rows=10000, - n_cols=10, - n_queries=1000, - k=10, - n_lists=100, - metric=metric, - dtype=dtype, - pq_bits=8, - pq_dim=0, - codebook_kind=codebook_kind, - add_data_on_build=True, - n_probes=100, - force_random_rotation=rotation, - ) - - -@pytest.mark.parametrize( - "params", - [ - {"pq_dims": 10, "pq_bits": 8, "n_lists": 100}, - {"pq_dims": 16, "pq_bits": 7, "n_lists": 100}, - {"pq_dims": 0, "pq_bits": 8, "n_lists": 90}, - { - "pq_dims": 0, - "pq_bits": 8, - "n_lists": 100, - "trainset_fraction": 0.9, - "n_iters": 30, - }, - ], -) -def test_ivf_pq_params(params): - run_ivf_pq_build_search_test( - n_rows=10000, - n_cols=16, - n_queries=1000, - k=10, - n_lists=params["n_lists"], - metric="sqeuclidean", - dtype=np.float32, - pq_bits=params["pq_bits"], - pq_dim=params["pq_dims"], - kmeans_trainset_fraction=params.get("trainset_fraction", 1.0), - kmeans_n_iters=params.get("n_iters", 20), - ) - - -@pytest.mark.parametrize( - "params", - [ - { - "k": 10, - "n_probes": 100, - "lut": np.float16, - "idd": np.float32, - }, - { - "k": 10, - "n_probes": 99, - "lut": np.uint8, - "idd": np.float32, - }, - { - "k": 10, - "n_probes": 100, - "lut": np.float16, - "idd": np.float16, - }, - { - "k": 129, - "n_probes": 100, - "lut": np.float32, - "idd": np.float32, - }, - ], -) -def test_ivf_pq_search_params(params): - run_ivf_pq_build_search_test( - n_rows=10000, - n_cols=16, - n_queries=1000, - k=params["k"], - n_lists=100, - n_probes=params["n_probes"], - metric="sqeuclidean", - dtype=np.float32, - lut_dtype=params["lut"], - internal_distance_dtype=params["idd"], - ) - - -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -@pytest.mark.parametrize("array_type", ["host", "device"]) -def test_extend(dtype, array_type): - run_ivf_pq_build_search_test( - n_rows=10000, - n_cols=10, - n_queries=100, - k=10, - n_lists=100, - metric="sqeuclidean", - dtype=dtype, - add_data_on_build=False, - array_type=array_type, - ) - - -def test_build_assertions(): - with pytest.raises(TypeError): - run_ivf_pq_build_search_test( - n_rows=1000, - n_cols=10, - n_queries=100, - k=10, - n_lists=100, - metric="sqeuclidean", - dtype=np.float64, - ) - - n_rows = 1000 - n_cols = 100 - n_queries = 212 - k = 10 - dataset = generate_data((n_rows, n_cols), np.float32) - dataset_device = device_ndarray(dataset) - - index_params = ivf_pq.IndexParams( - n_lists=50, - metric="sqeuclidean", - kmeans_n_iters=20, - kmeans_trainset_fraction=1, - add_data_on_build=False, - ) - - index = ivf_pq.Index() - - queries = generate_data((n_queries, n_cols), np.float32) - out_idx = np.zeros((n_queries, k), dtype=np.int64) - out_dist = np.zeros((n_queries, k), dtype=np.float32) - - queries_device = device_ndarray(queries) - out_idx_device = device_ndarray(out_idx) - out_dist_device = device_ndarray(out_dist) - - search_params = ivf_pq.SearchParams(n_probes=50) - - with pytest.raises(ValueError): - # Index must be built before search - ivf_pq.search( - search_params, - index, - queries_device, - k, - out_idx_device, - out_dist_device, - ) - - index = ivf_pq.build(index_params, dataset_device) - assert index.trained - - indices = np.arange(n_rows + 1, dtype=np.int64) - indices_device = device_ndarray(indices) - - with pytest.raises(ValueError): - # Dataset dimension mismatch - ivf_pq.extend(index, queries_device, indices_device) - - with pytest.raises(ValueError): - # indices dimension mismatch - ivf_pq.extend(index, dataset_device, indices_device) - - -@pytest.mark.parametrize( - "params", - [ - {"q_dt": np.float64}, - {"q_order": "F"}, - {"q_cols": 101}, - {"idx_dt": np.uint32}, - {"idx_order": "F"}, - {"idx_rows": 42}, - {"idx_cols": 137}, - {"dist_dt": np.float64}, - {"dist_order": "F"}, - {"dist_rows": 42}, - {"dist_cols": 137}, - ], -) -def test_search_inputs(params): - """Test with invalid input dtype, order, or dimension.""" - n_rows = 1000 - n_cols = 100 - n_queries = 256 - k = 10 - dtype = np.float32 - - q_dt = params.get("q_dt", np.float32) - q_order = params.get("q_order", "C") - queries = generate_data( - (n_queries, params.get("q_cols", n_cols)), q_dt - ).astype(q_dt, order=q_order) - queries_device = device_ndarray(queries) - - idx_dt = params.get("idx_dt", np.int64) - idx_order = params.get("idx_order", "C") - out_idx = np.zeros( - (params.get("idx_rows", n_queries), params.get("idx_cols", k)), - dtype=idx_dt, - order=idx_order, - ) - out_idx_device = device_ndarray(out_idx) - - dist_dt = params.get("dist_dt", np.float32) - dist_order = params.get("dist_order", "C") - out_dist = np.zeros( - (params.get("dist_rows", n_queries), params.get("dist_cols", k)), - dtype=dist_dt, - order=dist_order, - ) - out_dist_device = device_ndarray(out_dist) - - index_params = ivf_pq.IndexParams( - n_lists=50, metric="sqeuclidean", add_data_on_build=True - ) - - dataset = generate_data((n_rows, n_cols), dtype) - dataset_device = device_ndarray(dataset) - index = ivf_pq.build(index_params, dataset_device) - assert index.trained - - with pytest.raises(Exception): - search_params = ivf_pq.SearchParams(n_probes=50) - ivf_pq.search( - search_params, - index, - queries_device, - k, - out_idx_device, - out_dist_device, - ) - - -def test_save_load(): - n_rows = 10000 - n_cols = 50 - n_queries = 1000 - dtype = np.float32 - - dataset = generate_data((n_rows, n_cols), dtype) - dataset_device = device_ndarray(dataset) - - build_params = ivf_pq.IndexParams(n_lists=100, metric="sqeuclidean") - index = ivf_pq.build(build_params, dataset_device) - - assert index.trained - filename = "my_index.bin" - ivf_pq.save(filename, index) - loaded_index = ivf_pq.load(filename) - - assert index.pq_dim == loaded_index.pq_dim - assert index.pq_bits == loaded_index.pq_bits - assert index.metric == loaded_index.metric - assert index.n_lists == loaded_index.n_lists - assert index.size == loaded_index.size - - queries = generate_data((n_queries, n_cols), dtype) - - queries_device = device_ndarray(queries) - search_params = ivf_pq.SearchParams(n_probes=100) - k = 10 - - distance_dev, neighbors_dev = ivf_pq.search( - search_params, index, queries_device, k - ) - - neighbors = neighbors_dev.copy_to_host() - dist = distance_dev.copy_to_host() - del index - - distance_dev, neighbors_dev = ivf_pq.search( - search_params, loaded_index, queries_device, k - ) - - neighbors2 = neighbors_dev.copy_to_host() - dist2 = distance_dev.copy_to_host() - - assert np.all(neighbors == neighbors2) - assert np.allclose(dist, dist2, rtol=1e-6) diff --git a/python/pylibraft/pylibraft/test/test_kmeans.py b/python/pylibraft/pylibraft/test/test_kmeans.py deleted file mode 100644 index 8736c6ee7a..0000000000 --- a/python/pylibraft/pylibraft/test/test_kmeans.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest - -from pylibraft.cluster.kmeans import ( - KMeansParams, - cluster_cost, - compute_new_centroids, - fit, - init_plus_plus, -) -from pylibraft.common import DeviceResources, device_ndarray -from pylibraft.distance import pairwise_distance - - -@pytest.mark.parametrize("n_rows", [100]) -@pytest.mark.parametrize("n_cols", [5, 25]) -@pytest.mark.parametrize("n_clusters", [5, 15]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_kmeans_fit(n_rows, n_cols, n_clusters, dtype): - # generate some random input points / centroids - X_host = np.random.random_sample((n_rows, n_cols)).astype(dtype) - centroids = device_ndarray(X_host[:n_clusters]) - X = device_ndarray(X_host) - - # compute the inertia, before fitting centroids - original_inertia = cluster_cost(X, centroids) - - params = KMeansParams(n_clusters=n_clusters, seed=42) - - # fit the centroids, make sure inertia has gone down - # TODO: once we have make_blobs exposed to python - # (https://github.com/rapidsai/raft/issues/1059) - # we should use that to test out the kmeans fit, like the C++ - # tests do right now - centroids, inertia, n_iter = fit(params, X, centroids) - assert inertia < original_inertia - assert n_iter >= 1 - assert np.allclose(cluster_cost(X, centroids), inertia, rtol=1e-6) - - -@pytest.mark.parametrize("n_rows", [100]) -@pytest.mark.parametrize("n_cols", [5, 25]) -@pytest.mark.parametrize("n_clusters", [5, 15]) -@pytest.mark.parametrize("metric", ["euclidean", "sqeuclidean"]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("additional_args", [True, False]) -def test_compute_new_centroids( - n_rows, n_cols, metric, n_clusters, dtype, additional_args -): - - # A single RAFT handle can optionally be reused across - # pylibraft functions. - handle = DeviceResources() - - X = np.random.random_sample((n_rows, n_cols)).astype(dtype) - X_device = device_ndarray(X) - - centroids = X[:n_clusters] - centroids_device = device_ndarray(centroids) - - weight_per_cluster = np.zeros((n_clusters,), dtype=dtype) - weight_per_cluster_device = ( - device_ndarray(weight_per_cluster) if additional_args else None - ) - - new_centroids = np.zeros((n_clusters, n_cols), dtype=dtype) - new_centroids_device = device_ndarray(new_centroids) - - sample_weights = np.ones((n_rows,)).astype(dtype) / n_rows - sample_weights_device = ( - device_ndarray(sample_weights) if additional_args else None - ) - - # Compute new centroids naively - dists = np.zeros((n_rows, n_clusters), dtype=dtype) - dists_device = device_ndarray(dists) - pairwise_distance(X_device, centroids_device, dists_device, metric=metric) - handle.sync() - - labels = np.argmin(dists_device.copy_to_host(), axis=1).astype(np.int32) - labels_device = device_ndarray(labels) - - expected_centers = np.empty((n_clusters, n_cols), dtype=dtype) - expected_wX = X * sample_weights.reshape((-1, 1)) - for i in range(n_clusters): - j = expected_wX[labels == i] - j = j.sum(axis=0) - g = sample_weights[labels == i].sum() - expected_centers[i, :] = j / g - - compute_new_centroids( - X_device, - centroids_device, - labels_device, - new_centroids_device, - sample_weights=sample_weights_device, - weight_per_cluster=weight_per_cluster_device, - handle=handle, - ) - - # pylibraft functions are often asynchronous so the - # handle needs to be explicitly synchronized - handle.sync() - - actual_centers = new_centroids_device.copy_to_host() - - assert np.allclose(expected_centers, actual_centers, rtol=1e-6) - - -@pytest.mark.parametrize("n_rows", [100]) -@pytest.mark.parametrize("n_cols", [5, 25]) -@pytest.mark.parametrize("n_clusters", [4, 15]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_cluster_cost(n_rows, n_cols, n_clusters, dtype): - X = np.random.random_sample((n_rows, n_cols)).astype(dtype) - X_device = device_ndarray(X) - - centroids = X[:n_clusters] - centroids_device = device_ndarray(centroids) - - inertia = cluster_cost(X_device, centroids_device) - - # compute the nearest centroid to each sample - distances = pairwise_distance( - X_device, centroids_device, metric="sqeuclidean" - ).copy_to_host() - cluster_ids = np.argmin(distances, axis=1) - - cluster_distances = np.take_along_axis( - distances, cluster_ids[:, None], axis=1 - ) - - # need reduced tolerance for float32 - tol = 1e-3 if dtype == np.float32 else 1e-6 - assert np.allclose(inertia, sum(cluster_distances), rtol=tol, atol=tol) - - -@pytest.mark.parametrize("n_rows", [100]) -@pytest.mark.parametrize("n_cols", [5, 25]) -@pytest.mark.parametrize("n_clusters", [4, 15]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_init_plus_plus(n_rows, n_cols, n_clusters, dtype): - X = np.random.random_sample((n_rows, n_cols)).astype(dtype) - X_device = device_ndarray(X) - - centroids = init_plus_plus(X_device, n_clusters, seed=1) - centroids_ = centroids.copy_to_host() - - assert centroids_.shape == (n_clusters, X.shape[1]) - - # Centroids are selected from the existing points - for centroid in centroids_: - assert (centroid == X).all(axis=1).any() - - -@pytest.mark.parametrize("n_rows", [100]) -@pytest.mark.parametrize("n_cols", [5, 25]) -@pytest.mark.parametrize("n_clusters", [4, 15]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_init_plus_plus_preallocated_output(n_rows, n_cols, n_clusters, dtype): - X = np.random.random_sample((n_rows, n_cols)).astype(dtype) - X_device = device_ndarray(X) - - centroids = device_ndarray.empty((n_clusters, n_cols), dtype=dtype) - - new_centroids = init_plus_plus(X_device, centroids=centroids, seed=1) - new_centroids_ = new_centroids.copy_to_host() - - # The shape should not have changed - assert new_centroids_.shape == centroids.shape - - # Centroids are selected from the existing points - for centroid in new_centroids_: - assert (centroid == X).all(axis=1).any() - - -def test_init_plus_plus_exclusive_arguments(): - # Check an exception is raised when n_clusters and centroids shape - # are inconsistent. - X = np.random.random_sample((10, 5)).astype(np.float64) - X = device_ndarray(X) - - n_clusters = 3 - - centroids = np.random.random_sample((n_clusters + 1, 5)).astype(np.float64) - centroids = device_ndarray(centroids) - - with pytest.raises( - RuntimeError, match="Parameters 'n_clusters' and 'centroids'" - ): - init_plus_plus(X, n_clusters, centroids=centroids) diff --git a/python/pylibraft/pylibraft/test/test_refine.py b/python/pylibraft/pylibraft/test/test_refine.py deleted file mode 100644 index 397ea70ec7..0000000000 --- a/python/pylibraft/pylibraft/test/test_refine.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# h ttp://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from sklearn.neighbors import NearestNeighbors -from sklearn.preprocessing import normalize -from test_ivf_pq import calc_recall, check_distances, generate_data - -from pylibraft.common import device_ndarray -from pylibraft.neighbors import refine - - -def run_refine( - n_rows=500, - n_cols=50, - n_queries=100, - metric="sqeuclidean", - k0=40, - k=10, - inplace=False, - dtype=np.float32, - memory_type="device", -): - - dataset = generate_data((n_rows, n_cols), dtype) - queries = generate_data((n_queries, n_cols), dtype) - - if metric == "inner_product": - if dtype != np.float32: - pytest.skip("Normalized input cannot be represented in int8") - return - dataset = normalize(dataset, norm="l2", axis=1) - queries = normalize(queries, norm="l2", axis=1) - - dataset_device = device_ndarray(dataset) - queries_device = device_ndarray(queries) - - # Calculate reference values with sklearn - skl_metric = {"sqeuclidean": "euclidean", "inner_product": "cosine"}[ - metric - ] - nn_skl = NearestNeighbors( - n_neighbors=k0, algorithm="brute", metric=skl_metric - ) - nn_skl.fit(dataset) - skl_dist, candidates = nn_skl.kneighbors(queries) - candidates = candidates.astype(np.int64) - candidates_device = device_ndarray(candidates) - - out_idx = np.zeros((n_queries, k), dtype=np.int64) - out_dist = np.zeros((n_queries, k), dtype=np.float32) - out_idx_device = device_ndarray(out_idx) if inplace else None - out_dist_device = device_ndarray(out_dist) if inplace else None - - if memory_type == "device": - if inplace: - refine( - dataset_device, - queries_device, - candidates_device, - indices=out_idx_device, - distances=out_dist_device, - metric=metric, - ) - else: - out_dist_device, out_idx_device = refine( - dataset_device, - queries_device, - candidates_device, - k=k, - metric=metric, - ) - out_idx = out_idx_device.copy_to_host() - out_dist = out_dist_device.copy_to_host() - elif memory_type == "host": - if inplace: - refine( - dataset, - queries, - candidates, - indices=out_idx, - distances=out_dist, - metric=metric, - ) - else: - out_dist, out_idx = refine( - dataset, queries, candidates, k=k, metric=metric - ) - - skl_idx = candidates[:, :k] - - recall = calc_recall(out_idx, skl_idx) - if recall <= 0.999: - # We did not find the same neighbor indices. - # We could have found other neighbor with same distance. - if metric == "sqeuclidean": - skl_dist = np.power(skl_dist[:, :k], 2) - elif metric == "inner_product": - skl_dist = 1 - skl_dist[:, :k] - else: - raise ValueError("Invalid metric") - mask = out_idx != skl_idx - assert np.all(out_dist[mask] <= skl_dist[mask] + 1.0e-6) - - check_distances(dataset, queries, metric, out_idx, out_dist, 0.001) - - -@pytest.mark.parametrize("n_queries", [100, 1024, 37]) -@pytest.mark.parametrize("inplace", [True, False]) -@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) -@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) -@pytest.mark.parametrize("memory_type", ["device", "host"]) -def test_refine_dtypes(n_queries, dtype, inplace, metric, memory_type): - run_refine( - n_rows=2000, - n_queries=n_queries, - n_cols=50, - k0=40, - k=10, - dtype=dtype, - inplace=inplace, - metric=metric, - memory_type=memory_type, - ) - - -@pytest.mark.parametrize( - "params", - [ - pytest.param( - { - "n_rows": 0, - "n_cols": 10, - "n_queries": 10, - "k0": 10, - "k": 1, - }, - marks=pytest.mark.xfail(reason="empty dataset"), - ), - {"n_rows": 1, "n_cols": 10, "n_queries": 10, "k": 1, "k0": 1}, - {"n_rows": 10, "n_cols": 1, "n_queries": 10, "k": 10, "k0": 10}, - {"n_rows": 999, "n_cols": 42, "n_queries": 453, "k0": 137, "k": 53}, - ], -) -@pytest.mark.parametrize("memory_type", ["device", "host"]) -def test_refine_row_col(params, memory_type): - run_refine( - n_rows=params["n_rows"], - n_queries=params["n_queries"], - n_cols=params["n_cols"], - k0=params["k0"], - k=params["k"], - memory_type=memory_type, - ) - - -@pytest.mark.parametrize("memory_type", ["device", "host"]) -def test_input_dtype(memory_type): - with pytest.raises(Exception): - run_refine(dtype=np.float64, memory_type=memory_type) - - -@pytest.mark.parametrize( - "params", - [ - {"idx_shape": None, "dist_shape": None, "k": None}, - {"idx_shape": [100, 9], "dist_shape": None, "k": 10}, - {"idx_shape": [101, 10], "dist_shape": None, "k": None}, - {"idx_shape": None, "dist_shape": [100, 11], "k": 10}, - {"idx_shape": None, "dist_shape": [99, 10], "k": None}, - ], -) -@pytest.mark.parametrize("memory_type", ["device", "host"]) -def test_input_assertions(params, memory_type): - n_cols = 5 - n_queries = 100 - k0 = 40 - dtype = np.float32 - dataset = generate_data((500, n_cols), dtype) - dataset_device = device_ndarray(dataset) - - queries = generate_data((n_queries, n_cols), dtype) - queries_device = device_ndarray(queries) - - candidates = np.random.randint( - 0, 500, size=(n_queries, k0), dtype=np.int64 - ) - candidates_device = device_ndarray(candidates) - - if params["idx_shape"] is not None: - out_idx = np.zeros(params["idx_shape"], dtype=np.int64) - out_idx_device = device_ndarray(out_idx) - else: - out_idx_device = None - if params["dist_shape"] is not None: - out_dist = np.zeros(params["dist_shape"], dtype=np.float32) - out_dist_device = device_ndarray(out_dist) - else: - out_dist_device = None - - if memory_type == "device": - with pytest.raises(Exception): - distances, indices = refine( - dataset_device, - queries_device, - candidates_device, - k=params["k"], - indices=out_idx_device, - distances=out_dist_device, - ) - else: - with pytest.raises(Exception): - distances, indices = refine( - dataset, - queries, - candidates, - k=params["k"], - indices=out_idx, - distances=out_dist, - ) diff --git a/python/pylibraft/pylibraft/test/test_select_k.py b/python/pylibraft/pylibraft/test/test_select_k.py deleted file mode 100644 index 203e735b9c..0000000000 --- a/python/pylibraft/pylibraft/test/test_select_k.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest - -from pylibraft.common import device_ndarray -from pylibraft.matrix import select_k - - -@pytest.mark.parametrize("n_rows", [32, 100]) -@pytest.mark.parametrize("n_cols", [40, 100]) -@pytest.mark.parametrize("k", [1, 5, 16, 35]) -@pytest.mark.parametrize("inplace", [True, False]) -def test_select_k(n_rows, n_cols, k, inplace): - dataset = np.random.random_sample((n_rows, n_cols)).astype("float32") - dataset_device = device_ndarray(dataset) - - indices = np.zeros((n_rows, k), dtype="int64") - distances = np.zeros((n_rows, k), dtype="float32") - indices_device = device_ndarray(indices) - distances_device = device_ndarray(distances) - - ret_distances, ret_indices = select_k( - dataset_device, - k=k, - distances=distances_device, - indices=indices_device, - ) - - distances_device = ret_distances if not inplace else distances_device - actual_distances = distances_device.copy_to_host() - argsort = np.argsort(dataset, axis=1) - - for i in range(dataset.shape[0]): - expected_indices = argsort[i] - gpu_dists = actual_distances[i] - - cpu_ordered = dataset[i, expected_indices] - np.testing.assert_allclose( - cpu_ordered[:k], gpu_dists, atol=1e-4, rtol=1e-4 - )