From 35f297172e59db7287695aafc0c001b1516783ff Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Mon, 18 Mar 2024 20:37:27 +0100 Subject: [PATCH 01/30] Rebase the PR to enable CI --- cpp/CMakeLists.txt | 96 +++ .../neighbors/detail/cagra/cagra_search.cuh | 233 ++++++-- .../detail/cagra/compute_distance.hpp | 329 +++++------ .../detail/cagra/compute_distance_vpq.cuh | 252 ++++++++ .../neighbors/detail/cagra/device_common.hpp | 8 +- .../raft/neighbors/detail/cagra/factory.cuh | 28 +- .../detail/cagra/search_multi_cta.cuh | 100 ++-- .../cagra/search_multi_cta_kernel-ext.cuh | 403 +++++++++++-- .../cagra/search_multi_cta_kernel-inl.cuh | 186 +++--- .../detail/cagra/search_multi_kernel.cuh | 404 +++++++------ .../neighbors/detail/cagra/search_plan.cuh | 8 +- .../detail/cagra/search_single_cta.cuh | 86 +-- .../cagra/search_single_cta_kernel-ext.cuh | 553 ++++++++++++++++-- .../cagra/search_single_cta_kernel-inl.cuh | 193 +++--- .../raft/neighbors/detail/cagra/utils.hpp | 5 + .../cagra/q_search_multi_cta_00_generate.py | 119 ++++ ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 71 +++ ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 71 +++ ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 71 +++ ...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 71 +++ ..._float_uint32_dim256_t16_8pq_2subd_half.cu | 71 +++ ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 71 +++ ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 71 +++ ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 71 +++ ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 71 +++ ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 71 +++ ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 71 +++ ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 71 +++ ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 71 +++ ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 71 +++ ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 71 +++ ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 71 +++ ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 71 +++ 
..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 71 +++ ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 71 +++ ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 71 +++ ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 71 +++ ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 71 +++ ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 71 +++ ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 71 +++ ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 71 +++ ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 71 +++ ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 71 +++ ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 71 +++ ...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 71 +++ ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 71 +++ ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 71 +++ ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 71 +++ ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 71 +++ ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 71 +++ ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 71 +++ ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 71 +++ ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 71 +++ ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 71 +++ ...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 71 +++ ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 71 +++ ...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 71 +++ ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 71 +++ ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 71 +++ ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 71 +++ ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 71 +++ ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 71 +++ ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 71 +++ ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 71 +++ .../cagra/q_search_single_cta_00_generate.py | 125 ++++ ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 73 +++ ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 73 +++ ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 73 +++ ...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 73 +++ 
..._float_uint32_dim256_t16_8pq_2subd_half.cu | 73 +++ ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 73 +++ ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 73 +++ ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 73 +++ ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 73 +++ ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 73 +++ ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 73 +++ ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 73 +++ ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 73 +++ ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 73 +++ ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 73 +++ ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 73 +++ ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 73 +++ ..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 73 +++ ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 73 +++ ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 73 +++ ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 73 +++ ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 73 +++ ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 73 +++ ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 73 +++ ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 73 +++ ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 73 +++ ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 73 +++ ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 73 +++ ...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 73 +++ ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 73 +++ ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 73 +++ ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 73 +++ ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 73 +++ ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 73 +++ ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 73 +++ ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 73 +++ ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 73 +++ ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 73 +++ ...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 73 +++ ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 73 +++ 
...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 73 +++ ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 73 +++ ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 73 +++ ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 73 +++ ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 73 +++ ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 73 +++ ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 73 +++ ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 73 +++ .../cagra/search_multi_cta_00_generate.py | 22 +- ...arch_multi_cta_float_uint32_dim1024_t32.cu | 63 +- ...search_multi_cta_float_uint32_dim128_t8.cu | 63 +- ...earch_multi_cta_float_uint32_dim256_t16.cu | 63 +- ...earch_multi_cta_float_uint32_dim512_t32.cu | 63 +- ...arch_multi_cta_float_uint64_dim1024_t32.cu | 63 +- ...search_multi_cta_float_uint64_dim128_t8.cu | 63 +- ...earch_multi_cta_float_uint64_dim256_t16.cu | 63 +- ...earch_multi_cta_float_uint64_dim512_t32.cu | 63 +- ...earch_multi_cta_half_uint32_dim1024_t32.cu | 61 +- .../search_multi_cta_half_uint32_dim128_t8.cu | 61 +- ...search_multi_cta_half_uint32_dim256_t16.cu | 61 +- ...search_multi_cta_half_uint32_dim512_t32.cu | 61 +- ...earch_multi_cta_half_uint64_dim1024_t32.cu | 61 +- .../search_multi_cta_half_uint64_dim128_t8.cu | 61 +- ...search_multi_cta_half_uint64_dim256_t16.cu | 61 +- ...search_multi_cta_half_uint64_dim512_t32.cu | 61 +- ...earch_multi_cta_int8_uint32_dim1024_t32.cu | 63 +- .../search_multi_cta_int8_uint32_dim128_t8.cu | 63 +- ...search_multi_cta_int8_uint32_dim256_t16.cu | 63 +- ...search_multi_cta_int8_uint32_dim512_t32.cu | 63 +- ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 63 +- ...search_multi_cta_uint8_uint32_dim128_t8.cu | 63 +- ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 63 +- ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 63 +- .../cagra/search_single_cta_00_generate.py | 22 +- ...rch_single_cta_float_uint32_dim1024_t32.cu | 66 ++- ...earch_single_cta_float_uint32_dim128_t8.cu | 66 ++- ...arch_single_cta_float_uint32_dim256_t16.cu | 66 
++- ...arch_single_cta_float_uint32_dim512_t32.cu | 66 ++- ...rch_single_cta_float_uint64_dim1024_t32.cu | 66 ++- ...earch_single_cta_float_uint64_dim128_t8.cu | 66 ++- ...arch_single_cta_float_uint64_dim256_t16.cu | 66 ++- ...arch_single_cta_float_uint64_dim512_t32.cu | 66 ++- ...arch_single_cta_half_uint32_dim1024_t32.cu | 64 +- ...search_single_cta_half_uint32_dim128_t8.cu | 64 +- ...earch_single_cta_half_uint32_dim256_t16.cu | 64 +- ...earch_single_cta_half_uint32_dim512_t32.cu | 64 +- ...arch_single_cta_half_uint64_dim1024_t32.cu | 64 +- ...search_single_cta_half_uint64_dim128_t8.cu | 64 +- ...earch_single_cta_half_uint64_dim256_t16.cu | 64 +- ...earch_single_cta_half_uint64_dim512_t32.cu | 64 +- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 66 ++- ...search_single_cta_int8_uint32_dim128_t8.cu | 66 ++- ...earch_single_cta_int8_uint32_dim256_t16.cu | 66 ++- ...earch_single_cta_int8_uint32_dim512_t32.cu | 66 ++- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 66 ++- ...earch_single_cta_uint8_uint32_dim128_t8.cu | 66 ++- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 66 ++- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 66 ++- cpp/test/CMakeLists.txt | 1 + .../ann_cagra/search_kernel_uint64_t.cuh | 194 +++--- cpp/test/neighbors/ann_cagra_vpq.cuh | 296 ++++++++++ .../ann_cagra_vpq/test_float_int64_t.cu | 28 + 167 files changed, 11312 insertions(+), 2355 deletions(-) create mode 100644 cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu create 
mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu create 
mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/test/neighbors/ann_cagra_vpq.cuh create mode 100644 cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 638ceb3b45..08a1433bf6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -371,6 +371,102 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu diff --git 
a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 0832e75633..836710bc04 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -16,6 +16,7 @@ #pragma once +#include "compute_distance_vpq.cuh" #include "factory.cuh" #include "search_plan.cuh" #include "search_single_cta.cuh" @@ -77,46 +78,24 @@ inline return filter; } -/** - * @brief Search ANN using the constructed index. - * - * See the [build](#build) documentation for a usage example. - * - * @tparam T data element type - * @tparam IdxT type of database vector indices - * @tparam internal_IdxT during search we map IdxT to internal_IdxT, this way we do not need - * separate kernels for int/uint. - * - * @param[in] handle - * @param[in] params configure the search - * @param[in] idx ivf-pq constructed index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - */ - -template -void search_main(raft::resources const& res, - search_params params, - const index& index, - raft::device_matrix_view queries, - raft::device_matrix_view neighbors, - raft::device_matrix_view distances, - CagraSampleFilterT sample_filter = CagraSampleFilterT()) +template +void search_main_core( + raft::resources const& res, + search_params params, + DatasetDescriptorT dataset_desc, + raft::device_matrix_view graph, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) { RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", - static_cast(index.dataset().extent(0)), - static_cast(index.dataset().extent(1))); + 
static_cast(index.dataset_view().extent(0)), + static_cast(index.dataset_view().extent(1))); RAFT_LOG_DEBUG("# query size = %lu, dim = %lu\n", static_cast(queries.extent(0)), static_cast(queries.extent(1))); - RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); + RAFT_EXPECTS(queries.extent(1) == dataset_desc.dim, "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); cudaDeviceProp deviceProp = resource::get_device_properties(res); @@ -125,12 +104,15 @@ void search_main(raft::resources const& res, } common::nvtx::range fun_scope( - "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); + "cagra::search(max_queries = %u, k = %u, dim = %zu)", + params.max_queries, + topk, + dataset_desc.dim); using CagraSampleFilterT_s = typename CagraSampleFilterT_Selector::type; - std::unique_ptr> plan = - factory::create( - res, params, index.dim(), index.graph_degree(), topk); + std::unique_ptr> plan = + factory::create( + res, params, dataset_desc.dim, graph.extent(1), topk); plan->check(topk); @@ -140,30 +122,22 @@ void search_main(raft::resources const& res, for (unsigned qid = 0; qid < queries.extent(0); qid += max_queries) { const uint32_t n_queries = std::min(max_queries, queries.extent(0) - qid); - internal_IdxT* _topk_indices_ptr = - reinterpret_cast(neighbors.data_handle()) + (topk * qid); - DistanceT* _topk_distances_ptr = distances.data_handle() + (topk * qid); + auto _topk_indices_ptr = + reinterpret_cast(neighbors.data_handle()) + + (topk * qid); + auto _topk_distances_ptr = distances.data_handle() + (topk * qid); // todo(tfeher): one could keep distances optional and pass nullptr - const T* _query_ptr = queries.data_handle() + (query_dim * qid); - const internal_IdxT* _seed_ptr = + const auto* _query_ptr = queries.data_handle() + (query_dim * qid); + const auto* _seed_ptr = plan->num_seeds > 0 - ? reinterpret_cast(plan->dev_seed.data()) + (plan->num_seeds * qid) + ? 
reinterpret_cast(plan->dev_seed.data()) + + (plan->num_seeds * qid) : nullptr; uint32_t* _num_executed_iterations = nullptr; - auto dataset_internal = - make_device_strided_matrix_view(index.dataset().data_handle(), - index.dataset().extent(0), - index.dataset().extent(1), - index.dataset().stride(0)); - auto graph_internal = raft::make_device_matrix_view( - reinterpret_cast(index.graph().data_handle()), - index.graph().extent(0), - index.graph().extent(1)); - (*plan)(res, - dataset_internal, - graph_internal, + dataset_desc, + graph, _topk_indices_ptr, _topk_distances_ptr, _query_ptr, @@ -173,6 +147,151 @@ void search_main(raft::resources const& res, topk, set_offset(sample_filter, qid)); } +} + +template +void lauch_vpq_search_main_core( + raft::resources const& res, + const vpq_dataset* vpq_dset, + search_params params, + raft::device_matrix_view graph, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter) +{ + const float vq_scale = 1.0f; + const float pq_scale = 1.0f; + + if (vpq_dset->pq_bits() == 8) { + if (vpq_dset->pq_len() == 2) { + using dataset_desc_t = cagra_q_dataset_descriptor_t; + dataset_desc_t dataset_desc(vpq_dset->data.data_handle(), + vpq_dset->encoded_row_length(), + vpq_dset->pq_dim(), + vpq_dset->vq_code_book.data_handle(), + vq_scale, + vpq_dset->pq_code_book.data_handle(), + pq_scale, + size_t(vpq_dset->n_rows()), + vpq_dset->dim()); + search_main_core( + res, params, dataset_desc, graph, queries, neighbors, distances, sample_filter); + } else if (vpq_dset->pq_len() == 4) { + using dataset_desc_t = cagra_q_dataset_descriptor_t; + dataset_desc_t dataset_desc(vpq_dset->data.data_handle(), + vpq_dset->encoded_row_length(), + vpq_dset->pq_dim(), + vpq_dset->vq_code_book.data_handle(), + vq_scale, + vpq_dset->pq_code_book.data_handle(), + pq_scale, + size_t(vpq_dset->n_rows()), + vpq_dset->dim()); + search_main_core( + res, params, dataset_desc, graph, 
queries, neighbors, distances, sample_filter); + } else { + RAFT_FAIL("Subspace dimension must be 2 or 4"); + } + } else { + RAFT_FAIL("Only 8-bit PQ is supported now"); + } +} + +/** + * @brief Search ANN using the constructed index. + * + * See the [build](#build) documentation for a usage example. + * + * @tparam T data element type + * @tparam IdxT type of database vector indices + * @tparam internal_IdxT during search we map IdxT to internal_IdxT, this way we do not need + * separate kernels for int/uint. + * + * @param[in] handle + * @param[in] params configure the search + * @param[in] idx ivf-pq constructed index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + */ +template +void search_main(raft::resources const& res, + search_params params, + const index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) +{ + const auto& graph = index.graph(); + auto graph_internal = raft::make_device_matrix_view( + reinterpret_cast(graph.data_handle()), graph.extent(0), graph.extent(1)); + + // n_rows has the same type as the dataset index (the array extents type) + using ds_idx_type = decltype(index.dataset().n_rows()); + // Dispatch search parameters based on the dataset kind. 
+ if (auto* strided_dset = dynamic_cast*>(&index.dataset()); + strided_dset != nullptr) { + const auto& internal_dataset = make_device_strided_matrix_view( + index.dataset_view().data_handle(), + index.dataset_view().extent(0), + index.dataset_view().extent(1), + index.dataset_view().stride(0)); + // Set TEAM_SIZE and DATASET_BLOCK_SIZE to zero tentatively since these parameters cannot be + // determined here. They are set just before kernel launch. + using dataset_desc_t = standard_dataset_descriptor_t; + // Search using a plain (strided) row-major dataset + const dataset_desc_t dataset_desc(strided_dset->view().data_handle(), + strided_dset->n_rows(), + strided_dset->dim(), + strided_dset->stride()); + + search_main_core( + res, params, dataset_desc, graph_internal, queries, neighbors, distances, sample_filter); + } else if (auto* vpq_dset = + dynamic_cast*>(&index.dataset()); + vpq_dset != nullptr) { + // Search using a compressed dataset + RAFT_FAIL("FP32 VPQ dataset support is coming soon"); + } else if (auto* vpq_dset = dynamic_cast*>(&index.dataset()); + vpq_dset != nullptr) { + lauch_vpq_search_main_core( + res, vpq_dset, params, graph_internal, queries, neighbors, distances, sample_filter); + } else if (auto* empty_dset = dynamic_cast*>(&index.dataset()); + empty_dset != nullptr) { + // Forgot to add a dataset. + RAFT_FAIL( + "Attempted to search without a dataset. Please call index.update_dataset(...) first."); + } else { + // This is a logic error. 
+ RAFT_FAIL("Unrecognized dataset format"); + } static_assert(std::is_same_v, "only float distances are supported at the moment"); diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 3732dcf3fe..752d729da7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -36,152 +36,54 @@ _RAFT_DEVICE constexpr unsigned get_vlen() return utils::size_of() / utils::size_of(); } -template -struct data_load_t { - union { - LOAD_T load; - DATA_T data[VLEN]; - }; +template +struct code_book_load_t_core { + using type = void; }; - -template -struct distance_op; -template -struct distance_op { - const float* const query_buffer; - __device__ distance_op(const float* const query_buffer) : query_buffer(query_buffer) {} - - __device__ DISTANCE_T operator()(const DATA_T* const dataset_ptr, - const std::uint32_t dataset_dim, - const bool valid) - { - const unsigned lane_id = threadIdx.x % TEAM_SIZE; - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - data_load_t dl_buff[reg_nelem]; - - DISTANCE_T norm2 = 0; - if (valid) { - for (uint32_t elem_offset = 0; elem_offset < dataset_dim; elem_offset += DATASET_BLOCK_DIM) { -#pragma unroll - for (uint32_t e = 0; e < reg_nelem; e++) { - const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; - if (k >= dataset_dim) break; - dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); - } -#pragma unroll - for (uint32_t e = 0; e < reg_nelem; e++) { - const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; - if (k >= dataset_dim) break; -#pragma unroll - for (uint32_t v = 0; v < vlen; v++) { - const uint32_t kv = k + v; - // if (kv >= dataset_dim) break; - DISTANCE_T diff = query_buffer[device::swizzling(kv)]; - diff -= 
spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); - norm2 += diff * diff; - } - } - } - } - for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { - norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); - } - return norm2; - } +template <> +struct code_book_load_t_core<1> { + using type = std::uint8_t; +}; +template <> +struct code_book_load_t_core<2> { + using type = std::uint16_t; +}; +template <> +struct code_book_load_t_core<4> { + using type = std::uint32_t; +}; +template <> +struct code_book_load_t_core<8> { + using type = LOAD_64BIT_T; +}; +template <> +struct code_book_load_t_core<16> { + using type = LOAD_128BIT_T; }; -template -struct distance_op { - static constexpr unsigned N_FRAGS = (DATASET_BLOCK_DIM + TEAM_SIZE - 1) / TEAM_SIZE; - float query_frags[N_FRAGS]; - - __device__ distance_op(const float* const query_buffer) - { - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - const std::uint32_t lane_id = threadIdx.x % TEAM_SIZE; - // Pre-load query vectors into registers when register usage is not too large. 
-#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - // if (k >= dataset_dim) break; -#pragma unroll - for (unsigned v = 0; v < vlen; v++) { - const unsigned kv = k + v; - const unsigned ev = (vlen * e) + v; - query_frags[ev] = query_buffer[device::swizzling(kv)]; - } - } - } - __device__ DISTANCE_T operator()(const DATA_T* const dataset_ptr, - const std::uint32_t dataset_dim, - const bool valid) - { - const unsigned lane_id = threadIdx.x % TEAM_SIZE; - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - data_load_t dl_buff[reg_nelem]; +template +struct code_book_load_t { + using type = typename code_book_load_t_core() * vlen>::type; +}; - DISTANCE_T norm2 = 0; - if (valid) { -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= dataset_dim) break; - dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); - } -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= dataset_dim) break; -#pragma unroll - for (unsigned v = 0; v < vlen; v++) { - DISTANCE_T diff; - const unsigned ev = (vlen * e) + v; - diff = query_frags[ev]; - diff -= spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); - norm2 += diff * diff; - } - } - } - for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { - norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); - } - return norm2; - } +template +struct data_load_t { + union { + typename code_book_load_t::type load; + DATA_T data[VLEN]; + }; }; template _RAFT_DEVICE void compute_distance_to_random_nodes( INDEX_T* const result_indices_ptr, // [num_pickup] DISTANCE_T* const result_distances_ptr, // [num_pickup] - const float* const query_buffer, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t 
dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, + const typename DATASET_DESCRIPTOR_T::QUERY_T* const query_buffer, + const DATASET_DESCRIPTOR_T& dataset_desc, const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, @@ -195,9 +97,6 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( uint32_t max_i = num_pickup; if (max_i % (32 / TEAM_SIZE)) { max_i += (32 / TEAM_SIZE) - (max_i % (32 / TEAM_SIZE)); } - distance_op dist_op( - query_buffer); - for (uint32_t i = threadIdx.x / TEAM_SIZE; i < max_i; i += blockDim.x / TEAM_SIZE) { const bool valid_i = (i < num_pickup); @@ -212,11 +111,11 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( if (seed_ptr && (gid < num_seeds)) { seed_index = seed_ptr[gid]; } else { - seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_size; + seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_desc.size; } } - const auto norm2 = dist_op(dataset_ptr + dataset_ld * seed_index, dataset_dim, valid_i); + const auto norm2 = dataset_desc.compute_similarity(query_buffer, seed_index, valid_i); if (valid_i && (norm2 < best_norm2_team_local)) { best_norm2_team_local = norm2; @@ -240,27 +139,25 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( template -_RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_indices_ptr, - DISTANCE_T* const result_child_distances_ptr, - // query - const float* const query_buffer, - // [dataset_dim, dataset_size] - const DATA_T* const dataset_ptr, - const std::size_t dataset_dim, - const std::size_t dataset_ld, - // [knn_k, dataset_size] - const INDEX_T* const knn_graph, - const std::uint32_t knn_k, - // hashmap - INDEX_T* const visited_hashmap_ptr, - const std::uint32_t hash_bitlen, - const INDEX_T* const parent_indices, - const INDEX_T* const internal_topk_list, - const std::uint32_t search_width) +_RAFT_DEVICE void compute_distance_to_child_nodes( + INDEX_T* const result_child_indices_ptr, + 
DISTANCE_T* const result_child_distances_ptr, + // query + const typename DATASET_DESCRIPTOR_T::QUERY_T* const query_buffer, + // [dataset_dim, dataset_size] + const DATASET_DESCRIPTOR_T& dataset_desc, + // [knn_k, dataset_size] + const INDEX_T* const knn_graph, + const std::uint32_t knn_k, + // hashmap + INDEX_T* const visited_hashmap_ptr, + const std::uint32_t hash_bitlen, + const INDEX_T* const parent_indices, + const INDEX_T* const internal_topk_list, + const std::uint32_t search_width) { constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; const INDEX_T invalid_index = utils::get_max_value(); @@ -281,16 +178,6 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in } result_child_indices_ptr[i] = child_id; } - - // [Notice] - // Loading the query vector here from shared memory into registers reduces - // shared memory trafiic. However, register usage increase. The - // MAX_N_FRAGS below is used as the threshold to enable or disable this, - // but the appropriate value should be discussed. 
- constexpr unsigned N_FRAGS = (DATASET_BLOCK_DIM + TEAM_SIZE - 1) / TEAM_SIZE; - constexpr bool use_fragment = N_FRAGS <= MAX_N_FRAGS; - distance_op dist_op( - query_buffer); __syncthreads(); // Compute the distance to child nodes @@ -302,8 +189,8 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in INDEX_T child_id = invalid_index; if (valid_i) { child_id = result_child_indices_ptr[i]; } - DISTANCE_T norm2 = - dist_op(dataset_ptr + child_id * dataset_ld, dataset_dim, child_id != invalid_index); + const auto norm2 = + dataset_desc.compute_similarity(query_buffer, child_id, child_id != invalid_index); // Store the distance const unsigned lane_id = threadIdx.x % TEAM_SIZE; @@ -318,4 +205,108 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in } } // namespace device + +template +struct dataset_descriptor_base_t { + using INDEX_T = INDEX_T_; + using QUERY_T = QUERY_T_; + using DISTANCE_T = DISTANCE_T_; + + const INDEX_T size; + const std::uint32_t dim; + + dataset_descriptor_base_t(const INDEX_T size, const std::uint32_t dim) : size(size), dim(dim) {} +}; + +template +struct standard_dataset_descriptor_t + : public dataset_descriptor_base_t { + using LOAD_T = device::LOAD_128BIT_T; + using DATA_T = DATA_T_; + using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; + static const std::uint32_t DATASET_BLOCK_DIM = DATASET_BLOCK_DIM_; + static const std::uint32_t TEAM_SIZE = TEAM_SIZE_; + + const DATA_T* const ptr; + const std::size_t ld; + using dataset_descriptor_base_t::size; + using dataset_descriptor_base_t::dim; + + standard_dataset_descriptor_t(const DATA_T* const ptr, + const std::size_t size, + const std::uint32_t dim, + const std::size_t ld) + : dataset_descriptor_base_t(size, dim), ptr(ptr), ld(ld) + { + } + + static const std::uint32_t smem_buffer_size_in_byte = 0; + __device__ void set_smem_ptr(void* const){}; + + __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, + 
const INDEX_T dataset_i, + const bool valid) const + { + const auto dataset_ptr = ptr + dataset_i * ld; + const unsigned lane_id = threadIdx.x % TEAM_SIZE; + constexpr unsigned vlen = device::get_vlen(); + // #include (DATASET_BLOCK_DIM, TEAM_SIZE * vlen); + device::data_load_t dl_buff[reg_nelem]; + + DISTANCE_T norm2 = 0; + if (valid) { + for (uint32_t elem_offset = 0; elem_offset < dim; elem_offset += DATASET_BLOCK_DIM) { +#pragma unroll + for (uint32_t e = 0; e < reg_nelem; e++) { + const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; + if (k >= dim) break; + dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); + } +#pragma unroll + for (uint32_t e = 0; e < reg_nelem; e++) { + const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; + if (k >= dim) break; +#pragma unroll + for (uint32_t v = 0; v < vlen; v++) { + const uint32_t kv = k + v; + // if (kv >= dataset_dim) break; + DISTANCE_T diff = query_ptr[device::swizzling(kv)]; + diff -= spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); + norm2 += diff * diff; + } + } + } + } + for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { + norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); + } + return norm2; + } +}; + +template +standard_dataset_descriptor_t +set_compute_template_params( + standard_dataset_descriptor_t& + desc_in) +{ + return standard_dataset_descriptor_t( + desc_in.ptr, desc_in.size, desc_in.dim, desc_in.ld); +} + } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh new file mode 100644 index 0000000000..642047af6a --- /dev/null +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "compute_distance.hpp" + +namespace raft::neighbors::cagra::detail { +template +struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t { + using LOAD_T = device::LOAD_128BIT_T; + using DATA_T = DATA_T_; + using CODE_BOOK_T = CODE_BOOK_T_; + using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; + static const std::uint32_t DATASET_BLOCK_DIM = DATASET_BLOCK_DIM_; + static const std::uint32_t TEAM_SIZE = TEAM_SIZE_; + + const std::uint8_t* encoded_dataset_ptr; + const std::uint32_t encoded_dataset_dim; + const std::uint32_t n_subspace; + const CODE_BOOK_T* vq_code_book_ptr; + const float vq_scale; + const CODE_BOOK_T* pq_code_book_ptr; + const float pq_scale; + using dataset_descriptor_base_t::size; + using dataset_descriptor_base_t::dim; + + // Set on device + CODE_BOOK_T* smem_pq_code_book_ptr; + static const std::uint32_t smem_buffer_size_in_byte = + (1 << PQ_BITS) * PQ_CODE_BOOK_DIM * utils::size_of(); + + __device__ void set_smem_ptr(void* const smem_ptr) + { + smem_pq_code_book_ptr = reinterpret_cast(smem_ptr); + + // Copy PQ table + if constexpr (std::is_same::value) { + for (unsigned i = threadIdx.x * 2; i < (1 << PQ_BITS) * PQ_CODE_BOOK_DIM; + i += blockDim.x * 2) { + half2 buf2; + buf2.x = pq_code_book_ptr[i]; + buf2.y = pq_code_book_ptr[i + 1]; + (reinterpret_cast(smem_pq_code_book_ptr + i))[0] = buf2; + } + } else { + for (unsigned i = threadIdx.x; i < (1 << PQ_BITS) * PQ_CODE_BOOK_DIM; i += blockDim.x) { + // TODO: vectorize + smem_pq_code_book_ptr[i] = pq_code_book_ptr[i]; + } + } + } 
+ + cagra_q_dataset_descriptor_t(const std::uint8_t* encoded_dataset_ptr, + const std::uint32_t encoded_dataset_dim, + const std::uint32_t n_subspace, + const CODE_BOOK_T* const vq_code_book_ptr, + const float vq_scale, + const CODE_BOOK_T* const pq_code_book_ptr, + const float pq_scale, + const std::size_t size, + const std::uint32_t dim) + : dataset_descriptor_base_t(size, dim), + encoded_dataset_ptr(encoded_dataset_ptr), + encoded_dataset_dim(encoded_dataset_dim), + n_subspace(n_subspace), + vq_code_book_ptr(vq_code_book_ptr), + vq_scale(vq_scale), + pq_code_book_ptr(pq_code_book_ptr), + pq_scale(pq_scale) + { + } + + __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, + const INDEX_T node_id, + const bool valid) const + { + float norm = 0; + if (valid) { + const unsigned lane_id = threadIdx.x % TEAM_SIZE; + const uint32_t vq_code = *(reinterpret_cast( + encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id))); + if (PQ_BITS == 8) { + constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** + constexpr unsigned nelem = + ((DATASET_BLOCK_DIM / PQ_CODE_BOOK_DIM) + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); + // Loading PQ codes + uint32_t pq_codes[nelem]; +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen; + if (k >= n_subspace) break; + // Loading 4 x 8-bit PQ-codes using 32-bit load ops (from device memory) + pq_codes[e] = *(reinterpret_cast( + encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id) + 4 + + k)); + } + // + if constexpr ((std::is_same::value) && (PQ_CODE_BOOK_DIM % 2 == 0)) { + // **** Use half2 for distance computation **** + half2 norm2{0, 0}; +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen; + if (k >= n_subspace) break; + // Loading VQ code-book + device::data_load_t vq_vals[PQ_CODE_BOOK_DIM]; + using vq_vals_load_t = typename 
device::code_book_load_t::type; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 1) { + const uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k); + if (d >= dim) break; + // Loading 4 x 16-bit VQ-values using 64-bit load ops (from L2$ or device memory) + vq_vals[m].load = + *(reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code))); + } + // Compute distance + std::uint32_t pq_code = pq_codes[e]; +#pragma unroll + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_CODE_BOOK_DIM * (v + k) >= dim) break; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 2) { + const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); + const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); + // Loading query vector in smem + half2 diff2 = (reinterpret_cast( + query_ptr))[device::swizzling(d / 2)]; + // Loading PQ code book in smem + diff2 -= *(reinterpret_cast( + smem_pq_code_book_ptr + (1 << PQ_BITS) * 2 * (m / 2) + (2 * (pq_code & 0xff)))); + diff2 -= vq_vals[d1 / vlen].data[(d1 % vlen) / 2]; + norm2 += diff2 * diff2; + } + pq_code >>= 8; + } + } + norm = static_cast(norm2.x + norm2.y); + } else { + // **** Use float for distance computation **** +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen; + if (k >= n_subspace) break; + // Loading VQ code-book + typename device::data_load_t::type vq_vals[PQ_CODE_BOOK_DIM]; + using vq_vals_load_t = typename device::code_book_load_t::type; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { + const std::uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k); + if (d >= dim) break; + // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device memory) + vq_vals[m].load = + *(reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code))); + } + // Compute distance + std::uint32_t pq_code = pq_codes[e]; +#pragma unroll + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_CODE_BOOK_DIM * (v + k) >= dim) break; 
+ typename device::data_load_t::type pq_vals; + using pq_vals_load_t = device::code_book_load_t; + pq_vals.load = *(reinterpret_cast( + smem_pq_code_book_ptr + + (PQ_CODE_BOOK_DIM * (pq_code & 0xff)))); // (from L1$ or smem) +#pragma unroll + for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { + const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); + const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); + // if (d >= dataset_dim) break; + DISTANCE_T diff = query_ptr[d]; // (from smem) + diff -= pq_scale * static_cast(pq_vals.data[m]); + diff -= vq_scale * static_cast(vq_vals[d1 / vlen].data[d1 % vlen]); + norm += diff * diff; + } + pq_code >>= 8; + } + } + } + } + } + for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { + norm += __shfl_xor_sync(0xffffffff, norm, offset); + } + return norm; + } +}; + +template +cagra_q_dataset_descriptor_t +set_compute_template_params(cagra_q_dataset_descriptor_t& desc_in) +{ + return cagra_q_dataset_descriptor_t(desc_in.encoded_dataset_ptr, + desc_in.encoded_dataset_dim, + desc_in.n_subspace, + desc_in.vq_code_book_ptr, + desc_in.vq_scale, + desc_in.pq_code_book_ptr, + desc_in.pq_scale, + desc_in.size, + desc_in.dim); +} +} // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp index cd7469b55e..d4d69e6a67 100644 --- a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp @@ -42,13 +42,17 @@ _RAFT_HOST_DEVICE inline uint64_t xorshift64(uint64_t u) return u * 0x2545F4914F6CDD1DULL; } -template +template _RAFT_DEVICE inline T swizzling(T x) { // Address swizzling reduces bank conflicts in shared memory, but increases // the amount of operation instead. 
// return x; - return x ^ (x >> 5); // "x" must be less than 1024 + if constexpr (X_MAX <= 1024) { + return (x) ^ ((x) >> 5); + } else { + return (x) ^ (((x) >> 5) & 0x1f); + } } } // namespace device diff --git a/cpp/include/raft/neighbors/detail/cagra/factory.cuh b/cpp/include/raft/neighbors/detail/cagra/factory.cuh index 0aee912e25..4944b57c46 100644 --- a/cpp/include/raft/neighbors/detail/cagra/factory.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/factory.cuh @@ -25,16 +25,18 @@ namespace raft::neighbors::cagra::detail { -template class factory { + using T = typename DATASET_DESCRIPTOR_T::DATA_T; + using IdxT = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DistanceT = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + public: /** * Create a search structure for dataset with dim features. */ - static std::unique_ptr> create( + static std::unique_ptr> create( raft::resources const& res, search_params const& params, int64_t dim, @@ -63,28 +65,28 @@ class factory { break; default: THROW("Incorrect dataset_block_dim (%lu)\n", plan.dataset_block_dim); } - return std::unique_ptr>(); + return std::unique_ptr>(); } private: template - static std::unique_ptr> dispatch_kernel( - raft::resources const& res, search_plan_impl_base& plan) + static std::unique_ptr> + dispatch_kernel(raft::resources const& res, search_plan_impl_base& plan) { if (plan.algo == search_algo::SINGLE_CTA) { - return std::unique_ptr>( + return std::unique_ptr>( new single_cta_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } else if (plan.algo == search_algo::MULTI_CTA) { - return std::unique_ptr>( + return std::unique_ptr>( new multi_cta_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } else { - return std::unique_ptr>( + return std::unique_ptr>( new multi_kernel_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } } diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh 
b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 1fcd159959..8192b1ae51 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -45,44 +45,46 @@ namespace multi_cta_search { template -struct search : public search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; +struct search : public search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; - using search_plan_impl::hash_bitlen; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using 
search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; + using search_plan_impl::hash_bitlen; - using search_plan_impl::smem_size; + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_cta_per_query; rmm::device_uvector intermediate_indices; @@ -95,8 +97,7 @@ struct search : public search_plan_impl( - res, params, dim, graph_degree, topk), + : search_plan_impl(res, params, dim, graph_degree, topk), intermediate_indices(0, resource::get_cuda_stream(res)), intermediate_distances(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)) @@ -120,9 +121,11 @@ struct search : public search_plan_impl(dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + smem_size = sizeof(float) * query_smem_buffer_length + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + - sizeof(uint32_t) * search_width + sizeof(uint32_t); + sizeof(uint32_t) * search_width + sizeof(uint32_t) + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; RAFT_LOG_DEBUG("# smem_size: %u", smem_size); // @@ -191,22 +194,25 @@ struct search : public search_plan_impl dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* 
const num_executed_iterations, // [num_queries,] - uint32_t topk, - SAMPLE_FILTER_T sample_filter) + void operator()( + raft::resources const& res, + // raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view + graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const uint32_t num_queries, + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); - select_and_run( - dataset, + select_and_run( + dataset_desc, graph, intermediate_indices.data(), intermediate_distances.data(), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index 7a5ad17460..1e3095771f 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include // none_cagra_sample_filter #include // RAFT_EXPLICIT @@ -27,63 +28,66 @@ namespace multi_cta_search { template -void select_and_run(raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, - DISTANCE_T* const topk_distances_ptr, - const DATA_T* const queries_ptr, - const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, - uint32_t* const num_executed_iterations, - uint32_t topk, - uint32_t block_size, - uint32_t result_buffer_size, - uint32_t smem_size, - int64_t hash_bitlen, - INDEX_T* hashmap_ptr, - uint32_t num_cta_per_query, - uint32_t num_random_samplings, - uint64_t 
rand_xor_mask, - uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, - SAMPLE_FILTER_T sample_filter, - cudaStream_t stream) RAFT_EXPLICIT; +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, + const uint32_t num_queries, + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, + uint32_t* const num_executed_iterations, + uint32_t topk, + uint32_t block_size, + uint32_t result_buffer_size, + uint32_t smem_size, + int64_t hash_bitlen, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, + uint32_t num_cta_per_query, + uint32_t num_random_samplings, + uint64_t rand_xor_mask, + uint32_t num_seeds, + size_t itopk_size, + size_t search_width, + size_t min_iterations, + size_t max_iterations, + SAMPLE_FILTER_T sample_filter, + cudaStream_t stream) RAFT_EXPLICIT; #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ 
+#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_kernel_selection( @@ -120,5 +124,296 @@ instantiate_kernel_selection( 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection + +#define instantiate_q_kernel_selection(TEAM_SIZE, \ + MAX_DATASET_DIM, \ + CODE_BOOK_T, \ + PQ_BITS, \ + PQ_CODE_BOOK_DIM, \ + DATA_T, \ + INDEX_T, \ + DISTANCE_T, \ + SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t 
rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_q_kernel_selection( + 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection( + 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 8, 128, half, 8, 4, float, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + int8_t, + uint32_t, + float, + 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_q_kernel_selection } // namespace multi_cta_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 30f56780d6..eb771c9325 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -123,29 +123,22 @@ __device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] // // multiple CTAs per single query // -template +template __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( - INDEX_T* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] - DISTANCE_T* const result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const size_t dataset_dim, - const size_t dataset_size, - const size_t dataset_ld, - const DATA_T* const queries_ptr, // 
[num_queries, dataset_dim] - const INDEX_T* const knn_graph, // [dataset_size, graph_degree] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] + DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const knn_graph, // [dataset_size, graph_degree] const uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const uint32_t hash_bitlen, const uint32_t itopk_size, const uint32_t search_width, @@ -154,6 +147,13 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( uint32_t* const num_executed_iterations, /* stats */ SAMPLE_FILTER_T sample_filter) { + constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; + constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using QUERY_T = typename DATASET_DESCRIPTOR_T::QUERY_T; + const auto num_queries = gridDim.y; const auto query_id = blockIdx.y; const auto num_cta_per_query = gridDim.x; @@ -188,14 +188,20 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( assert(result_buffer_size_32 <= MAX_ELEMENTS); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - auto 
query_buffer = reinterpret_cast(smem); + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + auto query_buffer = reinterpret_cast(smem); auto result_indices_buffer = reinterpret_cast(query_buffer + query_smem_buffer_length); auto result_distances_buffer = reinterpret_cast(result_indices_buffer + result_buffer_size_32); auto parent_indices_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); - auto terminate_flag = reinterpret_cast(parent_indices_buffer + search_width); + auto distance_work_buffer_ptr = + reinterpret_cast(parent_indices_buffer + search_width); + auto terminate_flag = reinterpret_cast(distance_work_buffer_ptr + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte); + + // Set smem working buffer for the distance calculation + dataset_desc.set_smem_ptr(distance_work_buffer_ptr); #if 0 /* debug */ @@ -204,10 +210,10 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( result_distances_buffer[i] = utils::get_max_value(); } #endif - const DATA_T* const query_ptr = queries_ptr + (dataset_dim * query_id); + const DATA_T* const query_ptr = queries_ptr + (dataset_desc.dim * query_id); for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { + if (i < dataset_desc.dim) { query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); } else { query_buffer[j] = 0.0; @@ -224,23 +230,19 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( const INDEX_T* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; uint32_t block_id = cta_id + (num_cta_per_query * query_id); uint32_t num_blocks = num_cta_per_query * num_queries; - device::compute_distance_to_random_nodes( - result_indices_buffer, - result_distances_buffer, - query_buffer, - dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen, - block_id, - num_blocks); + device::compute_distance_to_random_nodes(result_indices_buffer, + result_distances_buffer, + query_buffer, + dataset_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + block_id, + num_blocks); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -272,13 +274,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( _CLK_START(); // constexpr unsigned max_n_frags = 16; constexpr unsigned max_n_frags = 0; - device::compute_distance_to_child_nodes( + device::compute_distance_to_child_nodes( result_indices_buffer + itopk_size, result_distances_buffer + itopk_size, query_buffer, - dataset_ptr, - dataset_dim, - dataset_ld, + dataset_desc, knn_graph, graph_degree, local_visited_hashmap_ptr, @@ -398,54 +398,21 @@ void set_value_batch(T* const dev_ptr, <<>>(dev_ptr, ld, val, count, batch_size); } -template +template struct search_kernel_config { // Search kernel function type. Note that the actual values for the template value // parameters do not matter, because they are not part of the function signature. The // second to fourth value parameters will be selected by the choose_* functions below. 
- using kernel_t = decltype(&search_kernel); + using kernel_t = decltype(&search_kernel<128, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>); static auto choose_buffer_size(unsigned result_buffer_size, unsigned block_size) -> kernel_t { if (result_buffer_size <= 64) { - return search_kernel; + return search_kernel<64, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; } else if (result_buffer_size <= 128) { - return search_kernel; + return search_kernel<128, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; } else if (result_buffer_size <= 256) { - return search_kernel; + return search_kernel<256, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; } THROW("Result buffer size %u larger than max buffer size %u", result_buffer_size, 256); } @@ -453,26 +420,24 @@ struct search_kernel_config { template -void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, // multi_cta_search (params struct) uint32_t block_size, // uint32_t result_buffer_size, uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, uint32_t 
num_cta_per_query, uint32_t num_random_samplings, uint64_t rand_xor_mask, @@ -484,20 +449,24 @@ void select_and_run( // raft::resources const& res, SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { - auto kernel = - search_kernel_config::choose_buffer_size(result_buffer_size, block_size); - - RAFT_CUDA_TRY( - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + const auto dataset_desc_ = + set_compute_template_params(dataset_desc); + + using dataset_desc_t = typename std::remove_const::type; + auto kernel = search_kernel_config::choose_buffer_size( + result_buffer_size, block_size); + + RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size + dataset_desc_t::smem_buffer_size_in_byte)); // Initialize hash table const uint32_t hash_size = hashmap::get_size(hash_bitlen); - set_value_batch( - hashmap_ptr, hash_size, utils::get_max_value(), hash_size, num_queries, stream); + set_value_batch(hashmap_ptr, + hash_size, + utils::get_max_value(), + hash_size, + num_queries, + stream); dim3 block_dims(block_size, 1, 1); dim3 grid_dims(num_cta_per_query, num_queries, 1); @@ -508,10 +477,7 @@ void select_and_run( // raft::resources const& res, smem_size); kernel<<>>(topk_indices_ptr, topk_distances_ptr, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc_, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index e4a30675bb..0098c5d844 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -16,6 +16,7 @@ #pragma once #include "compute_distance.hpp" +#include "compute_distance_vpq.cuh" #include "device_common.hpp" #include "hashmap.hpp" #include "search_plan.cuh" @@ -86,27 +87,27 @@ void get_value(T* const 
host_ptr, const T* const dev_ptr, cudaStream_t cuda_stre } // MAX_DATASET_DIM : must equal to or greater than dataset_dim -template -RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const std::size_t num_pickup, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const result_indices_ptr, // [num_queries, ldr] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] - const std::uint32_t ldr, // (*) ldr >= num_pickup - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] - const std::uint32_t hash_bitlen) +template +RAFT_KERNEL random_pickup_kernel( + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const std::size_t num_pickup, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldr] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] + const std::uint32_t ldr, // (*) ldr >= num_pickup + typename DATASET_DESCRIPTOR_T::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + const std::uint32_t hash_bitlen) { + constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; + constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + const auto ldb = 
hashmap::get_size(hash_bitlen); const auto global_team_index = (blockIdx.x * blockDim.x + threadIdx.x) / TEAM_SIZE; const uint32_t query_id = blockIdx.y; @@ -114,19 +115,17 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s // Load a query extern __shared__ float query_buffer[]; const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; for (uint32_t i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { - query_buffer[j] = - spatial::knn::detail::utils::mapping{}((queries_ptr + query_id * dataset_dim)[i]); + if (i < dataset_desc.dim) { + query_buffer[j] = spatial::knn::detail::utils::mapping{}( + (queries_ptr + query_id * dataset_desc.dim)[i]); } else { query_buffer[j] = 0.0; } } __syncthreads(); - device::distance_op dist_op( - query_buffer); INDEX_T best_index_team_local; DISTANCE_T best_norm2_team_local = utils::get_max_value(); @@ -136,10 +135,11 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s seed_index = seed_ptr[global_team_index + (num_seeds * query_id)]; } else { // Chose a seed node randomly - seed_index = device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_size; + seed_index = + device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_desc.size; } - const auto norm2 = dist_op(dataset_ptr + (dataset_ld * seed_index), dataset_dim, true); + const auto norm2 = dataset_desc.compute_similarity(query_buffer, seed_index, true); if (norm2 < best_norm2_team_local) { best_norm2_team_local = norm2; @@ -161,43 +161,36 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s } // MAX_DATASET_DIM : must be equal to or greater than dataset_dim -template -void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const 
std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const std::size_t num_queries, - const std::size_t num_pickup, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const result_indices_ptr, // [num_queries, ldr] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] - const std::size_t ldr, // (*) ldr >= num_pickup - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] - const std::uint32_t hash_bitlen, - cudaStream_t const cuda_stream = 0) +template +void random_pickup( + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const std::size_t num_queries, + const std::size_t num_pickup, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldr] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] + const std::size_t ldr, // (*) ldr >= num_pickup + typename DATASET_DESCRIPTOR_T::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + const std::uint32_t hash_bitlen, + cudaStream_t const cuda_stream = 0) { - const auto block_size = 256u; - const auto num_teams_per_threadblock = block_size / TEAM_SIZE; + constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; + constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + const auto block_size = 256u; + const auto num_teams_per_threadblock = block_size / TEAM_SIZE; const dim3 grid_size((num_pickup + num_teams_per_threadblock - 1) / num_teams_per_threadblock, num_queries); const auto 
query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; const auto smem_size = query_smem_buffer_length * sizeof(float); - random_pickup_kernel - <<>>(dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, + random_pickup_kernel + <<>>(dataset_desc, queries_ptr, num_pickup, num_distilation, @@ -311,32 +304,36 @@ void pickup_next_parents(INDEX_T* const parent_candidates_ptr, // [num_queries, terminate_flag); } -template RAFT_KERNEL compute_distance_to_child_nodes_kernel( - const INDEX_T* const parent_node_list, // [num_queries, search_width] - INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] - DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_node_list, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_candidates_ptr, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + parent_distance_ptr, // [num_queries, search_width] const std::size_t lds, const std::uint32_t search_width, - const DATA_T* const dataset_ptr, // [dataset_size, data_dim] - const std::uint32_t dataset_dim, - const std::uint32_t dataset_size, - const std::uint32_t dataset_ld, - const INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, - const DATA_T* query_ptr, // [num_queries, data_dim] - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const typename DATASET_DESCRIPTOR_T::DATA_T* query_ptr, // [num_queries, data_dim] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t hash_bitlen, - INDEX_T* const result_indices_ptr, // 
[num_queries, ldd] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldd] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree SAMPLE_FILTER_T sample_filter) { + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; + constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; const auto global_team_id = tid / TEAM_SIZE; @@ -344,12 +341,12 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( extern __shared__ float query_buffer[]; const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; for (uint32_t i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { + if (i < dataset_desc.dim) { query_buffer[j] = - spatial::knn::detail::utils::mapping{}((query_ptr + query_id * dataset_dim)[i]); + spatial::knn::detail::utils::mapping{}((query_ptr + query_id * dataset_desc.dim)[i]); } else { query_buffer[j] = 0.0; } @@ -357,9 +354,6 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( __syncthreads(); if (global_team_id >= search_width * graph_degree) { return; } - device::distance_op dist_op( - query_buffer); - const std::size_t parent_list_index = parent_node_list[global_team_id / graph_degree + (search_width * blockIdx.y)]; @@ -381,8 +375,7 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( const auto compute_distance_flag = 
hashmap::insert( visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); - const auto norm2 = - dist_op(dataset_ptr + (dataset_ld * child_id), dataset_dim, compute_distance_flag); + const auto norm2 = dataset_desc.compute_similarity(query_buffer, child_id, compute_distance_flag); if (compute_distance_flag) { if (threadIdx.x % TEAM_SIZE == 0) { @@ -405,54 +398,53 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( } } -template +template void compute_distance_to_child_nodes( - const INDEX_T* const parent_node_list, // [num_queries, search_width] - INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] - DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_node_list, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_candidates_ptr, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + parent_distance_ptr, // [num_queries, search_width] const std::size_t lds, const uint32_t search_width, - const DATA_T* const dataset_ptr, // [dataset_size, data_dim] - const std::uint32_t dataset_dim, - const std::uint32_t dataset_size, - const std::uint32_t dataset_ld, - const INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, - const DATA_T* query_ptr, // [num_queries, data_dim] + const typename DATASET_DESCRIPTOR_T::DATA_T* query_ptr, // [num_queries, data_dim] const std::uint32_t num_queries, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t hash_bitlen, - INDEX_T* const result_indices_ptr, // [num_queries, ldd] - DISTANCE_T* const result_distances_ptr, // 
[num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldd] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream = 0) { + constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; + constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + const auto block_size = 128; const dim3 grid_size( (search_width * graph_degree + (block_size / TEAM_SIZE) - 1) / (block_size / TEAM_SIZE), num_queries); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - const auto smem_size = query_smem_buffer_length * sizeof(float); + const auto smem_size = + query_smem_buffer_length * sizeof(float) + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; - compute_distance_to_child_nodes_kernel + compute_distance_to_child_nodes_kernel <<>>(parent_node_list, parent_candidates_ptr, parent_distance_ptr, lds, search_width, - dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, + dataset_desc, neighbor_graph_ptr, graph_degree, query_ptr, @@ -609,47 +601,51 @@ void set_value_batch(T* const dev_ptr, // |<--- result_buffer_size --->| // Double buffer (B) template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using 
search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; +struct search : search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + static_assert(std::is_same_v, "Only float is supported as resulting distance"); + + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using 
search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; size_t result_buffer_allocation_size; - rmm::device_uvector result_indices; // results_indices_buffer - rmm::device_uvector result_distances; // result_distances_buffer + rmm::device_uvector result_indices; // results_indices_buffer + rmm::device_uvector result_distances; // result_distances_buffer rmm::device_uvector parent_node_list; rmm::device_uvector topk_hint; rmm::device_scalar terminate_flag; // dev_terminate_flag, host_terminate_flag.; @@ -666,8 +662,7 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl( - res, params, dim, graph_degree, topk), + : search_plan_impl(res, params, dim, graph_degree, topk), result_indices(0, resource::get_cuda_stream(res)), result_distances(0, resource::get_cuda_stream(res)), parent_node_list(0, resource::get_cuda_stream(res)), @@ -800,7 +795,7 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc_, raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] @@ -811,6 +806,8 @@ struct search : search_plan_impl { uint32_t topk, SAMPLE_FILTER_T sample_filter) { + const auto dataset_desc = + set_compute_template_params(dataset_desc_); // Init hashmap cudaStream_t stream = resource::get_cuda_stream(res); const uint32_t hash_size = hashmap::get_size(hash_bitlen); @@ -828,24 +825,20 @@ struct search : search_plan_impl { } // Choose initial entry point candidates at random - random_pickup( - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), - queries_ptr, - num_queries, - result_buffer_size, - num_random_samplings, - rand_xor_mask, - dev_seed_ptr, - num_seeds, - result_indices.data(), - result_distances.data(), - result_buffer_allocation_size, - hashmap.data(), - hash_bitlen, - stream); + 
random_pickup(dataset_desc, + queries_ptr, + num_queries, + result_buffer_size, + num_random_samplings, + rand_xor_mask, + dev_seed_ptr, + num_seeds, + result_indices.data(), + result_distances.data(), + result_buffer_allocation_size, + hashmap.data(), + hash_bitlen, + stream); unsigned iter = 0; while (1) { @@ -897,16 +890,13 @@ struct search : search_plan_impl { } // Compute distance to child nodes that are adjacent to the parent node - compute_distance_to_child_nodes( + compute_distance_to_child_nodes( parent_node_list.data(), result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, result_buffer_allocation_size, search_width, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc, graph.data_handle(), graph.extent(1), queries_ptr, @@ -993,5 +983,71 @@ struct search : search_plan_impl { } }; +template +struct search, + SAMPLE_FILTER_T> + : public search_plan_impl, + SAMPLE_FILTER_T> { + using DATASET_DESCRIPTOR_T = cagra_q_dataset_descriptor_t; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + search(raft::resources const& res, + search_params params, + int64_t dim, + int64_t graph_degree, + uint32_t topk) + : search_plan_impl(res, params, dim, graph_degree, topk) + { + } + + void set_params(raft::resources const& res) {} + + void operator()(raft::resources const& res, + DATASET_DESCRIPTOR_T dataset_desc_, + raft::device_matrix_view graph, + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const uint32_t num_queries, + const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] + uint32_t topk, + SAMPLE_FILTER_T sample_filter) + { + } +}; + } // namespace multi_kernel_search 
} // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 11ef7e5211..be5ac0554f 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -71,8 +71,12 @@ struct search_plan_impl_base : public search_params { } }; -template +template struct search_plan_impl : public search_plan_impl_base { + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + int64_t hash_bitlen; size_t small_hash_bitlen; @@ -111,7 +115,7 @@ struct search_plan_impl : public search_plan_impl_base { virtual ~search_plan_impl() {} virtual void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index f1e74ee7a5..4430b929fb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -48,43 +48,45 @@ namespace single_cta_search { template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using 
search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; +struct search : search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t 
num_itopk_candidates; @@ -93,8 +95,7 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl( - res, params, dim, graph_degree, topk) + : search_plan_impl(res, params, dim, graph_degree, topk) { set_params(res); } @@ -128,7 +129,8 @@ struct search : search_plan_impl { sizeof(float) * query_smem_buffer_length + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + sizeof(INDEX_T) * hashmap::get_size(small_hash_bitlen) + sizeof(INDEX_T) * search_width + - sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t); + sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t) + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; smem_size = base_smem_size; if (num_itopk_candidates > 256) { // Tentatively calculate the required share memory size when radix @@ -205,7 +207,7 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] @@ -217,8 +219,8 @@ struct search : search_plan_impl { SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); - select_and_run( - dataset, + select_and_run( + dataset_desc, graph, result_indices_ptr, result_distances_ptr, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index fef060ffee..44cc575b52 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -27,25 +27,23 @@ namespace single_cta_search { template void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, 
topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, uint32_t num_itopk_candidates, uint32_t block_size, uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, uint32_t num_random_samplings, @@ -60,34 +58,39 @@ void select_and_run( // raft::resources const& res, #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t 
search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail:: \ + standard_dataset_descriptor_t dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_single_cta_select_and_run( @@ -125,5 +128,477 @@ instantiate_single_cta_select_and_run( #undef instantiate_single_cta_select_and_run +#define instantiate_q_single_cta_select_and_run(TEAM_SIZE, \ + MAX_DATASET_DIM, \ + CODE_BOOK_T, \ + PQ_BITS, \ + PQ_CODE_BOOK_DIM, \ + DATA_T, \ + INDEX_T, \ + DISTANCE_T, \ + SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* 
hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 1024, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 1024, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, float, int64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + float, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + float, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + int8_t, + int64_t, + float, + 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_q_single_cta_select_and_run + } // namespace single_cta_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 652115928b..52b7e549d8 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -456,42 +456,44 @@ __device__ inline void set_value_device(T* const ptr, const T fill, const std::u } // One query one thread block -template -__launch_bounds__(1024, 1) RAFT_KERNEL - search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] - DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] - const std::uint32_t top_k, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, // stride of dataset - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const INDEX_T* const knn_graph, // [dataset_size, graph_degree] - const std::uint32_t graph_degree, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] - const std::uint32_t internal_topk, - const std::uint32_t search_width, - const std::uint32_t min_iteration, - const std::uint32_t max_iteration, - std::uint32_t* const num_executed_iterations, // [num_queries] - const std::uint32_t hash_bitlen, - const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval, - SAMPLE_FILTER_T sample_filter) 
+__launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, top_k] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] + const std::uint32_t top_k, + DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const knn_graph, // [dataset_size, graph_degree] + const std::uint32_t graph_degree, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, // [num_queries] + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + SAMPLE_FILTER_T sample_filter) { - using LOAD_T = device::LOAD_128BIT_T; + using LOAD_T = device::LOAD_128BIT_T; + + constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; + constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using QUERY_T = typename DATASET_DESCRIPTOR_T::QUERY_T; + const auto query_id = blockIdx.y; #ifdef _CLK_BREAKDOWN @@ -525,25 +527,32 @@ __launch_bounds__(1024, 1) RAFT_KERNEL const auto small_hash_size = hashmap::get_size(small_hash_bitlen); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - auto query_buffer = 
reinterpret_cast(smem); + raft::ceildiv(dataset_desc.dim, DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM) * + DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + auto query_buffer = reinterpret_cast(smem); auto result_indices_buffer = reinterpret_cast(query_buffer + query_smem_buffer_length); auto result_distances_buffer = reinterpret_cast(result_indices_buffer + result_buffer_size_32); auto visited_hash_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); auto parent_list_buffer = reinterpret_cast(visited_hash_buffer + small_hash_size); - auto topk_ws = reinterpret_cast(parent_list_buffer + search_width); - auto terminate_flag = reinterpret_cast(topk_ws + 3); - auto smem_working_ptr = reinterpret_cast(terminate_flag + 1); + auto distance_work_buffer_ptr = + reinterpret_cast(parent_list_buffer + search_width); + auto topk_ws = reinterpret_cast(distance_work_buffer_ptr + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte); + auto terminate_flag = reinterpret_cast(topk_ws + 3); + auto smem_work_ptr = reinterpret_cast(terminate_flag + 1); + + // Set smem working buffer for the distance calculation + dataset_desc.set_smem_ptr(distance_work_buffer_ptr); // A flag for filtering. auto filter_flag = terminate_flag; - const DATA_T* const query_ptr = queries_ptr + query_id * dataset_dim; + const DATA_T* const query_ptr = queries_ptr + query_id * dataset_desc.dim; for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { + if (i < dataset_desc.dim) { query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); } else { query_buffer[j] = 0.0; @@ -568,14 +577,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL // compute distance to randomly selecting nodes _CLK_START(); const INDEX_T* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; - device::compute_distance_to_random_nodes( + device::compute_distance_to_random_nodes( result_indices_buffer, result_distances_buffer, query_buffer, - dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, + dataset_desc, result_buffer_size, num_distilation, rand_xor_mask, @@ -667,7 +673,7 @@ __launch_bounds__(1024, 1) RAFT_KERNEL nullptr, topk_ws, true, - reinterpret_cast(smem_working_ptr)); + reinterpret_cast(smem_work_ptr)); _CLK_REC(clk_topk); // reset small-hash table @@ -688,7 +694,7 @@ __launch_bounds__(1024, 1) RAFT_KERNEL parent_list_buffer, result_indices_buffer, internal_topk, - dataset_size, + dataset_desc.size, search_width); _CLK_REC(clk_pickup_parents); } @@ -708,13 +714,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL // compute the norms between child nodes and query node _CLK_START(); constexpr unsigned max_n_frags = 8; - device::compute_distance_to_child_nodes( + device::compute_distance_to_child_nodes( result_indices_buffer + internal_topk, result_distances_buffer + internal_topk, query_buffer, - dataset_ptr, - dataset_dim, - dataset_ld, + dataset_desc, knn_graph, graph_degree, local_visited_hashmap_ptr, @@ -814,50 +818,36 @@ __launch_bounds__(1024, 1) RAFT_KERNEL #endif } -template +template struct search_kernel_config { - using kernel_t = - decltype(&search_kernel); + using kernel_t = decltype(&search_kernel<64, 64, 0, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>); template static auto choose_search_kernel(unsigned itopk_size) -> kernel_t { if (itopk_size <= 64) { - return search_kernel; + return search_kernel<64, + MAX_CANDIDATES, + USE_BITONIC_SORT, + DATASET_DESCRIPTOR_T, + SAMPLE_FILTER_T>; } else if (itopk_size <= 128) { - return search_kernel; } else if (itopk_size <= 256) { - return search_kernel; } else if (itopk_size <= 512) { - return search_kernel; } THROW("No kernel for parametels itopk_size %u, max_candidates %u", itopk_size, MAX_CANDIDATES); @@ -878,9 +868,9 @@ struct search_kernel_config { 
// Radix-based topk is used constexpr unsigned max_candidates = 32; // to avoid build failure if (itopk_size <= 256) { - return search_kernel; + return search_kernel<256, max_candidates, 0, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; } else if (itopk_size <= 512) { - return search_kernel; + return search_kernel<512, max_candidates, 0, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; } } THROW("No kernel for parametels itopk_size %u, num_itopk_candidates %u", @@ -891,25 +881,23 @@ struct search_kernel_config { template -void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, uint32_t num_itopk_candidates, uint32_t block_size, // uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, uint32_t num_random_samplings, @@ -922,17 +910,15 @@ void select_and_run( // raft::resources const& res, SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { + const auto dataset_desc_ = + set_compute_template_params(dataset_desc); + using dataset_desc_t = 
typename std::remove_const::type; auto kernel = - search_kernel_config::choose_itopk_and_mx_candidates(itopk_size, - num_itopk_candidates, - block_size); - RAFT_CUDA_TRY( - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + search_kernel_config::choose_itopk_and_mx_candidates( + itopk_size, num_itopk_candidates, block_size); + RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size + dataset_desc_t::smem_buffer_size_in_byte)); dim3 thread_dims(block_size, 1, 1); dim3 block_dims(1, num_queries, 1); RAFT_LOG_DEBUG( @@ -940,10 +926,7 @@ void select_and_run( // raft::resources const& res, kernel<<>>(topk_indices_ptr, topk_distances_ptr, topk, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc_, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/include/raft/neighbors/detail/cagra/utils.hpp b/cpp/include/raft/neighbors/detail/cagra/utils.hpp index 7e403abe91..265cbfdceb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/utils.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/utils.hpp @@ -111,6 +111,11 @@ _RAFT_HOST_DEVICE constexpr unsigned size_of() { return 2; } +template <> +_RAFT_HOST_DEVICE constexpr unsigned size_of() +{ + return 4; +} // max values for data types template diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py new file mode 100644 index 0000000000..5bdddc447c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py @@ -0,0 +1,119 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection( \\ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ + template void \\ + select_and_run( \\ + DATASET_DESC_T dataset_desc, \\ + raft::device_matrix_view graph, \\ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \\ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \\ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \\ + const uint32_t num_queries, \\ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ + uint32_t* const num_executed_iterations, \\ + uint32_t topk, \\ + uint32_t block_size, \\ + uint32_t result_buffer_size, \\ + uint32_t smem_size, \\ + int64_t hash_bitlen, \\ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ + uint32_t num_cta_per_query, \\ + uint32_t num_random_samplings, \\ + uint64_t rand_xor_mask, \\ + uint32_t num_seeds, \\ + size_t itopk_size, \\ + size_t search_width, \\ + size_t min_iterations, \\ + size_t max_iterations, \\ + SAMPLE_FILTER_T sample_filter, \\ + cudaStream_t stream); + +""" + +trailer = """ +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search +""" + +mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] +pq_bits = [8] +subspace_dims = [2, 4] +# block = [(64, 16), (128, 8), (256, 4), (512, 2), (1024, 1)] +# mxelem = [64, 128, 256] +load_types = ["uint4"] +code_book_types = ["half"] +search_types = dict( + float_uint32=( + "float", + "uint32_t", + "float", + ), # data_t, vec_idx_t, distance_t + half_uint32=("half", "uint32_t", "float"), + int8_uint32=("int8_t", "uint32_t", "float"), + uint8_uint32=("uint8_t", "uint32_t", "float"), + float_uint64=("float", "uint64_t", 
"float"), + half_uint64=("half", "uint64_t", "float"), +) +# knn +for type_path, (data_t, idx_t, distance_t) in search_types.items(): + for (mxdim, team) in mxdim_team: + for code_book_t in code_book_types: + for subspace_dim in subspace_dims: + for pq_bit in pq_bits: + path = f"q_search_multi_cta_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{subspace_dim}subd_{code_book_t}.cu" + with open(path, "w") as f: + f.write(header) + f.write( + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA 0 COMMA {distance_t} COMMA {idx_t} COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + ) + f.write(trailer) + # For pasting into CMakeLists.txt + print(f"src/neighbors/detail/cagra/{path}") diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..2cedb81030 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..0fb0b71f76 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ 
+ cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..d366aa7e46 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..05a1f2d101 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..2d0d3b3a9c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..ec0472365f --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..767a237100 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..06a933244d --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..c79ab09af3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a459a0ff26 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ 
+ cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..39c03511b7 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..1d969a1a91 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..4d33d983d8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..4cc2e8709d --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..42a1d31eb7 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..aea3d7cd06 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..9002e7b18d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..20f484b814 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..35269059c1 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..9a1e7174d0 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..325548ac9d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..44bc031788 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..e03065c9c7 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..c7de7cb2a4 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..2087f2e796 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..147b650b14 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..6a351ee484 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..b3f25d9dcf --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..bf82c42d7e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..0ff43e0556 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..53a3fd1ab4 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..5bff2228f9 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..cb4612b28a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..54df72a242 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..700a9b9954 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..b9c14f8c5d --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..71342a1683 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..bdd447ef3f --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..a6c2948239 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..587477bdb5 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..6086369e6d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..528b3e3d26 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ 
+ cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..4844c4ab98 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..0ae7b6e415 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..fbaee68569 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..6ac499334f --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..c2ce5d8c35 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a97fafdd1f --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_multi_cta_00_generate.py + * + */ + +#include +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + 
cudaStream_t stream); + +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py new file mode 100644 index 0000000000..83346ea70e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py @@ -0,0 +1,125 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \\ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ + template void \\ + select_and_run( \\ + DATASET_DESC_T dataset_desc, \\ + raft::device_matrix_view graph, \\ + typename DATASET_DESC_T::INDEX_T* topk_indices_ptr, \\ + typename DATASET_DESC_T::DISTANCE_T* topk_distances_ptr, \\ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \\ + const uint32_t num_queries, \\ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ + uint32_t* num_executed_iterations, \\ + uint32_t topk, \\ + uint32_t num_itopk_candidates, \\ + uint32_t block_size, \\ + uint32_t smem_size, \\ + int64_t hash_bitlen, \\ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ + size_t small_hash_bitlen, \\ + size_t small_hash_reset_interval, \\ + uint32_t num_random_samplings, \\ + uint64_t rand_xor_mask, \\ + uint32_t num_seeds, \\ + size_t itopk_size, \\ + size_t search_width, \\ + size_t min_iterations, \\ + size_t max_iterations, \\ + SAMPLE_FILTER_T sample_filter, \\ + cudaStream_t stream); + +""" + +trailer = """ +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search +""" + +mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] +# block = [(64, 16), (128, 8), (256, 4), (512, 2), (1024, 1)] +# itopk_candidates = [64, 128, 256] +# itopk_size = [64, 128, 256, 512] +# mxelem = [64, 128, 256] + +pq_bits = [8] +subspace_dims = [2, 4] + +# rblock = [(256, 4), (512, 2), (1024, 1)] +# rcandidates = [32] +# rsize = [256, 512] +code_book_types = ["half"] + +search_types = dict( + float_uint32=("float", "uint32_t", "float"), # data_t, idx_t, distance_t +
half_uint32=("half", "uint32_t", "float"), + int8_uint32=("int8_t", "uint32_t", "float"), + uint8_uint32=("uint8_t", "uint32_t", "float"), + float_uint64=("float", "uint64_t", "float"), + half_uint64=("half", "uint64_t", "float"), +) + +# knn +for type_path, (data_t, idx_t, distance_t) in search_types.items(): + for (mxdim, team) in mxdim_team: + for code_book_t in code_book_types: + for subspace_dim in subspace_dims: + for pq_bit in pq_bits: + path = f"q_search_single_cta_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{subspace_dim}subd_{code_book_t}.cu" + with open(path, "w") as f: + f.write(header) + f.write( + f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA 0 COMMA {distance_t} COMMA {idx_t} COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + ) + + f.write(trailer) + # For pasting into CMakeLists.txt + print(f"src/neighbors/detail/cagra/{path}") diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..923ebb0f39 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..cc5ed1fcfd --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t
num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..802915010f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..de18551dca --- /dev/null +++
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ +
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..23e59a6567 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..cc16f86865 --- /dev/null +++
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ +
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..4913fdcec8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..2025541036 --- /dev/null +++
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ +
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..5a1d01bf36 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..6454cdd6d2 --- /dev/null +++
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ +
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..c99b9a06df --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..56ff585150 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..1337c52346 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..94b8e0f43d --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..672649c665 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a68a2db7c5 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..23baf22b22 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..efc808398a --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..3e492a6c56 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..1b41e52853 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..2ca8109c42 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..127ceb5b20 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..2a832075b6 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..901e3ecf1b --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..41cd2f29f8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..4385966ddd --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..08bc44e0ed --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..7ab54e22b4 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..e54fa50a95 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..894c782ba5 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..dc1e9b988b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..21b41903bd --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..2cc366d243 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..3b19648cc1 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , /* lets one macro argument carry a comma-separated template argument list */ + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..847fbb9e0d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..c51e30f77a --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..d7b90d36b6 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..57b7299312 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..deeef5499d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..e9ead11eaf --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..aa3ddb46f9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..9a0051ee59 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..cdafb745b0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..c3e7d7c808 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..92b9f9c2fc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..479d4bcb58 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * and holds a single explicit instantiation of select_and_run. + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_select_and_run + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..d8fe1806fa --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..8957f1df00 --- /dev/null +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,73 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python search_single_cta_00_generate.py + * + */ + +#include +#include +#include + +#define COMMA , + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + 
SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_single_cta_search_kernel + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 6f8766c86b..abc0afeb5e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -41,26 +41,28 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::multi_cta_search { #define instantiate_kernel_selection( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ template void \\ - select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ + select_and_run( \\ + DATASET_DESC_T dataset_desc, \\ + raft::device_matrix_view graph, \\ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \\ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \\ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \\ const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ uint32_t* const num_executed_iterations, \\ uint32_t topk, \\ uint32_t block_size, \\ uint32_t result_buffer_size, \\ uint32_t smem_size, \\ int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ uint32_t num_cta_per_query, \\ uint32_t num_random_samplings, 
\\ uint64_t rand_xor_mask, \\ @@ -103,7 +105,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA 0 COMMA 0 COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 1a3b2284bd..129d31788f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, float, 
uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index 36e86d9ed6..790e186c42 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + 
template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index 6f1af2d93f..0b8850c4da 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 16, 256, float, 
uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index 1279f8e415..0e1303721d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ 
+ template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index 0dabff0df5..3e13743871 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, float, 
uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index 72bb74cdb8..c6c32c0664 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + 
template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index dceea10b5d..49bbb0fd8c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 16, 256, float, 
uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index acb8bd6a12..92fda41528 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ 
+ template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu index fa89bca45f..c2ce339aab 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const 
topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu index 645ca61ff5..2c32bcb2ea 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename 
DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu index 41b6f9b420..ed7e51494e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ 
+#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu index 38f0ac3b04..9d2f81005f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view 
dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + 
raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu index c462a9d359..a104ac67c0 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + 
uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu index f5b2874e20..9629ab956c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - 
size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 8, 128, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu index 0b01428b86..f7d430d625 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, 
SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 16, 256, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + 
half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu index 70228a129d..4f89823b2b 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + 
uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 512, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index 0254f09ff0..2d80aac4d6 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, 
int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index 2b67e7e968..fa2a911b86 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) 
\ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index 17d6722e58..1f1d2f2088 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 16, 256, int8_t, 
uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 38f02812e2..683dbbbcc4 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + 
template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index fa111196c6..be0a84b2bf 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, 
uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 1ef3c28aa3..105c192797 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, 
SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index d26cb44843..61c2dea41c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 16, 256, 
uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 4d4322f261..152cd4360c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,37 +29,40 @@ namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define COMMA , + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, 
SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 1515f43134..7aa3fb790e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -41,26 +41,28 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ template void \\ - select_and_run( \\ - raft::device_matrix_view dataset, \\ - 
raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ + select_and_run( \\ + DATASET_DESC_T dataset_desc, \\ + raft::device_matrix_view graph, \\ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \\ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \\ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \\ const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ uint32_t* const num_executed_iterations, \\ uint32_t topk, \\ uint32_t num_itopk_candidates, \\ uint32_t block_size, \\ uint32_t smem_size, \\ int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ size_t small_hash_bitlen, \\ size_t small_hash_reset_interval, \\ uint32_t num_random_samplings, \\ @@ -107,7 +109,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA 0 COMMA 0 COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index b8c23103ba..d696a241b3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + 
uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index 8ab1897119..7deee0ebb6 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 9fd36b4cb9..3d6e8aa0cf 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index a9ee2c864b..ed5c0b6e50 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index dadc574b65..08926144de 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index 30e043f47e..08f96d4340 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index 089e4c930f..982719e75a 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 3e8ffb8bf8..82a05a81b7 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu index 29e7bfa250..c1a3b108c3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template 
void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu index a004f900d0..6c91fa2f11 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - 
raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA 
uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu index 549849b21d..f9022574b7 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename 
DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu index 3825f572f7..579b33f369 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t 
block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu index 
31d83f443b..0ef0b61421 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t 
small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 1024, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu index 3493ab294c..f1458c1bdf 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, 
\ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 8, 128, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu index 6e09709994..6f6f8bbe54 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define 
instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 16, 256, half, 
uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu index 4bc0158f7e..2a48da9ca4 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu @@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const 
topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 512, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index 279587738e..9b2230e229 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index ef127d3f7d..ff092380e1 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 7fcfdcc28e..6dc7f152ea 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index a6c606d99b..7a6a691d26 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 0b8be56614..42212f3000 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index 4c193b9408..2602d63280 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index bdf16d2f03..75557e019d 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index 93624df4aa..934d2374dd 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,40 +27,44 @@ #include #include +#define COMMA , + namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t 
max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index dd7eb839ab..4c40c6385b 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -369,6 +369,7 @@ if(BUILD_TESTS) test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu test/neighbors/ann_cagra/test_float_int64_t.cu test/neighbors/ann_cagra/test_half_int64_t.cu + test/neighbors/ann_cagra_vpq/test_float_int64_t.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index 175e4ef483..67bc1a713f 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,87 +21,135 @@ namespace raft::neighbors::cagra::detail { namespace multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_kernel_selection( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR \ + dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, 
\ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 32, + 1024, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 8, + 128, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 16, + 256, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 32, + 512, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection } // namespace multi_cta_search namespace single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t 
rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR \ + dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 32, + 1024, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 8, + 128, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 16, + 256, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 32, + 512, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace single_cta_search -} // namespace raft::neighbors::cagra::detail \ No newline at end of file +} // namespace raft::neighbors::cagra::detail diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh new file mode 100644 index 0000000000..476df406bc --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Search with filter instantiation + +#include "../test_utils.cuh" +#include "ann_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include + +namespace raft::neighbors::cagra { +struct AnnCagraVpqInputs { + int n_queries; + int n_rows; + int dim; + int k; + int subspace_dim; + int pq_bits; + graph_build_algo build_algo; + search_algo algo; + int max_queries; + int team_size; + int itopk_size; + int search_width; + raft::distance::DistanceType metric; + bool host_dataset; + bool include_serialized_dataset; + // std::optional + double min_recall; // = std::nullopt; +}; + +inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraVpqInputs& p) +{ + std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; + std::vector build_algo = {"IVF_PQ", "NN_DESCENT"}; + os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim + << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", subspace_dim=" << p.subspace_dim + << algo.at((int)p.algo) << ", max_queries=" << p.max_queries << ", itopk_size=" << p.itopk_size + << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric) + << (p.host_dataset ? 
", host" : ", device") + << ", build_algo=" << build_algo.at((int)p.build_algo) << '}' << std::endl; + return os; +} + +template +class AnnCagraVpqTest : public ::testing::TestWithParam { + public: + AnnCagraVpqTest() + : stream_(resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + protected: + void testCagra() + { + size_t queries_size = ps.n_queries * ps.k; + std::vector indices_Cagra(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_Cagra(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database.data(), + ps.n_queries, + ps.n_rows, + ps.dim, + ps.k, + ps.metric); + update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + const auto vpq_k = ps.k * 2; + { + rmm::device_uvector distances_dev(vpq_k * ps.n_queries, stream_); + rmm::device_uvector indices_dev(vpq_k * ps.n_queries, stream_); + + { + cagra::index_params index_params; + index_params.compression = + vpq_params{.pq_bits = static_cast(ps.pq_bits), + .pq_dim = static_cast(ps.dim / ps.subspace_dim)}; + index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is + // not used for knn_graph building. 
+ index_params.build_algo = ps.build_algo; + cagra::search_params search_params; + search_params.algo = ps.algo; + search_params.max_queries = ps.max_queries; + search_params.team_size = ps.team_size; + search_params.itopk_size = ps.itopk_size; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.n_rows, ps.dim); + + { + cagra::index index(handle_); + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + index = cagra::build(handle_, index_params, database_host_view); + } else { + index = cagra::build(handle_, index_params, database_view); + }; + cagra::serialize(handle_, "cagra_index", index, ps.include_serialized_dataset); + } + + auto index = cagra::deserialize(handle_, "cagra_index"); + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.n_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, vpq_k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, vpq_k); + + cagra::search( + handle_, search_params, index, search_queries_view, indices_out_view, dists_out_view); + + { + auto host_dataset = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy( + host_dataset.data_handle(), (const DataT*)database.data(), ps.n_rows * ps.dim, stream_); + + auto host_queries = raft::make_host_matrix(ps.n_queries, ps.dim); + raft::copy(host_queries.data_handle(), + (const DataT*)search_queries_view.data_handle(), + ps.n_queries * ps.dim, + stream_); + + auto host_index_candidate = raft::make_host_matrix(ps.n_queries, vpq_k); + raft::copy(host_index_candidate.data_handle(), + 
indices_out_view.data_handle(), + ps.n_queries * vpq_k, + stream_); + + auto host_indices_Cagra_view = + raft::make_host_matrix_view(indices_Cagra.data(), ps.n_queries, ps.k); + + auto host_dists_Cagra_view = + raft::make_host_matrix_view(distances_Cagra.data(), ps.n_queries, ps.k); + + resource::sync_stream(handle_); + + raft::neighbors::refine(handle_, + raft::make_const_mdspan(host_dataset.view()), + raft::make_const_mdspan(host_queries.view()), + raft::make_const_mdspan(host_index_candidate.view()), + host_indices_Cagra_view, + host_dists_Cagra_view, + ps.metric); + + resource::sync_stream(handle_); + } + } + + // for (int i = 0; i < min(ps.n_queries, 10); i++) { + // // std::cout << "query " << i << std::end; + // print_vector("T", indices_naive.data() + i * ps.k, ps.k, std::cout); + // print_vector("C", indices_Cagra.data() + i * ps.k, ps.k, std::cout); + // print_vector("T", distances_naive.data() + i * ps.k, ps.k, std::cout); + // print_vector("C", distances_Cagra.data() + i * ps.k, ps.k, std::cout); + // } + double min_recall = ps.min_recall; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_Cagra, + distances_naive, + distances_Cagra, + ps.n_queries, + ps.k, + 0.003, + min_recall)); + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); + } + } + + void SetUp() override + { + database.resize(((size_t)ps.n_rows) * ps.dim, stream_); + search_queries.resize(ps.n_queries * ps.dim, stream_); + raft::random::RngState r(1234ULL); + if constexpr (std::is_same_v || std::is_same_v) { + raft::random::uniform(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); + raft::random::uniform( + handle_, r, search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20)); + } else { + raft::random::uniformInt( + handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); + 
raft::random::uniformInt( + handle_, r, search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20)); + } + resource::sync_stream(handle_); + } + + void TearDown() override + { + resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnCagraVpqInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + +inline std::vector generate_inputs() +{ + // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries + std::vector inputs = raft::util::itertools::product( + {100}, + {1000}, + {128}, + {16}, // k + {2, 4}, // subspace dim + {8}, // PQ bit + {graph_build_algo::NN_DESCENT}, + {search_algo::SINGLE_CTA, search_algo::MULTI_CTA}, + {0, 1, 10, 100}, // query size + {0}, + {256}, + {1}, + {raft::distance::DistanceType::L2Expanded}, + {false}, + {true}, + {0.995}); + + return inputs; +} + +const std::vector inputs = generate_inputs(); + +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu new file mode 100644 index 0000000000..ea10e81a29 --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ann_cagra_vpq.cuh" + +#include + +namespace raft::neighbors::cagra { + +typedef AnnCagraVpqTest AnnCagraVpqTestF_I64; +TEST_P(AnnCagraVpqTestF_I64, AnnCagraVpq) { this->testCagra(); } + +INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_I64, ::testing::ValuesIn(inputs)); + +} // namespace raft::neighbors::cagra From dab54e0532a5ac4099d3f47f76299bf934615c5d Mon Sep 17 00:00:00 2001 From: achirkin Date: Mon, 18 Mar 2024 19:16:35 +0100 Subject: [PATCH 02/30] Fix broken usage of index.dataset() and the like after the merge of the updated version of CAGRA-Q API --- .../neighbors/detail/cagra/cagra_search.cuh | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 836710bc04..51c689543b 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -90,8 +90,8 @@ void search_main_core( CagraSampleFilterT sample_filter = CagraSampleFilterT()) { RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", - static_cast(index.dataset_view().extent(0)), - static_cast(index.dataset_view().extent(1))); + static_cast(index.data().n_rows()), + static_cast(index.data().dim())); RAFT_LOG_DEBUG("# query size = %lu, dim = %lu\n", static_cast(queries.extent(0)), static_cast(queries.extent(1))); @@ -254,15 +254,10 @@ void search_main(raft::resources const& res, reinterpret_cast(graph.data_handle()), graph.extent(0), graph.extent(1)); // n_rows has the same type as the dataset index (the array extents type) - using ds_idx_type = decltype(index.dataset().n_rows()); + using ds_idx_type = decltype(index.data().n_rows()); // Dispatch search parameters based on the dataset kind. 
- if (auto* strided_dset = dynamic_cast*>(&index.dataset()); + if (auto* strided_dset = dynamic_cast*>(&index.data()); strided_dset != nullptr) { - const auto& internal_dataset = make_device_strided_matrix_view( - index.dataset_view().data_handle(), - index.dataset_view().extent(0), - index.dataset_view().extent(1), - index.dataset_view().stride(0)); // Set TEAM_SIZE and DATASET_BLOCK_SIZE to zero tentatively since these parameters cannot be // determined here. They are set just before kernel launch. using dataset_desc_t = standard_dataset_descriptor_t; @@ -275,15 +270,15 @@ void search_main(raft::resources const& res, search_main_core( res, params, dataset_desc, graph_internal, queries, neighbors, distances, sample_filter); } else if (auto* vpq_dset = - dynamic_cast*>(&index.dataset()); + dynamic_cast*>(&index.data()); vpq_dset != nullptr) { // Search using a compressed dataset RAFT_FAIL("FP32 VPQ dataset support is coming soon"); - } else if (auto* vpq_dset = dynamic_cast*>(&index.dataset()); + } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); vpq_dset != nullptr) { lauch_vpq_search_main_core( res, vpq_dset, params, graph_internal, queries, neighbors, distances, sample_filter); - } else if (auto* empty_dset = dynamic_cast*>(&index.dataset()); + } else if (auto* empty_dset = dynamic_cast*>(&index.data()); empty_dset != nullptr) { // Forgot to add a dataset. RAFT_FAIL( From 22709e2ee106ca52aa010d2a27e6f7211f200229 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 18 Mar 2024 18:58:46 +0100 Subject: [PATCH 03/30] Add explicit instantiations for IVF-PQ search kernels used in tests (#2212) Compilation of IVF-PQ search kernels can be time consuming. In `libraft.so` the compilation is done in parallel for kernels without filtering and with `int64_t` index type. We have test with `uint32_t` index type as well as tests for `bitset_filter` with both 32 and 64 bit index types. This PR adds explicit template instantiations for the test. 
This way we avoid repeated compilation of the kernels with filter and this also enables parallel compilation of the `compute_similarity` kernel for different template types. The kernels with these additional type parameters are not added to `libraft.so`, only linked together with the test executable. Note that this PR does not increase the number of compiled kernels, but it enables to compile them in parallel. Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Artem M. Chirkin (https://github.com/achirkin) - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/raft/pull/2212 --- cpp/bench/prims/CMakeLists.txt | 8 + .../knn/ivf_pq_filter_float_int64_t.cu | 5 +- cpp/include/raft/core/detail/nvtx.hpp | 1 + .../raft/neighbors/detail/ivf_pq_build.cuh | 46 +++-- .../ivf_pq_compute_similarity_template.cuh | 71 +++++++ ...pq_compute_similarity_filters_test-ext.cuh | 181 ++++++++++++++++++ .../neighbors/ivf_pq_search_test-ext.cuh | 88 +++++++++ .../ivf_pq_compute_similarity_00_generate.py | 107 ++++------- .../ivf_pq_compute_similarity_float_float.cu | 57 +----- ...compute_similarity_float_float_bitset32.cu | 28 +++ ...compute_similarity_float_float_bitset64.cu | 28 +++ ...q_compute_similarity_float_float_filt32.cu | 28 +++ ...f_pq_compute_similarity_float_fp8_false.cu | 57 +----- ...ute_similarity_float_fp8_false_bitset32.cu | 28 +++ ...ute_similarity_float_fp8_false_bitset64.cu | 28 +++ ...mpute_similarity_float_fp8_false_filt32.cu | 28 +++ ...vf_pq_compute_similarity_float_fp8_true.cu | 57 +----- ...pute_similarity_float_fp8_true_bitset32.cu | 28 +++ ...pute_similarity_float_fp8_true_bitset64.cu | 28 +++ ...ompute_similarity_float_fp8_true_filt32.cu | 28 +++ .../ivf_pq_compute_similarity_float_half.cu | 57 +----- ..._compute_similarity_float_half_bitset32.cu | 28 +++ ..._compute_similarity_float_half_bitset64.cu | 28 +++ ...pq_compute_similarity_float_half_filt32.cu | 28 +++ ...vf_pq_compute_similarity_half_fp8_false.cu | 57 
+----- ...pute_similarity_half_fp8_false_bitset32.cu | 28 +++ ...pute_similarity_half_fp8_false_bitset64.cu | 28 +++ ...ompute_similarity_half_fp8_false_filt32.cu | 28 +++ ...ivf_pq_compute_similarity_half_fp8_true.cu | 57 +----- ...mpute_similarity_half_fp8_true_bitset32.cu | 28 +++ ...mpute_similarity_half_fp8_true_bitset64.cu | 28 +++ ...compute_similarity_half_fp8_true_filt32.cu | 28 +++ .../ivf_pq_compute_similarity_half_half.cu | 57 +----- ...q_compute_similarity_half_half_bitset32.cu | 28 +++ ...q_compute_similarity_half_half_bitset64.cu | 28 +++ ..._pq_compute_similarity_half_half_filt32.cu | 28 +++ .../ivf_pq_search_filtering_float_int64_t.cu | 43 +++++ cpp/test/CMakeLists.txt | 24 +++ .../ann_ivf_pq/ivf_pq_build_float_uint32_t.cu | 37 ++++ .../ann_ivf_pq/ivf_pq_build_test-ext.cuh | 38 ++++ .../ivf_pq_search_float_uint32_t.cu | 68 +++++++ .../ann_ivf_pq/test_filter_float_int64_t.cu | 6 +- .../ann_ivf_pq/test_filter_int8_t_int64_t.cu | 6 +- .../ann_ivf_pq/test_float_uint32_t.cu | 12 +- .../ann_ivf_pq/test_int8_t_int64_t.cu | 3 +- 45 files changed, 1245 insertions(+), 486 deletions(-) create mode 100644 cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity_template.cuh create mode 100644 cpp/internal/raft_internal/neighbors/ivf_pq_compute_similarity_filters_test-ext.cuh create mode 100644 cpp/internal/raft_internal/neighbors/ivf_pq_search_test-ext.cuh create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_filt32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_filt32.cu create mode 100644 
cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_filt32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_filt32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_filt32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_filt32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_filt32.cu create mode 100644 cpp/src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu create mode 100644 cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu create mode 100644 cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_test-ext.cuh create mode 100644 cpp/test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index 3a2431cd34..5577881ef7 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -156,6 +156,14 @@ if(BUILD_PRIMS_BENCH) 
bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu + src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu bench/prims/neighbors/refine_float_int64_t.cu bench/prims/neighbors/refine_uint8_t_int64_t.cu bench/prims/main.cpp diff --git a/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu b/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu index 9534515cbb..1840eca99d 100644 --- a/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu +++ b/cpp/bench/prims/neighbors/knn/ivf_pq_filter_float_int64_t.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,10 @@ * limitations under the License. 
*/ -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Enable instantiation of search with filter #include "../knn.cuh" +#include +#include namespace raft::bench::spatial { KNN_REGISTER(float, int64_t, ivf_pq_filter_knn, kInputsFilter, kNoCopyOnly, kScopeFull); diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp index 8afd1f16c6..82db75de84 100644 --- a/cpp/include/raft/core/detail/nvtx.hpp +++ b/cpp/include/raft/core/detail/nvtx.hpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace raft::common::nvtx::detail { diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index b7796d52fa..8e3f7dbaf3 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -61,6 +61,8 @@ namespace raft::neighbors::ivf_pq::detail { using namespace raft::spatial::knn::detail; // NOLINT +using internal_extents_t = int64_t; // The default mdspan extent type used internally. 
+ template __launch_bounds__(BlockDim) RAFT_KERNEL copy_warped_kernel( T* out, uint32_t ld_out, const S* in, uint32_t ld_in, uint32_t n_cols, size_t n_rows) @@ -442,15 +444,16 @@ void train_per_subset(raft::resources const& handle, stream); // train PQ codebook for this subspace - auto sub_trainset_view = - raft::make_device_matrix_view(sub_trainset.data(), n_rows, index.pq_len()); - auto centers_tmp_view = raft::make_device_matrix_view( + auto sub_trainset_view = raft::make_device_matrix_view( + sub_trainset.data(), n_rows, index.pq_len()); + auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + index.pq_book_size() * index.pq_len() * j, index.pq_book_size(), index.pq_len()); - auto sub_labels_view = raft::make_device_vector_view(sub_labels.data(), n_rows); - auto cluster_sizes_view = - raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); + auto sub_labels_view = + raft::make_device_vector_view(sub_labels.data(), n_rows); + auto cluster_sizes_view = raft::make_device_vector_view( + pq_cluster_sizes.data(), index.pq_book_size()); raft::cluster::kmeans_balanced_params kmeans_params; kmeans_params.n_iters = kmeans_n_iters; kmeans_params.metric = raft::distance::DistanceType::L2Expanded; @@ -525,17 +528,17 @@ void train_per_cluster(raft::resources const& handle, size_t available_rows = size_t(cluster_size) * size_t(index.pq_dim()); auto pq_n_rows = uint32_t(std::min(big_enough, available_rows)); // train PQ codebook for this cluster - auto rot_vectors_view = raft::make_device_matrix_view( + auto rot_vectors_view = raft::make_device_matrix_view( rot_vectors.data(), pq_n_rows, index.pq_len()); - auto centers_tmp_view = raft::make_device_matrix_view( + auto centers_tmp_view = raft::make_device_matrix_view( pq_centers_tmp.data() + static_cast(index.pq_book_size()) * static_cast(index.pq_len()) * static_cast(l), index.pq_book_size(), index.pq_len()); auto pq_labels_view = - raft::make_device_vector_view(pq_labels.data(), 
pq_n_rows); - auto pq_cluster_sizes_view = - raft::make_device_vector_view(pq_cluster_sizes.data(), index.pq_book_size()); + raft::make_device_vector_view(pq_labels.data(), pq_n_rows); + auto pq_cluster_sizes_view = raft::make_device_vector_view( + pq_cluster_sizes.data(), index.pq_book_size()); raft::cluster::kmeans_balanced_params kmeans_params; kmeans_params.n_iters = kmeans_n_iters; kmeans_params.metric = raft::distance::DistanceType::L2Expanded; @@ -1587,11 +1590,11 @@ void extend(raft::resources const& handle, cudaMemcpyDefault, stream)); for (const auto& batch : vec_batches) { - auto batch_data_view = - raft::make_device_matrix_view(batch.data(), batch.size(), index->dim()); - auto batch_labels_view = raft::make_device_vector_view( + auto batch_data_view = raft::make_device_matrix_view( + batch.data(), batch.size(), index->dim()); + auto batch_labels_view = raft::make_device_vector_view( new_data_labels.data() + batch.offset(), batch.size()); - auto centers_view = raft::make_device_matrix_view( + auto centers_view = raft::make_device_matrix_view( cluster_centers.data(), n_clusters, index->dim()); raft::cluster::kmeans_balanced_params kmeans_params; kmeans_params.metric = index->metric(); @@ -1767,10 +1770,10 @@ auto build(raft::resources const& handle, auto cluster_centers = cluster_centers_buf.data(); // Train balanced hierarchical kmeans clustering - auto trainset_const_view = - raft::make_device_matrix_view(trainset.data(), n_rows_train, index.dim()); - auto centers_view = - raft::make_device_matrix_view(cluster_centers, index.n_lists(), index.dim()); + auto trainset_const_view = raft::make_device_matrix_view( + trainset.data(), n_rows_train, index.dim()); + auto centers_view = raft::make_device_matrix_view( + cluster_centers, index.n_lists(), index.dim()); raft::cluster::kmeans_balanced_params kmeans_params; kmeans_params.n_iters = params.kmeans_n_iters; kmeans_params.metric = index.metric(); @@ -1779,9 +1782,10 @@ auto build(raft::resources const& 
handle, // Trainset labels are needed for training PQ codebooks rmm::device_uvector labels(n_rows_train, stream, device_memory); - auto centers_const_view = raft::make_device_matrix_view( + auto centers_const_view = raft::make_device_matrix_view( cluster_centers, index.n_lists(), index.dim()); - auto labels_view = raft::make_device_vector_view(labels.data(), n_rows_train); + auto labels_view = + raft::make_device_vector_view(labels.data(), n_rows_train); raft::cluster::kmeans_balanced::predict(handle, kmeans_params, trainset_const_view, diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity_template.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity_template.cuh new file mode 100644 index 0000000000..83dd994bd6 --- /dev/null +++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity_template.cuh @@ -0,0 +1,71 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is to be used in source files generated by + * src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py + */ + +#pragma once + +#include +#include +#include + +#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + template auto \ + raft::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) \ + ->raft::neighbors::ivf_pq::detail::selected; \ + \ + template void \ + raft::neighbors::ivf_pq::detail::compute_similarity_run( \ + raft::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + raft::distance::DistanceType metric, \ + raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ + uint32_t* _out_indices); + +#define COMMA , diff --git a/cpp/internal/raft_internal/neighbors/ivf_pq_compute_similarity_filters_test-ext.cuh b/cpp/internal/raft_internal/neighbors/ivf_pq_compute_similarity_filters_test-ext.cuh new file mode 100644 index 0000000000..aa14ab19b8 --- /dev/null +++ b/cpp/internal/raft_internal/neighbors/ivf_pq_compute_similarity_filters_test-ext.cuh @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include // RAFT_WEAK_FUNCTION +#include // raft::distance::DistanceType +#include +#include // raft::neighbors::ivf_pq::detail::fp_8bit +#include // none_ivf_sample_filter +#include // none_ivf_sample_filter + +#include // rmm::cuda_stream_view + +#include // __half + +#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ + OutT, LutT, IvfSampleFilterT) \ + extern template auto \ + raft::neighbors::ivf_pq::detail::compute_similarity_select( \ + const cudaDeviceProp& dev_props, \ + bool manage_local_topk, \ + int locality_hint, \ + double preferred_shmem_carveout, \ + uint32_t pq_bits, \ + uint32_t pq_dim, \ + uint32_t precomp_data_count, \ + uint32_t n_queries, \ + uint32_t n_probes, \ + uint32_t topk) \ + ->raft::neighbors::ivf_pq::detail::selected; \ + \ + extern template void \ + raft::neighbors::ivf_pq::detail::compute_similarity_run( \ + raft::neighbors::ivf_pq::detail::selected s, \ + rmm::cuda_stream_view stream, \ + uint32_t dim, \ + uint32_t n_probes, \ + uint32_t pq_dim, \ + uint32_t n_queries, \ + uint32_t queries_offset, \ + raft::distance::DistanceType metric, \ + raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ + uint32_t topk, \ + uint32_t max_samples, \ + const float* cluster_centers, \ + const float* pq_centers, \ + const uint8_t* const* pq_dataset, \ + const uint32_t* cluster_labels, \ + const uint32_t* _chunk_indices, \ + const float* 
queries, \ + const uint32_t* index_list, \ + float* query_kths, \ + IvfSampleFilterT sample_filter, \ + LutT* lut_scores, \ + OutT* _out_scores, \ + uint32_t* _out_indices); + +#define COMMA , +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + float, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); + +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, 
+ raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + float, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); 
+instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + float, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); +#undef COMMA + +#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/internal/raft_internal/neighbors/ivf_pq_search_test-ext.cuh b/cpp/internal/raft_internal/neighbors/ivf_pq_search_test-ext.cuh new file mode 100644 index 0000000000..7a65e2d2f8 --- /dev/null +++ b/cpp/internal/raft_internal/neighbors/ivf_pq_search_test-ext.cuh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include // raft::device_matrix_view +#include // raft::resources +#include +#include // raft::neighbors::ivf_pq::index +#include +#include + +#include + +#include // int64_t + +#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT) \ + extern template void raft::neighbors::ivf_pq::search( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::search_params& params, \ + const raft::neighbors::ivf_pq::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + extern template void raft::neighbors::ivf_pq::search( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::search_params& params, \ + const raft::neighbors::ivf_pq::index& idx, \ + const T* queries, \ + uint32_t n_queries, \ + uint32_t k, \ + IdxT* neighbors, \ + float* distances, \ + rmm::mr::device_memory_resource* mr); \ + \ + extern template void raft::neighbors::ivf_pq::search( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::search_params& params, \ + const raft::neighbors::ivf_pq::index& idx, \ + const T* queries, \ + uint32_t n_queries, \ + uint32_t k, \ + IdxT* neighbors, \ + float* distances) + +instantiate_raft_neighbors_ivf_pq_search(float, uint32_t); + +#undef instantiate_raft_neighbors_ivf_pq_search + +#define instantiate_raft_neighbors_ivf_pq_search_with_filtering(T, IdxT, FilterT) \ + extern template void raft::neighbors::ivf_pq::search_with_filtering( \ + raft::resources const& handle, \ + const search_params& params, \ + const index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + FilterT sample_filter) + +#define COMMA , +instantiate_raft_neighbors_ivf_pq_search_with_filtering( + float, uint32_t, raft::neighbors::filtering::bitset_filter); + 
+instantiate_raft_neighbors_ivf_pq_search_with_filtering( + float, uint32_t, raft::neighbors::filtering::none_ivf_sample_filter); + +instantiate_raft_neighbors_ivf_pq_search_with_filtering( + float, int64_t, raft::neighbors::filtering::bitset_filter); + +instantiate_raft_neighbors_ivf_pq_search_with_filtering( + int8_t, int64_t, raft::neighbors::filtering::bitset_filter); + +#undef COMMA +#undef instantiate_raft_neighbors_ivf_pq_search_with_filtering diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py index 670ed57ed1..9825a48f81 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_00_generate.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,78 +30,56 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ - -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select(OutT, LutT, IvfSampleFilterT) \\ - template auto raft::neighbors::ivf_pq::detail::compute_similarity_select( \\ - const cudaDeviceProp& dev_props, \\ - bool manage_local_topk, \\ - int locality_hint, \\ - double preferred_shmem_carveout, \\ - uint32_t pq_bits, \\ - uint32_t pq_dim, \\ - uint32_t precomp_data_count, \\ - uint32_t n_queries, \\ - uint32_t n_probes, \\ - uint32_t topk) -> raft::neighbors::ivf_pq::detail::selected; \\ -\\ - template void raft::neighbors::ivf_pq::detail::compute_similarity_run( \\ - raft::neighbors::ivf_pq::detail::selected s, \\ - rmm::cuda_stream_view stream, \\ - uint32_t dim, \\ - uint32_t n_probes, \\ - uint32_t pq_dim, \\ - uint32_t n_queries, \\ - uint32_t queries_offset, \\ - raft::distance::DistanceType metric, \\ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \\ - uint32_t topk, \\ - uint32_t max_samples, \\ - const float* cluster_centers, \\ - const float* pq_centers, \\ - const uint8_t* const* pq_dataset, \\ - const uint32_t* cluster_labels, \\ - const uint32_t* _chunk_indices, \\ - const float* queries, \\ - const uint32_t* index_list, \\ - float* query_kths, \\ - IvfSampleFilterT sample_filter, \\ - LutT* lut_scores, \\ - OutT* _out_scores, \\ - uint32_t* _out_indices); - - -#define COMMA , + +#include """ -trailer = """ -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select -""" +none_filter_int64 = "raft::neighbors::filtering::ivf_to_sample_filter" \ + "" +none_filter_int32 = "raft::neighbors::filtering::ivf_to_sample_filter" \ + "" +bitset_filter32 = "raft::neighbors::filtering::ivf_to_sample_filter" \ + ">" +bitset_filter64 = 
"raft::neighbors::filtering::ivf_to_sample_filter" \ + ">" types = dict( - half_fp8_false=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>"), - half_fp8_true=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>"), - half_half=("half", "half"), - float_half=("float", "half"), - float_float= ("float", "float"), - float_fp8_false=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>"), - float_fp8_true=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>"), + half_fp8_false=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", none_filter_int64), + half_fp8_true=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", none_filter_int64), + half_half=("half", "half", none_filter_int64), + float_half=("float", "half", none_filter_int64), + float_float= ("float", "float", none_filter_int64), + float_fp8_false=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", none_filter_int64), + float_fp8_true=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", none_filter_int64), + half_fp8_false_filt32=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", none_filter_int32), + half_fp8_true_filt32=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", none_filter_int32), + half_half_filt32=("half", "half", none_filter_int32), + float_half_filt32=("float", "half", none_filter_int32), + float_float_filt32= ("float", "float", none_filter_int32), + float_fp8_false_filt32=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", none_filter_int32), + float_fp8_true_filt32=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", none_filter_int32), + half_fp8_false_bitset32=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", bitset_filter32), + half_fp8_true_bitset32=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", bitset_filter32), + half_half_bitset32=("half", "half", 
bitset_filter32), + float_half_bitset32=("float", "half", bitset_filter32), + float_float_bitset32= ("float", "float", bitset_filter32), + float_fp8_false_bitset32=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", bitset_filter32), + float_fp8_true_bitset32=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", bitset_filter32), + half_fp8_false_bitset64=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", bitset_filter64), + half_fp8_true_bitset64=("half", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", bitset_filter64), + half_half_bitset64=("half", "half", bitset_filter64), + float_half_bitset64=("float", "half", bitset_filter64), + float_float_bitset64= ("float", "float", bitset_filter64), + float_fp8_false_bitset64=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>", bitset_filter64), + float_fp8_true_bitset64=("float", "raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>", bitset_filter64) ) -for path_key, (OutT, LutT) in types.items(): +for path_key, (OutT, LutT, FilterT) in types.items(): path = f"ivf_pq_compute_similarity_{path_key}.cu" with open(path, "w") as f: f.write(header) - f.write(f"instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select({OutT}, {LutT}, raft::neighbors::filtering::ivf_to_sample_filter);\n") - f.write(trailer) + f.write(f"instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select({OutT}, {LutT}, {FilterT});\n") print(f"src/neighbors/detail/{path}") diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu index 7e17d6822a..db51608ae1 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,65 +16,13 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , +#include instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( float, float, raft::neighbors::filtering::ivf_to_sample_filter< int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git 
a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset32.cu new file mode 100644 index 0000000000..caaf40abdf --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + float, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu new file mode 100644 index 0000000000..7801c25e9f --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + float, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_filt32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_filt32.cu new file mode 100644 index 0000000000..45ae348849 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_float_filt32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + float, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu index c1b72dab33..2f5bcf8f92 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,65 +16,13 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , +#include instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, raft::neighbors::filtering::ivf_to_sample_filter< int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset32.cu 
b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset32.cu new file mode 100644 index 0000000000..e7f2c44254 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu new file mode 100644 index 0000000000..01b6900bb8 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_filt32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_filt32.cu new file mode 100644 index 0000000000..9f8d453364 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_filt32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu index fdff0860fc..06d21bcd50 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,65 +16,13 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , +#include instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( float, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, raft::neighbors::filtering::ivf_to_sample_filter< int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset32.cu 
b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset32.cu new file mode 100644 index 0000000000..8b733a23c1 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu new file mode 100644 index 0000000000..77e4f9a023 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_filt32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_filt32.cu new file mode 100644 index 0000000000..3e036e3df4 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_filt32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu index 7205544370..ff42f5e041 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,65 +16,13 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , +#include instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( float, half, raft::neighbors::filtering::ivf_to_sample_filter< int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset32.cu 
b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset32.cu new file mode 100644 index 0000000000..40b6313865 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu new file mode 100644 index 0000000000..9cedabdb11 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_filt32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_filt32.cu new file mode 100644 index 0000000000..61422bbc36 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_float_half_filt32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + float, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu index 2ac6c3527b..d2064cfe97 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,65 +16,13 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ 
- uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , +#include instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, raft::neighbors::filtering::ivf_to_sample_filter< int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset32.cu new file mode 100644 index 0000000000..1127f39f71 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu new file mode 100644 index 0000000000..0330bf58d6 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_filt32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_filt32.cu new file mode 100644 index 0000000000..d20f7921d5 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_filt32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu index 70f3ffdb0c..9dc954406e 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,65 +16,13 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , +#include instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( half, raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, raft::neighbors::filtering::ivf_to_sample_filter< int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset32.cu 
b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset32.cu new file mode 100644 index 0000000000..9131fa25a8 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu new file mode 100644 index 0000000000..8b4521b31b --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_filt32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_filt32.cu new file mode 100644 index 0000000000..71b63cf4a0 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_filt32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu index 5cc1cb8038..f527d879be 100644 --- a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,65 +16,13 @@ /* * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py - * * Make changes there and run in this directory: - * * > python ivf_pq_compute_similarity_00_generate.py - * */ -#include -#include - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , +#include instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( half, half, raft::neighbors::filtering::ivf_to_sample_filter< int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset32.cu 
b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset32.cu new file mode 100644 index 0000000000..8e1962e2bb --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu new file mode 100644 index 0000000000..e9671703e7 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + int64_t COMMA raft::neighbors::filtering::bitset_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_filt32.cu b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_filt32.cu new file mode 100644 index 0000000000..b66a07d1a9 --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_compute_similarity_half_half_filt32.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by ivf_pq_compute_similarity_00_generate.py + * Make changes there and run in this directory: + * > python ivf_pq_compute_similarity_00_generate.py + */ + +#include +instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( + half, + half, + raft::neighbors::filtering::ivf_to_sample_filter< + uint32_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); diff --git a/cpp/src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu b/cpp/src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu new file mode 100644 index 0000000000..39af78f12e --- /dev/null +++ b/cpp/src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include // raft::device_matrix_view +#include // raft::resources +#include +#include // raft::neighbors::ivf_pq::index +#include +#include + +#include + +#include // int64_t + +#define instantiate_raft_neighbors_ivf_pq_search_with_filtering(T, IdxT, FilterT) \ + template void raft::neighbors::ivf_pq::search_with_filtering( \ + raft::resources const& handle, \ + const search_params& params, \ + const index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + FilterT sample_filter) + +#define COMMA , +instantiate_raft_neighbors_ivf_pq_search_with_filtering( + float, int64_t, raft::neighbors::filtering::bitset_filter); + +#undef COMMA +#undef instantiate_raft_neighbors_ivf_pq_search_with_filtering diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 4c40c6385b..9bb9a8d54c 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -402,6 +402,30 @@ if(BUILD_TESTS) test/neighbors/ann_ivf_flat/test_float_int64_t.cu test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu + test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu + test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu + src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_float_filt32.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_filt32.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_filt32.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_half_filt32.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_filt32.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_filt32.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_half_filt32.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset32.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset32.cu + 
src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset32.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset32.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset32.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset32.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset32.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu + src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu test/neighbors/ann_ivf_pq/test_float_uint32_t.cu test/neighbors/ann_ivf_pq/test_float_int64_t.cu test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu diff --git a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu new file mode 100644 index 0000000000..5ba21c3c2f --- /dev/null +++ b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_float_uint32_t.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include // raft::neighbors::ivf_pq::index +#include + +#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT) \ + template raft::neighbors::ivf_pq::index raft::neighbors::ivf_pq::build( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset); \ + \ + template auto raft::neighbors::ivf_pq::build( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim) \ + ->raft::neighbors::ivf_pq::index; + +instantiate_raft_neighbors_ivf_pq_build(float, uint32_t); + +#undef instantiate_raft_neighbors_ivf_pq_build diff --git a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_test-ext.cuh b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_test-ext.cuh new file mode 100644 index 0000000000..cd5435ab2e --- /dev/null +++ b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_build_test-ext.cuh @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include // raft::neighbors::ivf_pq::index +#include + +#define instantiate_raft_neighbors_ivf_pq_build(T, IdxT) \ + extern template raft::neighbors::ivf_pq::index raft::neighbors::ivf_pq::build( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + raft::device_matrix_view dataset); \ + \ + extern template auto raft::neighbors::ivf_pq::build( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::index_params& params, \ + const T* dataset, \ + IdxT n_rows, \ + uint32_t dim) \ + ->raft::neighbors::ivf_pq::index; + +instantiate_raft_neighbors_ivf_pq_build(float, uint32_t); + +#undef instantiate_raft_neighbors_ivf_pq_build diff --git a/cpp/test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu new file mode 100644 index 0000000000..942d0fcc44 --- /dev/null +++ b/cpp/test/neighbors/ann_ivf_pq/ivf_pq_search_float_uint32_t.cu @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include // raft::neighbors::ivf_pq::index +#include + +#include + +#define instantiate_raft_neighbors_ivf_pq_search(T, IdxT) \ + template void raft::neighbors::ivf_pq::search( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::search_params& params, \ + const raft::neighbors::ivf_pq::index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances); \ + \ + template void raft::neighbors::ivf_pq::search( \ + raft::resources const& handle, \ + const raft::neighbors::ivf_pq::search_params& params, \ + const raft::neighbors::ivf_pq::index& idx, \ + const T* queries, \ + uint32_t n_queries, \ + uint32_t k, \ + IdxT* neighbors, \ + float* distances, \ + rmm::mr::device_memory_resource* mr) + +instantiate_raft_neighbors_ivf_pq_search(float, uint32_t); + +#undef instantiate_raft_neighbors_ivf_pq_search + +#define instantiate_raft_neighbors_ivf_pq_search_with_filtering(T, IdxT, FilterT) \ + template void raft::neighbors::ivf_pq::search_with_filtering( \ + raft::resources const& handle, \ + const search_params& params, \ + const index& idx, \ + raft::device_matrix_view queries, \ + raft::device_matrix_view neighbors, \ + raft::device_matrix_view distances, \ + FilterT sample_filter) + +#define COMMA , +instantiate_raft_neighbors_ivf_pq_search_with_filtering( + float, uint32_t, raft::neighbors::filtering::bitset_filter); + +instantiate_raft_neighbors_ivf_pq_search_with_filtering( + int8_t, int64_t, raft::neighbors::filtering::bitset_filter); + +instantiate_raft_neighbors_ivf_pq_search_with_filtering( + float, uint32_t, raft::neighbors::filtering::none_ivf_sample_filter); + +#undef COMMA +#undef instantiate_raft_neighbors_ivf_pq_search_with_filtering diff --git a/cpp/test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu index 17f72fb08a..70d5d8761f 100644 --- 
a/cpp/test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu +++ b/cpp/test/neighbors/ann_ivf_pq/test_filter_float_int64_t.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,11 @@ * limitations under the License. */ -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Enable instantiation of search with filter #include "../ann_ivf_pq.cuh" +#include +#include + namespace raft::neighbors::ivf_pq { using f32_f32_i64_filter = ivf_pq_filter_test; diff --git a/cpp/test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu index 537dbb4979..ba96a8db0b 100644 --- a/cpp/test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu +++ b/cpp/test/neighbors/ann_ivf_pq/test_filter_int8_t_int64_t.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,11 @@ * limitations under the License. */ -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Enable instantiation of search with filter #include "../ann_ivf_pq.cuh" +#include +#include + namespace raft::neighbors::ivf_pq { using f32_i08_i64_filter = ivf_pq_filter_test; diff --git a/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu index a6cfab1f19..b8ada2249a 100644 --- a/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_ivf_pq/test_float_uint32_t.cu @@ -14,15 +14,11 @@ * limitations under the License. */ -// XXX: the uint32_t instance is not compiled in libraft.so. So we allow -// instantiating the template here. 
-// -// TODO: consider removing this test or consider adding an instantiation to the -// library. - -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY - #include "../ann_ivf_pq.cuh" +#include "ivf_pq_build_test-ext.cuh" + +#include +#include namespace raft::neighbors::ivf_pq { diff --git a/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu b/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu index 014e96a2db..970bdd6a12 100644 --- a/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu +++ b/cpp/test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include "../ann_ivf_pq.cuh" +#include namespace raft::neighbors::ivf_pq { using f32_i08_i64 = ivf_pq_test; From 80c45f1d4ea2d7b8de300cd54d0e4825e22fcf46 Mon Sep 17 00:00:00 2001 From: achirkin Date: Mon, 18 Mar 2024 19:22:04 +0100 Subject: [PATCH 04/30] Fix style errors --- cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 51c689543b..12d334c7d3 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -269,8 +269,7 @@ void search_main(raft::resources const& res, search_main_core( res, params, dataset_desc, graph_internal, queries, neighbors, distances, sample_filter); - } else if (auto* vpq_dset = - dynamic_cast*>(&index.data()); + } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); vpq_dset != nullptr) { // Search using a compressed dataset RAFT_FAIL("FP32 VPQ dataset support is coming soon"); From 49b6b61210dd52a25ff6b747d5f2471989f4b638 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher 
Date: Mon, 18 Mar 2024 18:58:46 +0100 Subject: [PATCH 05/30] Add explicit instantiations for IVF-PQ search kernels used in tests (#2212) Compilation of IVF-PQ search kernels can be time consuming. In `libraft.so` the compilation is done in parallel for kernels without filtering and with `int64_t` index type. We have test with `uint32_t` index type as well as tests for `bitset_filter` with both 32 and 64 bit index types. This PR adds explicit template instantiations for the test. This way we avoid repeated compilation of the kernels with filter and this also enables parallel compilation of the `compute_similarity` kernel for different template types. The kernels with these additional type parameters are not added to `libraft.so`, only linked together with the test executable. Note that this PR does not increase the number of compiled kernels, but it enables to compile them in parallel. Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Artem M. Chirkin (https://github.com/achirkin) - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/raft/pull/2212 From a69f66d7a73a150616e1c2c457c541375d0add2c Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 19 Mar 2024 16:47:56 +0900 Subject: [PATCH 06/30] Add uint32 VPQ test --- cpp/test/CMakeLists.txt | 1 + .../ann_cagra_vpq/test_float_uint32_t.cu | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 9bb9a8d54c..e1d32ade8d 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -370,6 +370,7 @@ if(BUILD_TESTS) test/neighbors/ann_cagra/test_float_int64_t.cu test/neighbors/ann_cagra/test_half_int64_t.cu test/neighbors/ann_cagra_vpq/test_float_int64_t.cu + test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu 
src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu new file mode 100644 index 0000000000..f3477a4ccb --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ann_cagra_vpq.cuh" + +#include + +namespace raft::neighbors::cagra { + +typedef AnnCagraVpqTest AnnCagraVpqTestF_U32; +TEST_P(AnnCagraVpqTestF_U32, AnnCagraVpq) { this->testCagra(); } + +INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_U32, ::testing::ValuesIn(inputs)); + +} // namespace raft::neighbors::cagra From 2d3544613da80782178078dd3a34c71f92ab5812 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 19 Mar 2024 16:48:21 +0900 Subject: [PATCH 07/30] Update VPQ test --- cpp/test/neighbors/ann_cagra_vpq.cuh | 89 +++++++++++++++++++++------- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh index 476df406bc..7d5176a471 100644 --- a/cpp/test/neighbors/ann_cagra_vpq.cuh +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -43,9 +43,50 @@ #include #include +#include #include #include +namespace { +template +void GenerateDataset(T* const dataset_ptr, + T* const query_ptr, + const std::size_t dataset_size, + const std::size_t query_size, + const std::size_t dim, + const std::size_t num_centers, + cudaStream_t cuda_stream) +{ + auto center_list = raft::make_host_matrix(num_centers, dim); + auto host_dataset = raft::make_host_matrix(std::max(dataset_size, query_size), dim); + + std::normal_distribution dist(0, 1); + std::mt19937 mt(0); + for (std::size_t i = 0; i < center_list.size(); i++) { + center_list.data_handle()[i] = dist(mt); + } + + std::uniform_int_distribution i_dist(0, num_centers - 1); + for (std::size_t i = 0; i < dataset_size; i++) { + const auto center_index = i_dist(mt); + for (std::size_t j = 0; j < dim; j++) { + host_dataset.data_handle()[i * dim + j] = + center_list.data_handle()[center_index + j] + dist(mt) * 1e-1; + } + } + raft::copy(dataset_ptr, host_dataset.data_handle(), dataset_size * dim, cuda_stream); + + for (std::size_t i = 0; i < query_size; i++) { + const auto center_index = i_dist(mt); + for (std::size_t j = 0; j < dim; 
j++) { + host_dataset.data_handle()[i * dim + j] = + center_list.data_handle()[center_index + j] + dist(mt) * 1e-1; + } + } + raft::copy(query_ptr, host_dataset.data_handle(), query_size * dim, cuda_stream); +} +} // namespace + namespace raft::neighbors::cagra { struct AnnCagraVpqInputs { int n_queries; @@ -72,7 +113,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraVpqInputs& p std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; std::vector build_algo = {"IVF_PQ", "NN_DESCENT"}; os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim - << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", subspace_dim=" << p.subspace_dim + << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", subspace_dim=" << p.subspace_dim << ", " << algo.at((int)p.algo) << ", max_queries=" << p.max_queries << ", itopk_size=" << p.itopk_size << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") @@ -118,7 +159,7 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { resource::sync_stream(handle_); } - const auto vpq_k = ps.k * 2; + const auto vpq_k = ps.k * 16; { rmm::device_uvector distances_dev(vpq_k * ps.n_queries, stream_); rmm::device_uvector indices_dev(vpq_k * ps.n_queries, stream_); @@ -201,6 +242,16 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { ps.metric); resource::sync_stream(handle_); + + raft::copy(indices_dev.data(), + host_indices_Cagra_view.data_handle(), + ps.k * ps.n_queries, + stream_); + raft::copy(distances_dev.data(), + host_dists_Cagra_view.data_handle(), + ps.k * ps.n_queries, + stream_); + resource::sync_stream(handle_); } } @@ -238,17 +289,13 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { { database.resize(((size_t)ps.n_rows) * ps.dim, stream_); search_queries.resize(ps.n_queries * ps.dim, stream_); - raft::random::RngState r(1234ULL); - if constexpr (std::is_same_v || std::is_same_v) { - 
raft::random::uniform(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); - raft::random::uniform( - handle_, r, search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20)); - } else { - raft::random::uniformInt( - handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); - raft::random::uniformInt( - handle_, r, search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20)); - } + GenerateDataset(database.data(), + search_queries.data(), + ps.n_rows, + ps.n_queries, + ps.dim, + static_cast(std::sqrt(ps.n_rows)), + stream_); resource::sync_stream(handle_); } @@ -272,21 +319,21 @@ inline std::vector generate_inputs() // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries std::vector inputs = raft::util::itertools::product( {100}, - {1000}, - {128}, - {16}, // k - {2, 4}, // subspace dim - {8}, // PQ bit + {1000, 10000}, // datsset size + {128, 256}, // dataset dim + {8, 12}, // k + {2, 4}, // subspace dim + {8}, // PQ bit {graph_build_algo::NN_DESCENT}, {search_algo::SINGLE_CTA, search_algo::MULTI_CTA}, - {0, 1, 10, 100}, // query size + {0}, // query size {0}, - {256}, + {512}, {1}, {raft::distance::DistanceType::L2Expanded}, {false}, {true}, - {0.995}); + {0.8}); return inputs; } From 678f76749fb5875fee02ec3f4ca4619adfa83f6f Mon Sep 17 00:00:00 2001 From: achirkin Date: Tue, 19 Mar 2024 13:47:14 +0100 Subject: [PATCH 08/30] Cleanup the tests a little bit, add a sanity check for the index type --- cpp/test/neighbors/ann_cagra_vpq.cuh | 80 +++++++++---------- .../ann_cagra_vpq/test_float_int64_t.cu | 4 +- .../ann_cagra_vpq/test_float_uint32_t.cu | 4 +- cpp/test/neighbors/ann_utils.cuh | 2 +- 4 files changed, 42 insertions(+), 48 deletions(-) diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh index 7d5176a471..a1c33e5a52 100644 --- a/cpp/test/neighbors/ann_cagra_vpq.cuh +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -93,7 +93,7 @@ struct AnnCagraVpqInputs { int n_rows; int dim; int k; - int subspace_dim; + int pq_len; int pq_bits; graph_build_algo build_algo; search_algo algo; @@ -113,7 +113,7 @@ inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraVpqInputs& p std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; std::vector build_algo = {"IVF_PQ", "NN_DESCENT"}; os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim - << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", subspace_dim=" << p.subspace_dim << ", " + << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", pq_len=" << p.pq_len << ", " << algo.at((int)p.algo) << ", max_queries=" << p.max_queries << ", itopk_size=" << p.itopk_size << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric) << (p.host_dataset ? ", host" : ", device") @@ -165,10 +165,14 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { rmm::device_uvector indices_dev(vpq_k * ps.n_queries, stream_); { + if ((ps.dim % ps.pq_len) != 0) { + // TODO: remove this requirement in the algorithm. + GTEST_SKIP() << "(TODO) At the moment dim, (" << ps.dim + << ") must be a multiple of pq_len (" << ps.pq_len << ")"; + } cagra::index_params index_params; - index_params.compression = - vpq_params{.pq_bits = static_cast(ps.pq_bits), - .pq_dim = static_cast(ps.dim / ps.subspace_dim)}; + index_params.compression = vpq_params{.pq_bits = static_cast(ps.pq_bits), + .pq_dim = static_cast(ps.dim / ps.pq_len)}; index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is // not used for knn_graph building. 
index_params.build_algo = ps.build_algo; @@ -178,8 +182,8 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { search_params.team_size = ps.team_size; search_params.itopk_size = ps.itopk_size; - auto database_view = raft::make_device_matrix_view( - (const DataT*)database.data(), ps.n_rows, ps.dim); + auto database_view = + raft::make_device_matrix_view(database.data(), ps.n_rows, ps.dim); { cagra::index index(handle_); @@ -187,7 +191,7 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); auto database_host_view = raft::make_host_matrix_view( - (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + database_host.data_handle(), ps.n_rows, ps.dim); index = cagra::build(handle_, index_params, database_host_view); } else { index = cagra::build(handle_, index_params, database_view); @@ -198,6 +202,12 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { auto index = cagra::deserialize(handle_, "cagra_index"); if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + // CAGRA-Q sanity check: we've built the right index type + auto* vpq_dataset = + dynamic_cast*>(&index.data()); + EXPECT_NE(vpq_dataset, nullptr) + << "Expected VPQ dataset, because we're testing CAGRA-Q here."; + auto search_queries_view = raft::make_device_matrix_view( search_queries.data(), ps.n_queries, ps.dim); auto indices_out_view = @@ -241,8 +251,6 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { host_dists_Cagra_view, ps.metric); - resource::sync_stream(handle_); - raft::copy(indices_dev.data(), host_indices_Cagra_view.data_handle(), ps.k * ps.n_queries, @@ -255,13 +263,6 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { } } - // for (int i = 0; i < min(ps.n_queries, 10); i++) { - // // std::cout << "query " << i << std::end; - // print_vector("T", indices_naive.data() 
+ i * ps.k, ps.k, std::cout); - // print_vector("C", indices_Cagra.data() + i * ps.k, ps.k, std::cout); - // print_vector("T", distances_naive.data() + i * ps.k, ps.k, std::cout); - // print_vector("C", distances_Cagra.data() + i * ps.k, ps.k, std::cout); - // } double min_recall = ps.min_recall; EXPECT_TRUE(eval_neighbours(indices_naive, indices_Cagra, @@ -314,30 +315,23 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { rmm::device_uvector search_queries; }; -inline std::vector generate_inputs() -{ - // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries - std::vector inputs = raft::util::itertools::product( - {100}, - {1000, 10000}, // datsset size - {128, 256}, // dataset dim - {8, 12}, // k - {2, 4}, // subspace dim - {8}, // PQ bit - {graph_build_algo::NN_DESCENT}, - {search_algo::SINGLE_CTA, search_algo::MULTI_CTA}, - {0}, // query size - {0}, - {512}, - {1}, - {raft::distance::DistanceType::L2Expanded}, - {false}, - {true}, - {0.8}); - - return inputs; -} - -const std::vector inputs = generate_inputs(); +const std::vector vpq_inputs = raft::util::itertools::product( + {100}, // n_queries + {1000, 10000}, // n_rows + {128, 256}, // dim + {8, 12}, // k + {2, 4}, // pq_len + {8}, // pq_bits + {graph_build_algo::NN_DESCENT}, // build_algo + {search_algo::SINGLE_CTA, search_algo::MULTI_CTA}, // algo + {0}, // max_queries + {0}, // team_size + {512}, // itopk_size + {1}, // search_width + {raft::distance::DistanceType::L2Expanded}, // metric + {false}, // host_dataset + {true}, // include_serialized_dataset + {0.8} // min_recall +); } // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu index ea10e81a29..7f89df3af1 100644 --- a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu +++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,6 @@ namespace raft::neighbors::cagra { typedef AnnCagraVpqTest AnnCagraVpqTestF_I64; TEST_P(AnnCagraVpqTestF_I64, AnnCagraVpq) { this->testCagra(); } -INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_I64, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_I64, ::testing::ValuesIn(vpq_inputs)); } // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu index f3477a4ccb..19d3f32250 100644 --- a/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,6 +23,6 @@ namespace raft::neighbors::cagra { typedef AnnCagraVpqTest AnnCagraVpqTestF_U32; TEST_P(AnnCagraVpqTestF_U32, AnnCagraVpq) { this->testCagra(); } -INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_U32, ::testing::ValuesIn(vpq_inputs)); } // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index afd083d512..a3373d79fd 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -265,7 +265,7 @@ auto eval_distances(raft::resources const& handle, raft::matrix::copy_rows( handle, - make_device_matrix_view(x, k, n_cols), + make_device_matrix_view(x, n_rows, n_cols), y.view(), make_device_vector_view(neighbors + i * k, k)); From 09db075912411a132b40c138807b6d62bef2c28c Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Tue, 19 Mar 2024 14:46:31 +0100 Subject: [PATCH 09/30] Update cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp Co-authored-by: Tamas Bela Feher --- cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 752d729da7..7f87988101 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -274,7 +274,9 @@ struct standard_dataset_descriptor_t #pragma unroll for (uint32_t v = 0; v < vlen; v++) { const uint32_t kv = k + v; - // if (kv >= dataset_dim) break; + // Note this loop can go above the dataset_dim for padded arrays. This is not a problem because: + // - Above the last element (dataset_dim-1), the query array is filled with zeros. + // - The data buffer has to be also padded with zeros. 
DISTANCE_T diff = query_ptr[device::swizzling(kv)]; diff -= spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); norm2 += diff * diff; From 4eca385997ed932f21669a0c6599bc8bfa61e3dd Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 19 Mar 2024 23:08:27 +0900 Subject: [PATCH 10/30] Output an error message if multi_kernel and vpq are specified --- cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp | 3 ++- .../raft/neighbors/detail/cagra/search_multi_kernel.cuh | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 7f87988101..f7816468de 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -274,7 +274,8 @@ struct standard_dataset_descriptor_t #pragma unroll for (uint32_t v = 0; v < vlen; v++) { const uint32_t kv = k + v; - // Note this loop can go above the dataset_dim for padded arrays. This is not a problem because: + // Note this loop can go above the dataset_dim for padded arrays. This is not a problem + // because: // - Above the last element (dataset_dim-1), the query array is filled with zeros. // - The data buffer has to be also padded with zeros. 
DISTANCE_T diff = query_ptr[device::swizzling(kv)]; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index 0098c5d844..05a02f7384 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -1030,6 +1030,7 @@ struct search(res, params, dim, graph_degree, topk) { + THROW("The multi-kernel mode does not support VPQ"); } void set_params(raft::resources const& res) {} From d321873b5dacc5f19adff26120656aa88396fd11 Mon Sep 17 00:00:00 2001 From: achirkin Date: Tue, 19 Mar 2024 15:26:34 +0100 Subject: [PATCH 11/30] Use a raft helper for ceiling division --- .../raft/neighbors/detail/cagra/compute_distance_vpq.cuh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index 642047af6a..b6d21f2e52 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -15,8 +15,11 @@ */ #pragma once + #include "compute_distance.hpp" +#include + namespace raft::neighbors::cagra::detail { template ( encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id))); if (PQ_BITS == 8) { - constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** - constexpr unsigned nelem = - ((DATASET_BLOCK_DIM / PQ_CODE_BOOK_DIM) + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); + constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** + constexpr unsigned nelem = raft::div_rounding_up_unsafe( + DATASET_BLOCK_DIM / PQ_CODE_BOOK_DIM, TEAM_SIZE * vlen); // Loading PQ codes uint32_t pq_codes[nelem]; #pragma unroll From 11f9350f5652679eb9a51ffa662c120efa54acb7 Mon Sep 17 00:00:00 2001 From: achirkin Date: Tue, 19 Mar 2024 15:27:58 +0100 Subject: [PATCH 12/30] Move the instance macro to a 
separate header to reduce the codesize --- .../cagra/q_search_multi_cta_00_generate.py | 45 ++-------------- ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 41 ++------------- ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 41 ++------------- ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 41 ++------------- ...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 41 ++------------- ..._float_uint32_dim256_t16_8pq_2subd_half.cu | 41 ++------------- ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 41 ++------------- ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 41 ++------------- ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 41 ++------------- ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 41 ++------------- ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 41 ++------------- ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 41 ++------------- ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 41 ++------------- ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 41 ++------------- ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 41 ++------------- ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 41 ++------------- ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 41 ++------------- ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 41 ++------------- ..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 41 ++------------- ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 41 ++------------- ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 41 ++------------- ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 41 ++------------- ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 41 ++------------- ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 41 ++------------- ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 41 ++------------- ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 41 ++------------- ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 41 ++------------- ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 41 ++------------- ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 41 ++------------- 
...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 41 ++------------- ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 41 ++------------- ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 41 ++------------- ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 41 ++------------- ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 41 ++------------- ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 41 ++------------- ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 41 ++------------- ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 41 ++------------- ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 41 ++------------- ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 41 ++------------- ...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 41 ++------------- ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 41 ++------------- ...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 41 ++------------- ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 41 ++------------- ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 41 ++------------- ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 41 ++------------- ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 41 ++------------- ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 41 ++------------- ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 41 ++------------- ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 41 ++------------- .../cagra/q_search_single_cta_00_generate.py | 48 +++-------------- ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 45 ++-------------- ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 45 ++-------------- ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 45 ++-------------- ...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 45 ++-------------- ..._float_uint32_dim256_t16_8pq_2subd_half.cu | 45 ++-------------- ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 45 ++-------------- ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 45 ++-------------- ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 45 ++-------------- ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 
45 ++-------------- ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 45 ++-------------- ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 45 ++-------------- ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 45 ++-------------- ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 45 ++-------------- ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 45 ++-------------- ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 45 ++-------------- ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 45 ++-------------- ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 45 ++-------------- ..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 45 ++-------------- ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 45 ++-------------- ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 45 ++-------------- ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 45 ++-------------- ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 45 ++-------------- ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 45 ++-------------- ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 45 ++-------------- ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 45 ++-------------- ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 45 ++-------------- ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 45 ++-------------- ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 45 ++-------------- ...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 45 ++-------------- ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 45 ++-------------- ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 45 ++-------------- ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 45 ++-------------- ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 45 ++-------------- ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 45 ++-------------- ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 45 ++-------------- ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 45 ++-------------- ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 45 ++-------------- ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 45 ++-------------- 
...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 45 ++-------------- ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 45 ++-------------- ...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 45 ++-------------- ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 45 ++-------------- ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 45 ++-------------- ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 45 ++-------------- ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 45 ++-------------- ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 45 ++-------------- ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 45 ++-------------- ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 45 ++-------------- .../detail/cagra/search_multi_cta.cuh | 51 ++++++++++++++++++ .../cagra/search_multi_cta_00_generate.py | 40 ++------------ ...arch_multi_cta_float_uint32_dim1024_t32.cu | 38 ++------------ ...search_multi_cta_float_uint32_dim128_t8.cu | 38 ++------------ ...earch_multi_cta_float_uint32_dim256_t16.cu | 38 ++------------ ...earch_multi_cta_float_uint32_dim512_t32.cu | 38 ++------------ ...arch_multi_cta_float_uint64_dim1024_t32.cu | 38 ++------------ ...search_multi_cta_float_uint64_dim128_t8.cu | 38 ++------------ ...earch_multi_cta_float_uint64_dim256_t16.cu | 38 ++------------ ...earch_multi_cta_float_uint64_dim512_t32.cu | 38 ++------------ ...earch_multi_cta_half_uint32_dim1024_t32.cu | 38 ++------------ .../search_multi_cta_half_uint32_dim128_t8.cu | 38 ++------------ ...search_multi_cta_half_uint32_dim256_t16.cu | 38 ++------------ ...search_multi_cta_half_uint32_dim512_t32.cu | 38 ++------------ ...earch_multi_cta_half_uint64_dim1024_t32.cu | 38 ++------------ .../search_multi_cta_half_uint64_dim128_t8.cu | 38 ++------------ ...search_multi_cta_half_uint64_dim256_t16.cu | 38 ++------------ ...search_multi_cta_half_uint64_dim512_t32.cu | 38 ++------------ ...earch_multi_cta_int8_uint32_dim1024_t32.cu | 38 ++------------ .../search_multi_cta_int8_uint32_dim128_t8.cu | 38 ++------------ 
...search_multi_cta_int8_uint32_dim256_t16.cu | 38 ++------------ ...search_multi_cta_int8_uint32_dim512_t32.cu | 38 ++------------ ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 38 ++------------ ...search_multi_cta_uint8_uint32_dim128_t8.cu | 38 ++------------ ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 38 ++------------ ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 38 ++------------ .../detail/cagra/search_single_cta.cuh | 52 +++++++++++++++++++ .../cagra/search_single_cta_00_generate.py | 43 ++------------- ...rch_single_cta_float_uint32_dim1024_t32.cu | 48 +++-------------- ...earch_single_cta_float_uint32_dim128_t8.cu | 48 +++-------------- ...arch_single_cta_float_uint32_dim256_t16.cu | 48 +++-------------- ...arch_single_cta_float_uint32_dim512_t32.cu | 48 +++-------------- ...rch_single_cta_float_uint64_dim1024_t32.cu | 48 +++-------------- ...earch_single_cta_float_uint64_dim128_t8.cu | 48 +++-------------- ...arch_single_cta_float_uint64_dim256_t16.cu | 48 +++-------------- ...arch_single_cta_float_uint64_dim512_t32.cu | 48 +++-------------- ...arch_single_cta_half_uint32_dim1024_t32.cu | 48 +++-------------- ...search_single_cta_half_uint32_dim128_t8.cu | 48 +++-------------- ...earch_single_cta_half_uint32_dim256_t16.cu | 48 +++-------------- ...earch_single_cta_half_uint32_dim512_t32.cu | 48 +++-------------- ...arch_single_cta_half_uint64_dim1024_t32.cu | 48 +++-------------- ...search_single_cta_half_uint64_dim128_t8.cu | 48 +++-------------- ...earch_single_cta_half_uint64_dim256_t16.cu | 48 +++-------------- ...earch_single_cta_half_uint64_dim512_t32.cu | 48 +++-------------- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 48 +++-------------- ...search_single_cta_int8_uint32_dim128_t8.cu | 48 +++-------------- ...earch_single_cta_int8_uint32_dim256_t16.cu | 48 +++-------------- ...earch_single_cta_int8_uint32_dim512_t32.cu | 48 +++-------------- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 48 +++-------------- 
...earch_single_cta_uint8_uint32_dim128_t8.cu | 48 +++-------------- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 48 +++-------------- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 48 +++-------------- 150 files changed, 793 insertions(+), 5678 deletions(-) create mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta.cuh diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py index 5bdddc447c..bd5f6b278f 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,56 +29,22 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ -#include +#include "search_multi_cta.cuh" + #include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - DATASET_DESC_T dataset_desc, \\ - raft::device_matrix_view graph, \\ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \\ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \\ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ - uint32_t* const 
num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t block_size, \\ - uint32_t result_buffer_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ - uint32_t num_cta_per_query, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); - """ trailer = """ -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search """ diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu index 2cedb81030..5b174ddaee 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu index 
0fb0b71f76..11503927fb 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection 
- } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu index d366aa7e46..5ad1f942e6 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t 
stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu index 05a1f2d101..fdab2893b2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - 
uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu index 2d0d3b3a9c..01e7dcba4c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu index 
ec0472365f..61afc4731b 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } 
// namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu index 767a237100..8524a653c6 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t 
stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu index 06a933244d..28f75dcbff 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - 
uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu index c79ab09af3..9c5b161911 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu index 
a459a0ff26..ab13f43868 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection 
- } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu index 39c03511b7..c03a57372d 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t 
stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu index 1d969a1a91..f4f7148580 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - 
uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu index 4d33d983d8..15c65830e9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu index 
4cc2e8709d..ba00a5cf7e 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } 
// namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu index 42a1d31eb7..ad101deec9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t 
stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu index aea3d7cd06..185fadae2e 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - 
uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu index 9002e7b18d..d67903cea2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu index 
20f484b814..2dc54bcb59 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } 
// namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu index 35269059c1..a9761e947a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - 
instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu index 9a1e7174d0..fd23a5a5b9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t 
num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu index 325548ac9d..b350696bf0 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu index 44bc031788..c6ecb67efe 
100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace 
raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu index e03065c9c7..ac4ffa356a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - 
instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu index c7de7cb2a4..e7efd7e305 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t 
num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu index 2087f2e796..f78f7ae508 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu index 
147b650b14..38cf9c85a5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } 
// namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu index 6a351ee484..c280585101 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - 
instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu index b3f25d9dcf..bddf03f42a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t 
num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu index bf82c42d7e..ddb1304325 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu index 0ff43e0556..61e4305ffa 
100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace 
raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu index 53a3fd1ab4..0967bbb039 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - 
instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu index 5bff2228f9..1b4db0b1f6 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t 
num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu index cb4612b28a..b0d33811f0 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu index 
54df72a242..84ee6dc773 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - 
} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu index 700a9b9954..887c152c0a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - 
instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu index b9c14f8c5d..2af58c6211 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t 
num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu index 71342a1683..753a3d8f49 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu index bdd447ef3f..b2528e4446 
100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace 
raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu index a6c2948239..e3b691f10d 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - 
instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu index 587477bdb5..aa070a2763 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t 
num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu index 6086369e6d..c28b846130 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu index 
528b3e3d26..93b3036aee 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 1024, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef 
instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu index 4844c4ab98..3d0553f043 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T 
sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu index 0ae7b6e415..35a09411ce 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename 
DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 8, 128, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu index fbaee68569..96aec1b2a2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu index 
6ac499334f..0f19ae504f 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 16, 256, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - 
} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu index c2ce5d8c35..9083453c7c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t 
stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu index a97fafdd1f..0d8ec8dde6 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,49 +15,19 @@ */ /* - * NOTE: this file is generated by search_multi_cta_00_generate.py + * NOTE: this file is generated by q_search_multi_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_multi_cta_00_generate.py + * > python q_search_multi_cta_00_generate.py * */ +#include "search_multi_cta.cuh" + #include -#include -#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , - -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - 
uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - instantiate_kernel_selection( 32, 512, @@ -66,6 +35,4 @@ instantiate_kernel_selection( uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py index 83346ea70e..2cbc423b9e 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -30,57 +29,22 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - DATASET_DESC_T dataset_desc, \\ - raft::device_matrix_view graph, \\ - typename DATASET_DESC_T::INDEX_T* topk_indices_ptr, \\ - typename DATASET_DESC_T::DISTANCE_T* topk_distances_ptr, \\ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \\ - const uint32_t num_queries, \\ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ - uint32_t* num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t num_itopk_candidates, \\ - uint32_t block_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ - size_t small_hash_bitlen, \\ - size_t small_hash_reset_interval, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); - """ trailer = """ -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search """ @@ -117,7 +81,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA 0 COMMA 
{distance_t} COMMA {idx_t} COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA 0 COMMA {distance_t} COMMA {idx_t} COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu index 923ebb0f39..9d7d64e544 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t 
smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu index cc5ed1fcfd..5a4ccf7682 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu index 802915010f..4bcdc42008 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu index de18551dca..9e64faa3c8 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu index 23e59a6567..c942fb075a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu index cc16f86865..dd1a9ac9e1 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu index 4913fdcec8..dd50c4b063 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu index 2025541036..b69ffaa988 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu index 5a1d01bf36..e5cbe01c51 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu index 6454cdd6d2..c8b5cfe3f6 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu index c99b9a06df..69551690e2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu index 56ff585150..ba0095892c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu index 1337c52346..960d2e1f28 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu index 94b8e0f43d..b589c1af8c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu index 672649c665..11f21a24a5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu index a68a2db7c5..0f06167fc2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu index 23baf22b22..308684a452 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu index efc808398a..5a88bae588 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu index 3e492a6c56..a3b58d44cf 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, 
\ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu index 1b41e52853..bb19e6bd54 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu index 2ca8109c42..78a2ef1cd1 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu index 127ceb5b20..54b277fdb5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu index 2a832075b6..7df4430968 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu index 901e3ecf1b..5bb3ede62b 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu index 41cd2f29f8..11f81782ae 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu index 4385966ddd..03ad10016f 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu index 08bc44e0ed..1b1b1551ab 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu index 7ab54e22b4..d3ca5fa44d 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, 
\ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu index e54fa50a95..357f4e9ca4 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu index 894c782ba5..f50a789bf9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu index dc1e9b988b..6a7e80dd92 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu index 21b41903bd..2a1817d9f8 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu index 2cc366d243..4f1f51b147 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu index 3b19648cc1..eea96f3d15 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu index 847fbb9e0d..14ef0c7a33 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu index c51e30f77a..a88d71554b 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu index d7b90d36b6..96dde7ab99 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu index 57b7299312..0dcb6fbaf9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu index deeef5499d..3facd5f91e 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu index e9ead11eaf..fc4d49e207 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu index aa3ddb46f9..ecd7c8c918 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu index 9a0051ee59..9f94847864 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 1024, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu index cdafb745b0..3474092311 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu index c3e7d7c808..49addc2bc4 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 8, 128, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu index 92b9f9c2fc..25f2a2ba25 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu index 479d4bcb58..1c1ae0e845 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t 
small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 16, 256, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu index d8fe1806fa..8ec11f13fa 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. 
* @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu index 8957f1df00..dc6b1f8ce2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2024, NVIDIA CORPORATION. * @@ -16,58 +15,24 @@ */ /* - * NOTE: this file is generated by search_single_cta_00_generate.py + * NOTE: this file is generated by q_search_single_cta_00_generate.py * * Make changes there and run in this directory: * - * > python search_single_cta_00_generate.py + * > python q_search_single_cta_00_generate.py * */ -#include -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T * topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T * topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( +instantiate_kernel_selection( 32, 512, 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh new file mode 100644 index 0000000000..179bf8f20f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +#define COMMA , + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index abc0afeb5e..472886729b 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,47 +37,14 @@ * */ -#include -#include +#include "search_multi_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define instantiate_kernel_selection( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - DATASET_DESC_T dataset_desc, \\ - raft::device_matrix_view graph, \\ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \\ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \\ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t block_size, \\ - uint32_t result_buffer_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ - uint32_t num_cta_per_query, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); - """ trailer = """ -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search """ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 129d31788f..777dab40c6 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 1024, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index 790e186c42..ca69079ebd 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(8, 128, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index 0b8850c4da..956bd57f78 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(16, 256, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index 0e1303721d..5016ca4e29 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 512, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index 3e13743871..f71447a84e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 1024, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index c6c32c0664..3da7a01d2b 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(8, 128, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index 49bbb0fd8c..a91883c1a3 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(16, 256, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index 92fda41528..979bf614dc 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 512, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu index c2ce339aab..3b4e0dedd0 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 1024, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu index 2c32bcb2ea..490fabe5cb 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(8, 128, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu index ed7e51494e..7095a2bcbd 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(16, 256, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu index 9d2f81005f..c4d7e89656 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 512, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu index a104ac67c0..8976924220 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 1024, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu index 9629ab956c..22e120b374 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(8, 128, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu index f7d430d625..a0e52734f7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(16, 256, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu index 4f89823b2b..1637af3973 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 512, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index 2d80aac4d6..bfd55f004f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 1024, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index fa2a911b86..a4c338e6db 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(8, 128, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index 1f1d2f2088..597e862bdd 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(16, 256, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 683dbbbcc4..bd2e584e75 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 512, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index be0a84b2bf..a936c424c0 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 1024, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 105c192797..8a707ccaf2 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(8, 128, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index 61c2dea41c..4ebe2e3b77 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(16, 256, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 152cd4360c..459219c8d1 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,46 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { - -#define COMMA , +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection(32, 512, raft::neighbors::cagra::detail::standard_dataset_descriptor_t< uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, raft::neighbors::filtering::none_cagra_sample_filter); -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh new file mode 100644 index 0000000000..7fb705a2d2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +#define COMMA , + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 7aa3fb790e..6fedae0fa3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the 
specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,48 +37,14 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - DATASET_DESC_T dataset_desc, \\ - raft::device_matrix_view graph, \\ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \\ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \\ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t num_itopk_candidates, \\ - uint32_t block_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \\ - size_t small_hash_bitlen, \\ - size_t small_hash_reset_interval, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); - """ trailer = """ -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search """ @@ -109,7 +74,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA 0 COMMA 0 COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA 0 COMMA 0 COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index d696a241b3..81d242d37f 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 
COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index 7deee0ebb6..f26ca71b8e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - 
SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 3d6e8aa0cf..048d740dd3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index ed5c0b6e50..99ea62960c 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff 
--git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index 08926144de..27f06caa8c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 1024, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index 08f96d4340..573a5688a8 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - 
float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index 982719e75a..cbe53429e9 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - 
size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 82a05a81b7..e297abde56 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu index c1a3b108c3..bde6407312 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff 
--git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu index 6c91fa2f11..b256de71e5 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(8, + 128, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu index f9022574b7..019d64b7cb 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - 
half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu index 579b33f369..2da2263380 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t 
max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu index 0ef0b61421..20dd3857b1 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu index f1458c1bdf..a4f2d10f23 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu index 6f6f8bbe54..7460bd54af 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(16, + 256, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu index 2a48da9ca4..a1983aa9ad 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - 
half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index 9b2230e229..cd5a061c2e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - 
size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index ff092380e1..d5845f1cc7 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 6dc7f152ea..e370b547dd 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff 
--git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index 7a6a691d26..c5b2841027 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 512, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 42212f3000..a16d3cac38 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 1024, - 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index 2602d63280..ceaa625f05 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t 
itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index 75557e019d..cb8f2c16cf 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index 934d2374dd..72307f5c39 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,48 +23,15 @@ * */ -#include -#include +#include "search_single_cta.cuh" -#define COMMA , +#include namespace raft::neighbors::cagra::detail::single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ - template void select_and_run( \ - DATASET_DESC_T dataset_desc, \ - raft::device_matrix_view graph, \ - typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ - typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ - const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t< + uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search 
From 38a8bf27e2e64e1cd7eb404b1c25799481e7e830 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Tue, 19 Mar 2024 23:54:30 +0900 Subject: [PATCH 13/30] Use TxN_t --- .../detail/cagra/compute_distance.hpp | 45 ++----------------- .../detail/cagra/compute_distance_vpq.cuh | 27 +++++------ 2 files changed, 15 insertions(+), 57 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index f7816468de..7fffef2497 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -20,6 +20,7 @@ #include "utils.hpp" #include +#include #include @@ -36,44 +37,6 @@ _RAFT_DEVICE constexpr unsigned get_vlen() return utils::size_of() / utils::size_of(); } -template -struct code_book_load_t_core { - using type = void; -}; -template <> -struct code_book_load_t_core<1> { - using type = std::uint8_t; -}; -template <> -struct code_book_load_t_core<2> { - using type = std::uint16_t; -}; -template <> -struct code_book_load_t_core<4> { - using type = std::uint32_t; -}; -template <> -struct code_book_load_t_core<8> { - using type = LOAD_64BIT_T; -}; -template <> -struct code_book_load_t_core<16> { - using type = LOAD_128BIT_T; -}; - -template -struct code_book_load_t { - using type = typename code_book_load_t_core() * vlen>::type; -}; - -template -struct data_load_t { - union { - typename code_book_load_t::type load; - DATA_T data[VLEN]; - }; -}; - template (); // #include (DATASET_BLOCK_DIM, TEAM_SIZE * vlen); - device::data_load_t dl_buff[reg_nelem]; + raft::TxN_t dl_buff[reg_nelem]; DISTANCE_T norm2 = 0; if (valid) { @@ -265,7 +228,7 @@ struct standard_dataset_descriptor_t for (uint32_t e = 0; e < reg_nelem; e++) { const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; if (k >= dim) break; - dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); + dl_buff[e].load(dataset_ptr, k); } #pragma unroll for 
(uint32_t e = 0; e < reg_nelem; e++) { @@ -279,7 +242,7 @@ struct standard_dataset_descriptor_t // - Above the last element (dataset_dim-1), the query array is filled with zeros. // - The data buffer has to be also padded with zeros. DISTANCE_T diff = query_ptr[device::swizzling(kv)]; - diff -= spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); + diff -= spatial::knn::detail::utils::mapping{}(dl_buff[e].val.data[v]); norm2 += diff * diff; } } diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index b6d21f2e52..aef71f04f7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -126,15 +126,13 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= n_subspace) break; // Loading VQ code-book - device::data_load_t vq_vals[PQ_CODE_BOOK_DIM]; - using vq_vals_load_t = typename device::code_book_load_t::type; + raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; #pragma unroll for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 1) { const uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k); if (d >= dim) break; - // Loading 4 x 16-bit VQ-values using 64-bit load ops (from L2$ or device memory) - vq_vals[m].load = - *(reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code))); + vq_vals[m].load(reinterpret_cast(vq_code_book_ptr), + d + (dim * vq_code)); } // Compute distance std::uint32_t pq_code = pq_codes[e]; @@ -151,7 +149,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t( smem_pq_code_book_ptr + (1 << PQ_BITS) * 2 * (m / 2) + (2 * (pq_code & 0xff)))); - diff2 -= vq_vals[d1 / vlen].data[(d1 % vlen) / 2]; + diff2 -= vq_vals[d1 / vlen].val.data[(d1 % vlen) / 2]; norm2 += diff2 * diff2; } pq_code >>= 8; @@ -165,26 +163,23 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= n_subspace) break; // Loading VQ code-book - typename 
device::data_load_t::type vq_vals[PQ_CODE_BOOK_DIM]; - using vq_vals_load_t = typename device::code_book_load_t::type; + raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; #pragma unroll for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { const std::uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k); if (d >= dim) break; // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device memory) - vq_vals[m].load = - *(reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code))); + vq_vals[m].load(reinterpret_cast(vq_code_book_ptr), + d + (dim * vq_code)); } // Compute distance std::uint32_t pq_code = pq_codes[e]; #pragma unroll for (std::uint32_t v = 0; v < vlen; v++) { if (PQ_CODE_BOOK_DIM * (v + k) >= dim) break; - typename device::data_load_t::type pq_vals; - using pq_vals_load_t = device::code_book_load_t; - pq_vals.load = *(reinterpret_cast( - smem_pq_code_book_ptr + - (PQ_CODE_BOOK_DIM * (pq_code & 0xff)))); // (from L1$ or smem) + raft::TxN_t pq_vals; + pq_vals.load(reinterpret_cast(smem_pq_code_book_ptr), + (PQ_CODE_BOOK_DIM * (pq_code & 0xff))); // (from L1$ or smem) #pragma unroll for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); @@ -192,7 +187,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= dataset_dim) break; DISTANCE_T diff = query_ptr[d]; // (from smem) diff -= pq_scale * static_cast(pq_vals.data[m]); - diff -= vq_scale * static_cast(vq_vals[d1 / vlen].data[d1 % vlen]); + diff -= vq_scale * static_cast(vq_vals[d1 / vlen].val.data[d1 % vlen]); norm += diff * diff; } pq_code >>= 8; From cda2cb8414453656d6c528f8028301d982ed25f9 Mon Sep 17 00:00:00 2001 From: achirkin Date: Tue, 19 Mar 2024 20:33:13 +0100 Subject: [PATCH 14/30] Fix incorrect addressing using TxN_t --- .../neighbors/detail/cagra/compute_distance_vpq.cuh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh 
b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index aef71f04f7..526c4fcdd9 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -131,8 +131,8 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= dim) break; - vq_vals[m].load(reinterpret_cast(vq_code_book_ptr), - d + (dim * vq_code)); + vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); } // Compute distance std::uint32_t pq_code = pq_codes[e]; @@ -169,8 +169,8 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= dim) break; // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device memory) - vq_vals[m].load(reinterpret_cast(vq_code_book_ptr), - d + (dim * vq_code)); + vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); } // Compute distance std::uint32_t pq_code = pq_codes[e]; @@ -178,8 +178,9 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= dim) break; raft::TxN_t pq_vals; - pq_vals.load(reinterpret_cast(smem_pq_code_book_ptr), - (PQ_CODE_BOOK_DIM * (pq_code & 0xff))); // (from L1$ or smem) + pq_vals.load(reinterpret_cast(smem_pq_code_book_ptr + + PQ_CODE_BOOK_DIM * (pq_code & 0xff)), + 0); // (from L1$ or smem) #pragma unroll for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); From 103b9c0f4f5eec99d251248789097628c8c5034e Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Wed, 20 Mar 2024 20:02:12 +0900 Subject: [PATCH 15/30] Fix typo --- cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 12d334c7d3..ded41158f3 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ 
b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -155,7 +155,7 @@ template -void lauch_vpq_search_main_core( +void launch_vpq_search_main_core( raft::resources const& res, const vpq_dataset* vpq_dset, search_params params, @@ -275,7 +275,7 @@ void search_main(raft::resources const& res, RAFT_FAIL("FP32 VPQ dataset support is coming soon"); } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); vpq_dset != nullptr) { - lauch_vpq_search_main_core( + launch_vpq_search_main_core( res, vpq_dset, params, graph_internal, queries, neighbors, distances, sample_filter); } else if (auto* empty_dset = dynamic_cast*>(&index.data()); empty_dset != nullptr) { From 1fb7c369917173da1ee408adaef77caa72107da1 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Wed, 20 Mar 2024 20:02:32 +0900 Subject: [PATCH 16/30] Fix VPQ search params validation --- cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index ded41158f3..68777c9325 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -165,6 +165,9 @@ void launch_vpq_search_main_core( raft::device_matrix_view distances, CagraSampleFilterT sample_filter) { + assert(vpq_dset->pq_bits() == 2 || vpq_dset->pq_bits() == 4); + assert(vpq_dset->dim() % vpq_dset->pq_dim() == 0); + const float vq_scale = 1.0f; const float pq_scale = 1.0f; From 89aa91ea076954e6b2b70343729faff515b87476 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Wed, 20 Mar 2024 20:18:54 +0900 Subject: [PATCH 17/30] Add dim size validation --- .../raft/neighbors/detail/cagra/compute_distance_vpq.cuh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index 
526c4fcdd9..ba0f5a07fa 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -91,6 +91,10 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t DATASET_BLOCK_DIM) { + RAFT_FAIL( + "`dim` must be smaller or equal to 512. Support for larger dimension is coming soon."); + } } __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, From daf4f084532265bb1d89786fb7abce5b9df54efc Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Wed, 20 Mar 2024 23:17:18 +0900 Subject: [PATCH 18/30] Fix VPQ similarity computation for large dim --- .../detail/cagra/compute_distance_vpq.cuh | 159 +++++++++--------- 1 file changed, 79 insertions(+), 80 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index ba0f5a07fa..a29f22f506 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -91,10 +91,6 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t DATASET_BLOCK_DIM) { - RAFT_FAIL( - "`dim` must be smaller or equal to 512. 
Support for larger dimension is coming soon."); - } } __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, @@ -107,95 +103,98 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t( encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id))); if (PQ_BITS == 8) { - constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** - constexpr unsigned nelem = raft::div_rounding_up_unsafe( - DATASET_BLOCK_DIM / PQ_CODE_BOOK_DIM, TEAM_SIZE * vlen); - // Loading PQ codes - uint32_t pq_codes[nelem]; -#pragma unroll - for (std::uint32_t e = 0; e < nelem; e++) { - const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= n_subspace) break; - // Loading 4 x 8-bit PQ-codes using 32-bit load ops (from device memory) - pq_codes[e] = *(reinterpret_cast( - encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id) + 4 + - k)); - } - // - if constexpr ((std::is_same::value) && (PQ_CODE_BOOK_DIM % 2 == 0)) { - // **** Use half2 for distance computation **** - half2 norm2{0, 0}; + for (uint32_t elem_offset = 0; elem_offset < dim; elem_offset += DATASET_BLOCK_DIM) { + constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** + constexpr unsigned nelem = raft::div_rounding_up_unsafe( + DATASET_BLOCK_DIM / PQ_CODE_BOOK_DIM, TEAM_SIZE * vlen); + // Loading PQ codes + uint32_t pq_codes[nelem]; #pragma unroll for (std::uint32_t e = 0; e < nelem; e++) { - const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen; + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; if (k >= n_subspace) break; - // Loading VQ code-book - raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; -#pragma unroll - for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 1) { - const uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k); - if (d >= dim) break; - vq_vals[m].load( - reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); - } - // Compute distance - std::uint32_t pq_code = pq_codes[e]; + // Loading 4 x 8-bit PQ-codes using 32-bit load 
ops (from device memory) + pq_codes[e] = *(reinterpret_cast( + encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id) + + 4 + k)); + } + // + if constexpr ((std::is_same::value) && (PQ_CODE_BOOK_DIM % 2 == 0)) { + // **** Use half2 for distance computation **** + half2 norm2{0, 0}; #pragma unroll - for (std::uint32_t v = 0; v < vlen; v++) { - if (PQ_CODE_BOOK_DIM * (v + k) >= dim) break; + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen; + if (k >= n_subspace) break; + // Loading VQ code-book + raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; #pragma unroll - for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 2) { - const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); - const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); - // Loading query vector in smem - half2 diff2 = (reinterpret_cast( - query_ptr))[device::swizzling(d / 2)]; - // Loading PQ code book in smem - diff2 -= *(reinterpret_cast( - smem_pq_code_book_ptr + (1 << PQ_BITS) * 2 * (m / 2) + (2 * (pq_code & 0xff)))); - diff2 -= vq_vals[d1 / vlen].val.data[(d1 % vlen) / 2]; - norm2 += diff2 * diff2; + for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 1) { + const uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k) + elem_offset; + if (d >= dim) break; + vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); } - pq_code >>= 8; - } - } - norm = static_cast(norm2.x + norm2.y); - } else { - // **** Use float for distance computation **** + // Compute distance + std::uint32_t pq_code = pq_codes[e]; #pragma unroll - for (std::uint32_t e = 0; e < nelem; e++) { - const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= n_subspace) break; - // Loading VQ code-book - raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_CODE_BOOK_DIM * (v + k) >= dim) break; #pragma unroll - for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { - const std::uint32_t d = (vlen * m) + 
(PQ_CODE_BOOK_DIM * k); - if (d >= dim) break; - // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device memory) - vq_vals[m].load( - reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); + for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 2) { + const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); + const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); + // Loading query vector in smem + half2 diff2 = (reinterpret_cast( + query_ptr))[device::swizzling(d / 2)]; + // Loading PQ code book in smem + diff2 -= *(reinterpret_cast( + smem_pq_code_book_ptr + (1 << PQ_BITS) * 2 * (m / 2) + (2 * (pq_code & 0xff)))); + diff2 -= vq_vals[d1 / vlen].val.data[(d1 % vlen) / 2]; + norm2 += diff2 * diff2; + } + pq_code >>= 8; + } } - // Compute distance - std::uint32_t pq_code = pq_codes[e]; + norm += static_cast(norm2.x + norm2.y); + } else { + // **** Use float for distance computation **** #pragma unroll - for (std::uint32_t v = 0; v < vlen; v++) { - if (PQ_CODE_BOOK_DIM * (v + k) >= dim) break; - raft::TxN_t pq_vals; - pq_vals.load(reinterpret_cast(smem_pq_code_book_ptr + - PQ_CODE_BOOK_DIM * (pq_code & 0xff)), - 0); // (from L1$ or smem) + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; + if (k >= n_subspace) break; + // Loading VQ code-book + raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; #pragma unroll for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { - const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); - const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); - // if (d >= dataset_dim) break; - DISTANCE_T diff = query_ptr[d]; // (from smem) - diff -= pq_scale * static_cast(pq_vals.data[m]); - diff -= vq_scale * static_cast(vq_vals[d1 / vlen].val.data[d1 % vlen]); - norm += diff * diff; + const std::uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k) + elem_offset; + if (d >= dim) break; + // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device + // memory) 
+ vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); + } + // Compute distance + std::uint32_t pq_code = pq_codes[e]; +#pragma unroll + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_CODE_BOOK_DIM * (v + k) >= dim) break; + raft::TxN_t pq_vals; + pq_vals.load(reinterpret_cast(smem_pq_code_book_ptr + + PQ_CODE_BOOK_DIM * (pq_code & 0xff)), + 0); // (from L1$ or smem) +#pragma unroll + for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { + const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); + const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); + // if (d >= dataset_dim) break; + DISTANCE_T diff = query_ptr[d]; // (from smem) + diff -= pq_scale * static_cast(pq_vals.data[m]); + diff -= vq_scale * static_cast(vq_vals[d1 / vlen].val.data[d1 % vlen]); + norm += diff * diff; + } + pq_code >>= 8; } - pq_code >>= 8; } } } From 38ab2bd787699992a23296532b770859b1bf7512 Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Wed, 20 Mar 2024 23:41:05 +0900 Subject: [PATCH 19/30] Update CAGRA VPQ test --- cpp/test/neighbors/ann_cagra_vpq.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh index a1c33e5a52..0dc7e09d38 100644 --- a/cpp/test/neighbors/ann_cagra_vpq.cuh +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -318,7 +318,7 @@ class AnnCagraVpqTest : public ::testing::TestWithParam { const std::vector vpq_inputs = raft::util::itertools::product( {100}, // n_queries {1000, 10000}, // n_rows - {128, 256}, // dim + {128, 132, 192, 256, 512, 768}, // dim {8, 12}, // k {2, 4}, // pq_len {8}, // pq_bits From 51748119efe7683641deff7ab5ec54690c95a381 Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Thu, 21 Mar 2024 01:06:21 +0900 Subject: [PATCH 20/30] Update cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh Co-authored-by: Artem M. 
Chirkin <9253178+achirkin@users.noreply.github.com> --- cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 68777c9325..31f2acd0b7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -165,8 +165,11 @@ void launch_vpq_search_main_core( raft::device_matrix_view distances, CagraSampleFilterT sample_filter) { - assert(vpq_dset->pq_bits() == 2 || vpq_dset->pq_bits() == 4); - assert(vpq_dset->dim() % vpq_dset->pq_dim() == 0); + RAFT_EXPECTS(vpq_dset->pq_bits() == 8, "Only pq_bits = 8 is supported for now"); + RAFT_EXPECTS(vpq_dset->pq_len() == 2 || vpq_dset->pq_len() == 4, + "Only pq_len 2 or for are supported for now"); + RAFT_EXPECTS(vpq_dset->dim() % vpq_dset->pq_dim() == 0, + "dim must be a multiple of pq_dim at the moment"); const float vq_scale = 1.0f; const float pq_scale = 1.0f; From 16ddb139fec6d05cb346c07731a532e339987e89 Mon Sep 17 00:00:00 2001 From: achirkin Date: Wed, 20 Mar 2024 17:07:38 +0100 Subject: [PATCH 21/30] Remove redundant team-size and dataset-block-dim parameters from the data descriptor --- .../neighbors/detail/cagra/cagra_search.cuh | 10 +- .../detail/cagra/compute_distance.hpp | 36 +----- .../detail/cagra/compute_distance_vpq.cuh | 104 +++++------------- .../cagra/search_multi_cta_kernel-ext.cuh | 70 ++++++------ .../cagra/search_multi_cta_kernel-inl.cuh | 54 +++++---- .../detail/cagra/search_multi_kernel.cuh | 95 ++++++++-------- .../cagra/search_single_cta_kernel-ext.cuh | 73 ++++++------ .../cagra/search_single_cta_kernel-inl.cuh | 96 ++++++++++------ .../cagra/q_search_multi_cta_00_generate.py | 2 +- ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- 
...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ..._float_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 11 +- ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 11 +- ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 11 +- ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 11 +- ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 11 +- ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 11 +- ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 11 +- ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 11 +- ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 11 +- ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 11 +- ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 11 +- ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 11 +- ...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 11 +- ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 11 +- ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 11 +- ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 11 +- ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- 
...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- .../cagra/q_search_single_cta_00_generate.py | 2 +- ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- ...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ..._float_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 11 +- ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 11 +- ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 11 +- ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 11 +- ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 11 +- ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 11 +- ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 11 +- ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 11 +- ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 11 +- ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 11 +- ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 11 +- ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 11 +- 
...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 11 +- ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 11 +- ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 11 +- ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 11 +- ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- ...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 11 +- ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 11 +- ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 11 +- ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 11 +- ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 11 +- ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 11 +- ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 11 +- ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 11 +- .../cagra/search_multi_cta_00_generate.py | 2 +- ...arch_multi_cta_float_uint32_dim1024_t32.cu | 10 +- ...search_multi_cta_float_uint32_dim128_t8.cu | 10 +- ...earch_multi_cta_float_uint32_dim256_t16.cu | 10 +- ...earch_multi_cta_float_uint32_dim512_t32.cu | 10 +- ...arch_multi_cta_float_uint64_dim1024_t32.cu | 10 +- ...search_multi_cta_float_uint64_dim128_t8.cu | 10 +- ...earch_multi_cta_float_uint64_dim256_t16.cu | 10 +- ...earch_multi_cta_float_uint64_dim512_t32.cu | 10 +- ...earch_multi_cta_half_uint32_dim1024_t32.cu | 10 +- .../search_multi_cta_half_uint32_dim128_t8.cu | 10 +- ...search_multi_cta_half_uint32_dim256_t16.cu | 10 +- ...search_multi_cta_half_uint32_dim512_t32.cu | 10 +- ...earch_multi_cta_half_uint64_dim1024_t32.cu | 10 +- .../search_multi_cta_half_uint64_dim128_t8.cu | 10 +- ...search_multi_cta_half_uint64_dim256_t16.cu | 10 +- ...search_multi_cta_half_uint64_dim512_t32.cu | 10 +- 
...earch_multi_cta_int8_uint32_dim1024_t32.cu | 10 +- .../search_multi_cta_int8_uint32_dim128_t8.cu | 10 +- ...search_multi_cta_int8_uint32_dim256_t16.cu | 10 +- ...search_multi_cta_int8_uint32_dim512_t32.cu | 10 +- ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 10 +- ...search_multi_cta_uint8_uint32_dim128_t8.cu | 10 +- ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 10 +- ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 10 +- .../cagra/search_single_cta_00_generate.py | 2 +- ...rch_single_cta_float_uint32_dim1024_t32.cu | 10 +- ...earch_single_cta_float_uint32_dim128_t8.cu | 10 +- ...arch_single_cta_float_uint32_dim256_t16.cu | 10 +- ...arch_single_cta_float_uint32_dim512_t32.cu | 10 +- ...rch_single_cta_float_uint64_dim1024_t32.cu | 10 +- ...earch_single_cta_float_uint64_dim128_t8.cu | 10 +- ...arch_single_cta_float_uint64_dim256_t16.cu | 10 +- ...arch_single_cta_float_uint64_dim512_t32.cu | 10 +- ...arch_single_cta_half_uint32_dim1024_t32.cu | 10 +- ...search_single_cta_half_uint32_dim128_t8.cu | 10 +- ...earch_single_cta_half_uint32_dim256_t16.cu | 10 +- ...earch_single_cta_half_uint32_dim512_t32.cu | 10 +- ...arch_single_cta_half_uint64_dim1024_t32.cu | 10 +- ...search_single_cta_half_uint64_dim128_t8.cu | 10 +- ...earch_single_cta_half_uint64_dim256_t16.cu | 10 +- ...earch_single_cta_half_uint64_dim512_t32.cu | 10 +- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 10 +- ...search_single_cta_int8_uint32_dim128_t8.cu | 10 +- ...earch_single_cta_int8_uint32_dim256_t16.cu | 10 +- ...earch_single_cta_int8_uint32_dim512_t32.cu | 10 +- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 10 +- ...earch_single_cta_uint8_uint32_dim128_t8.cu | 10 +- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 10 +- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 10 +- 156 files changed, 966 insertions(+), 1116 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 31f2acd0b7..b927f184e2 100644 
--- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -180,10 +180,8 @@ void launch_vpq_search_main_core( DatasetT, 8 /*PQ bit*/, 2 /* Subspace dimension*/, - 0, DistanceT, - InternalIdxT, - 0>; + InternalIdxT>; dataset_desc_t dataset_desc(vpq_dset->data.data_handle(), vpq_dset->encoded_row_length(), vpq_dset->pq_dim(), @@ -200,10 +198,8 @@ void launch_vpq_search_main_core( DatasetT, 8 /*PQ bit*/, 4 /* Subspace dimension*/, - 0, DistanceT, - InternalIdxT, - 0>; + InternalIdxT>; dataset_desc_t dataset_desc(vpq_dset->data.data_handle(), vpq_dset->encoded_row_length(), vpq_dset->pq_dim(), @@ -266,7 +262,7 @@ void search_main(raft::resources const& res, strided_dset != nullptr) { // Set TEAM_SIZE and DATASET_BLOCK_SIZE to zero tentatively since these parameters cannot be // determined here. They are set just before kernel launch. - using dataset_desc_t = standard_dataset_descriptor_t; + using dataset_desc_t = standard_dataset_descriptor_t; // Search using a plain (strided) row-major dataset const dataset_desc_t dataset_desc(strided_dset->view().data_handle(), strided_dset->n_rows(), diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 7fffef2497..2436d0a3ca 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -78,7 +78,8 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( } } - const auto norm2 = dataset_desc.compute_similarity(query_buffer, seed_index, valid_i); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, seed_index, valid_i); if (valid_i && (norm2 < best_norm2_team_local)) { best_norm2_team_local = norm2; @@ -152,8 +153,8 @@ _RAFT_DEVICE void compute_distance_to_child_nodes( INDEX_T child_id = invalid_index; if (valid_i) { child_id = result_child_indices_ptr[i]; } - const auto 
norm2 = - dataset_desc.compute_similarity(query_buffer, child_id, child_id != invalid_index); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, child_id, child_id != invalid_index); // Store the distance const unsigned lane_id = threadIdx.x % TEAM_SIZE; @@ -181,18 +182,12 @@ struct dataset_descriptor_base_t { dataset_descriptor_base_t(const INDEX_T size, const std::uint32_t dim) : size(size), dim(dim) {} }; -template +template struct standard_dataset_descriptor_t : public dataset_descriptor_base_t { using LOAD_T = device::LOAD_128BIT_T; using DATA_T = DATA_T_; using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; - static const std::uint32_t DATASET_BLOCK_DIM = DATASET_BLOCK_DIM_; - static const std::uint32_t TEAM_SIZE = TEAM_SIZE_; const DATA_T* const ptr; const std::size_t ld; @@ -210,6 +205,7 @@ struct standard_dataset_descriptor_t static const std::uint32_t smem_buffer_size_in_byte = 0; __device__ void set_smem_ptr(void* const){}; + template __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, const INDEX_T dataset_i, const bool valid) const @@ -255,24 +251,4 @@ struct standard_dataset_descriptor_t } }; -template -standard_dataset_descriptor_t -set_compute_template_params( - standard_dataset_descriptor_t& - desc_in) -{ - return standard_dataset_descriptor_t( - desc_in.ptr, desc_in.size, desc_in.dim, desc_in.ld); -} - } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index a29f22f506..998cf40f77 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -24,18 +24,14 @@ namespace raft::neighbors::cagra::detail { template + class INDEX_T> struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t { using LOAD_T = device::LOAD_128BIT_T; using DATA_T = DATA_T_; using 
CODE_BOOK_T = CODE_BOOK_T_; using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; - static const std::uint32_t DATASET_BLOCK_DIM = DATASET_BLOCK_DIM_; - static const std::uint32_t TEAM_SIZE = TEAM_SIZE_; const std::uint8_t* encoded_dataset_ptr; const std::uint32_t encoded_dataset_dim; @@ -50,7 +46,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t(); + (1 << PQ_BITS) * PQ_LEN * utils::size_of(); __device__ void set_smem_ptr(void* const smem_ptr) { @@ -58,15 +54,14 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t::value) { - for (unsigned i = threadIdx.x * 2; i < (1 << PQ_BITS) * PQ_CODE_BOOK_DIM; - i += blockDim.x * 2) { + for (unsigned i = threadIdx.x * 2; i < (1 << PQ_BITS) * PQ_LEN; i += blockDim.x * 2) { half2 buf2; buf2.x = pq_code_book_ptr[i]; buf2.y = pq_code_book_ptr[i + 1]; (reinterpret_cast(smem_pq_code_book_ptr + i))[0] = buf2; } } else { - for (unsigned i = threadIdx.x; i < (1 << PQ_BITS) * PQ_CODE_BOOK_DIM; i += blockDim.x) { + for (unsigned i = threadIdx.x; i < (1 << PQ_BITS) * PQ_LEN; i += blockDim.x) { // TODO: vectorize smem_pq_code_book_ptr[i] = pq_code_book_ptr[i]; } @@ -93,6 +88,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, const INDEX_T node_id, const bool valid) const @@ -104,9 +100,9 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t(encoded_dataset_dim) * node_id))); if (PQ_BITS == 8) { for (uint32_t elem_offset = 0; elem_offset < dim; elem_offset += DATASET_BLOCK_DIM) { - constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** - constexpr unsigned nelem = raft::div_rounding_up_unsafe( - DATASET_BLOCK_DIM / PQ_CODE_BOOK_DIM, TEAM_SIZE * vlen); + constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** + constexpr unsigned nelem = + raft::div_rounding_up_unsafe(DATASET_BLOCK_DIM / PQ_LEN, TEAM_SIZE * vlen); // Loading PQ codes uint32_t pq_codes[nelem]; #pragma 
unroll @@ -119,7 +115,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t::value) && (PQ_CODE_BOOK_DIM % 2 == 0)) { + if constexpr ((std::is_same::value) && (PQ_LEN % 2 == 0)) { // **** Use half2 for distance computation **** half2 norm2{0, 0}; #pragma unroll @@ -127,10 +123,10 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= n_subspace) break; // Loading VQ code-book - raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; + raft::TxN_t vq_vals[PQ_LEN]; #pragma unroll - for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 1) { - const uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k) + elem_offset; + for (std::uint32_t m = 0; m < PQ_LEN; m += 1) { + const uint32_t d = (vlen * m) + (PQ_LEN * k) + elem_offset; if (d >= dim) break; vq_vals[m].load( reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); @@ -139,11 +135,11 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= dim) break; + if (PQ_LEN * (v + k) >= dim) break; #pragma unroll - for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m += 2) { - const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); - const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); + for (std::uint32_t m = 0; m < PQ_LEN; m += 2) { + const std::uint32_t d1 = m + (PQ_LEN * v); + const std::uint32_t d = d1 + (PQ_LEN * k); // Loading query vector in smem half2 diff2 = (reinterpret_cast( query_ptr))[device::swizzling(d / 2)]; @@ -164,10 +160,10 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= n_subspace) break; // Loading VQ code-book - raft::TxN_t vq_vals[PQ_CODE_BOOK_DIM]; + raft::TxN_t vq_vals[PQ_LEN]; #pragma unroll - for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { - const std::uint32_t d = (vlen * m) + (PQ_CODE_BOOK_DIM * k) + elem_offset; + for (std::uint32_t m = 0; m < PQ_LEN; m++) { + const std::uint32_t d = (vlen * m) + (PQ_LEN * k) + elem_offset; if (d >= dim) break; // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device // 
memory) @@ -178,15 +174,15 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= dim) break; - raft::TxN_t pq_vals; - pq_vals.load(reinterpret_cast(smem_pq_code_book_ptr + - PQ_CODE_BOOK_DIM * (pq_code & 0xff)), - 0); // (from L1$ or smem) + if (PQ_LEN * (v + k) >= dim) break; + raft::TxN_t pq_vals; + pq_vals.load( + reinterpret_cast(smem_pq_code_book_ptr + PQ_LEN * (pq_code & 0xff)), + 0); // (from L1$ or smem) #pragma unroll - for (std::uint32_t m = 0; m < PQ_CODE_BOOK_DIM; m++) { - const std::uint32_t d1 = m + (PQ_CODE_BOOK_DIM * v); - const std::uint32_t d = d1 + (PQ_CODE_BOOK_DIM * k); + for (std::uint32_t m = 0; m < PQ_LEN; m++) { + const std::uint32_t d1 = m + (PQ_LEN * v); + const std::uint32_t d = d1 + (PQ_LEN * k); // if (d >= dataset_dim) break; DISTANCE_T diff = query_ptr[d]; // (from smem) diff -= pq_scale * static_cast(pq_vals.data[m]); @@ -207,48 +203,4 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t -cagra_q_dataset_descriptor_t -set_compute_template_params(cagra_q_dataset_descriptor_t& desc_in) -{ - return cagra_q_dataset_descriptor_t(desc_in.encoded_dataset_ptr, - desc_in.encoded_dataset_dim, - desc_in.n_subspace, - desc_in.vq_code_book_ptr, - desc_in.vq_scale, - desc_in.pq_code_book_ptr, - desc_in.pq_scale, - desc_in.size, - desc_in.dim); -} } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index 1e3095771f..50f9e69593 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -57,37 +57,37 @@ void select_and_run( cudaStream_t stream) RAFT_EXPLICIT; #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - 
select_and_run, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::standard_dataset_descriptor_t dataset_desc, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ + dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_kernel_selection( @@ -141,19 +141,15 @@ instantiate_kernel_selection( CODE_BOOK_T, \ PQ_BITS, \ PQ_CODE_BOOK_DIM, \ - 0, \ DISTANCE_T, \ 
- INDEX_T, \ - 0>, \ + INDEX_T>, \ SAMPLE_FILTER_T>( \ raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset_desc, \ + INDEX_T> dataset_desc, \ raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index eb771c9325..001b007000 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -123,7 +123,11 @@ __device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] // // multiple CTAs per single query // -template +template __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] @@ -147,12 +151,10 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( uint32_t* const num_executed_iterations, /* stats */ SAMPLE_FILTER_T sample_filter) { - constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; - constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; - using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; - using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; - using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; - using QUERY_T = typename DATASET_DESCRIPTOR_T::QUERY_T; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using QUERY_T = typename DATASET_DESCRIPTOR_T::QUERY_T; const auto num_queries = gridDim.y; const auto query_id = blockIdx.y; @@ -398,21 +400,36 @@ void set_value_batch(T* const dev_ptr, <<>>(dev_ptr, ld, val, count, batch_size); } -template +template struct search_kernel_config { // Search kernel function type. 
Note that the actual values for the template value // parameters do not matter, because they are not part of the function signature. The // second to fourth value parameters will be selected by the choose_* functions below. - using kernel_t = decltype(&search_kernel<128, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>); + using kernel_t = decltype(&search_kernel); static auto choose_buffer_size(unsigned result_buffer_size, unsigned block_size) -> kernel_t { if (result_buffer_size <= 64) { - return search_kernel<64, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; + return search_kernel; } else if (result_buffer_size <= 128) { - return search_kernel<128, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; + return search_kernel; } else if (result_buffer_size <= 256) { - return search_kernel<256, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; + return search_kernel; } THROW("Result buffer size %u larger than max buffer size %u", result_buffer_size, 256); } @@ -449,16 +466,13 @@ void select_and_run( SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { - const auto dataset_desc_ = - set_compute_template_params(dataset_desc); - - using dataset_desc_t = typename std::remove_const::type; - auto kernel = search_kernel_config::choose_buffer_size( - result_buffer_size, block_size); + auto kernel = + search_kernel_config:: + choose_buffer_size(result_buffer_size, block_size); RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, - smem_size + dataset_desc_t::smem_buffer_size_in_byte)); + smem_size + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte)); // Initialize hash table const uint32_t hash_size = hashmap::get_size(hash_bitlen); set_value_batch(hashmap_ptr, @@ -477,7 +491,7 @@ void select_and_run( smem_size); kernel<<>>(topk_indices_ptr, topk_distances_ptr, - dataset_desc_, + dataset_desc, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh 
b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index 05a02f7384..10788da432 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -87,7 +87,7 @@ void get_value(T* const host_ptr, const T* const dev_ptr, cudaStream_t cuda_stre } // MAX_DATASET_DIM : must equal to or greater than dataset_dim -template +template RAFT_KERNEL random_pickup_kernel( const DATASET_DESCRIPTOR_T dataset_desc, const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] @@ -102,11 +102,9 @@ RAFT_KERNEL random_pickup_kernel( typename DATASET_DESCRIPTOR_T::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] const std::uint32_t hash_bitlen) { - constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; - constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; - using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; - using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; - using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; const auto ldb = hashmap::get_size(hash_bitlen); const auto global_team_index = (blockIdx.x * blockDim.x + threadIdx.x) / TEAM_SIZE; @@ -139,7 +137,8 @@ RAFT_KERNEL random_pickup_kernel( device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_desc.size; } - const auto norm2 = dataset_desc.compute_similarity(query_buffer, seed_index, true); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, seed_index, true); if (norm2 < best_norm2_team_local) { best_norm2_team_local = norm2; @@ -161,7 +160,7 @@ RAFT_KERNEL random_pickup_kernel( } // MAX_DATASET_DIM : must be equal to or greater than dataset_dim -template +template void random_pickup( const 
DATASET_DESCRIPTOR_T dataset_desc, const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] @@ -178,10 +177,8 @@ void random_pickup( const std::uint32_t hash_bitlen, cudaStream_t const cuda_stream = 0) { - constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; - constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; - const auto block_size = 256u; - const auto num_teams_per_threadblock = block_size / TEAM_SIZE; + const auto block_size = 256u; + const auto num_teams_per_threadblock = block_size / TEAM_SIZE; const dim3 grid_size((num_pickup + num_teams_per_threadblock - 1) / num_teams_per_threadblock, num_queries); @@ -189,7 +186,7 @@ void random_pickup( raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; const auto smem_size = query_smem_buffer_length * sizeof(float); - random_pickup_kernel + random_pickup_kernel <<>>(dataset_desc, queries_ptr, num_pickup, @@ -304,7 +301,9 @@ void pickup_next_parents(INDEX_T* const parent_candidates_ptr, // [num_queries, terminate_flag); } -template RAFT_KERNEL compute_distance_to_child_nodes_kernel( const typename DATASET_DESCRIPTOR_T::INDEX_T* const @@ -331,9 +330,6 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; - constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; - constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; - const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; const auto global_team_id = tid / TEAM_SIZE; @@ -375,7 +371,8 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( const auto compute_distance_flag = hashmap::insert( visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); - const auto norm2 = dataset_desc.compute_similarity(query_buffer, child_id, compute_distance_flag); + 
const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, child_id, compute_distance_flag); if (compute_distance_flag) { if (threadIdx.x % TEAM_SIZE == 0) { @@ -398,7 +395,9 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( } } -template void compute_distance_to_child_nodes( const typename DATASET_DESCRIPTOR_T::INDEX_T* const @@ -424,9 +423,6 @@ void compute_distance_to_child_nodes( SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream = 0) { - constexpr std::uint32_t TEAM_SIZE = DATASET_DESCRIPTOR_T::TEAM_SIZE; - constexpr std::uint32_t DATASET_BLOCK_DIM = DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; - const auto block_size = 128; const dim3 grid_size( (search_width * graph_degree + (block_size / TEAM_SIZE) - 1) / (block_size / TEAM_SIZE), @@ -438,7 +434,10 @@ void compute_distance_to_child_nodes( const auto smem_size = query_smem_buffer_length * sizeof(float) + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; - compute_distance_to_child_nodes_kernel + compute_distance_to_child_nodes_kernel <<>>(parent_node_list, parent_candidates_ptr, parent_distance_ptr, @@ -795,7 +794,7 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - DATASET_DESCRIPTOR_T dataset_desc_, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] @@ -806,8 +805,6 @@ struct search : search_plan_impl { uint32_t topk, SAMPLE_FILTER_T sample_filter) { - const auto dataset_desc = - set_compute_template_params(dataset_desc_); // Init hashmap cudaStream_t stream = resource::get_cuda_stream(res); const uint32_t hash_size = hashmap::get_size(hash_bitlen); @@ -825,20 +822,20 @@ struct search : search_plan_impl { } // Choose initial entry point candidates at random - random_pickup(dataset_desc, - queries_ptr, - num_queries, - result_buffer_size, - num_random_samplings, - rand_xor_mask, - dev_seed_ptr, - num_seeds, - 
result_indices.data(), - result_distances.data(), - result_buffer_allocation_size, - hashmap.data(), - hash_bitlen, - stream); + random_pickup(dataset_desc, + queries_ptr, + num_queries, + result_buffer_size, + num_random_samplings, + rand_xor_mask, + dev_seed_ptr, + num_seeds, + result_indices.data(), + result_distances.data(), + result_buffer_allocation_size, + hashmap.data(), + hash_bitlen, + stream); unsigned iter = 0; while (1) { @@ -890,7 +887,7 @@ struct search : search_plan_impl { } // Compute distance to child nodes that are adjacent to the parent node - compute_distance_to_child_nodes( + compute_distance_to_child_nodes( parent_node_list.data(), result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, @@ -998,28 +995,24 @@ struct search, + INDEX_T_>, SAMPLE_FILTER_T> : public search_plan_impl, + INDEX_T_>, SAMPLE_FILTER_T> { using DATASET_DESCRIPTOR_T = cagra_q_dataset_descriptor_t; + INDEX_T_>; using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; @@ -1036,7 +1029,7 @@ struct search graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index 44cc575b52..a836334667 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -58,39 +58,38 @@ void select_and_run( // raft::resources const& res, #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail:: \ - 
standard_dataset_descriptor_t dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ + dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_single_cta_select_and_run( @@ -144,19 +143,15 @@ instantiate_single_cta_select_and_run( CODE_BOOK_T, \ PQ_BITS, \ PQ_CODE_BOOK_DIM, \ - 
0, \ DISTANCE_T, \ - INDEX_T, \ - 0>, \ + INDEX_T>, \ SAMPLE_FILTER_T>( \ raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset, \ + INDEX_T> dataset, \ raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 52b7e549d8..8d8dad6708 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -456,7 +456,9 @@ __device__ inline void set_value_device(T* const ptr, const T fill, const std::u } // One query one thread block -template (dataset_desc.dim, DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM) * - DATASET_DESCRIPTOR_T::DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; auto query_buffer = reinterpret_cast(smem); auto result_indices_buffer = reinterpret_cast(query_buffer + query_smem_buffer_length); auto result_distances_buffer = @@ -577,18 +576,17 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( // compute distance to randomly selecting nodes _CLK_START(); const INDEX_T* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; - device::compute_distance_to_random_nodes( - result_indices_buffer, - result_distances_buffer, - query_buffer, - dataset_desc, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen); + device::compute_distance_to_random_nodes(result_indices_buffer, + result_distances_buffer, + query_buffer, + dataset_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -818,33 +816,50 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( #endif } -template +template struct search_kernel_config { - using kernel_t = decltype(&search_kernel<64, 64, 0, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>); + using kernel_t = decltype(&search_kernel); template static auto choose_search_kernel(unsigned itopk_size) -> kernel_t { if (itopk_size <= 64) { - return search_kernel<64, + return search_kernel; } else if (itopk_size <= 128) { - return search_kernel<128, + return search_kernel; } else if (itopk_size <= 256) { - return search_kernel<256, + return search_kernel; } else if (itopk_size <= 512) { - return search_kernel<512, + return search_kernel; + return search_kernel; } else if (itopk_size <= 512) { - return search_kernel<512, max_candidates, 0, DATASET_DESCRIPTOR_T, SAMPLE_FILTER_T>; + return search_kernel; } } THROW("No kernel for parametels itopk_size %u, num_itopk_candidates %u", @@ -910,15 +937,12 @@ void select_and_run( SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { - const auto dataset_desc_ = - set_compute_template_params(dataset_desc); - using dataset_desc_t = typename std::remove_const::type; auto kernel = - search_kernel_config::choose_itopk_and_mx_candidates( - itopk_size, num_itopk_candidates, block_size); + search_kernel_config:: + choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); 
RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, - smem_size + dataset_desc_t::smem_buffer_size_in_byte)); + smem_size + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte)); dim3 thread_dims(block_size, 1, 1); dim3 block_dims(1, num_queries, 1); RAFT_LOG_DEBUG( @@ -926,7 +950,7 @@ void select_and_run( kernel<<>>(topk_indices_ptr, topk_distances_ptr, topk, - dataset_desc_, + dataset_desc, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py index bd5f6b278f..e827c06be5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py @@ -77,7 +77,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA 0 COMMA {distance_t} COMMA {idx_t} COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA {distance_t} COMMA {idx_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu index 5b174ddaee..0bd386144c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace 
raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu index 11503927fb..cd891b8e97 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu index 5ad1f942e6..66e8357498 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu index fdab2893b2..eb84983f9e 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu index 01e7dcba4c..c66f8a0ae3 
100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu index 61afc4731b..2a1783944c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu index 8524a653c6..9fa74f1134 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu index 28f75dcbff..8fc91b5a10 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace 
raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu index 9c5b161911..4e68c00525 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu index ab13f43868..5fe526ae47 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float 
COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu index c03a57372d..64c89a880a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu index f4f7148580..c3e2427f57 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu index 15c65830e9..0a8826df1c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu index ba00a5cf7e..8019bec3e3 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 
8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu index ad101deec9..1a2a364037 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu index 185fadae2e..2f661538e6 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { 
-instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu index d67903cea2..aec486769f 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu index 2dc54bcb59..03f27085d8 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu index a9761e947a..119d1f2921 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu index fd23a5a5b9..666c676e87 100644 --- 
a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu index b350696bf0..e53b456a54 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu index c6ecb67efe..2aee739141 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu index ac4ffa356a..daa442b514 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search 
diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu index e7efd7e305..a19346d19b 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu index f78f7ae508..1c1d5381c9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + 
raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu index 38cf9c85a5..b7402a3c38 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu index c280585101..f493b83bee 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu index bddf03f42a..8efcbe0650 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu index ddb1304325..cb770f44ba 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu index 61e4305ffa..0fd8ab809c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu index 0967bbb039..50cf198883 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu index 1b4db0b1f6..1548ed831e 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu index b0d33811f0..c60ea7c87d 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include 
namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu index 84ee6dc773..4a68e1e43c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu index 887c152c0a..df9fabd6a5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu +++ 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu index 2af58c6211..77075b0a44 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu index 753a3d8f49..374af8b56b 100644 
--- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu index b2528e4446..ddb80458fd 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu index e3b691f10d..14e5c5d3dc 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu index aa070a2763..3c1776760a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace 
raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu index c28b846130..e5a0a8882c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu index 93b3036aee..cee80390e8 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + 
uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu index 3d0553f043..88678bf4ff 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu index 35a09411ce..baa7ee358a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu index 96aec1b2a2..5c44f052f2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu index 0f19ae504f..127a065fb5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 16, - 256, - 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu index 9083453c7c..fcf6985f97 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu index 0d8ec8dde6..f361e771b5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 
+28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py index 2cbc423b9e..418d528a82 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py @@ -81,7 +81,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA 0 COMMA {distance_t} COMMA {idx_t} COMMA 0>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA {distance_t} COMMA {idx_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu index 9d7d64e544..d61ad0ce15 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu index 5a4ccf7682..410d2377ec 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu index 
4bcdc42008..60cd58bab9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu index 9e64faa3c8..dfe5e6f14e 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu index c942fb075a..9a5d862276 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu index dd1a9ac9e1..d92ab50a58 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + 
raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu index dd50c4b063..aac197d590 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu index b69ffaa988..f38a10e6d0 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu index e5cbe01c51..5523e63038 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu index c8b5cfe3f6..b06ef3d4fd 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 
COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu index 69551690e2..1fddee0e06 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu index ba0095892c..2aee442186 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, 
- 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu index 960d2e1f28..7a15e85280 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu index b589c1af8c..efba46c248 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ 
-28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu index 11f21a24a5..990582f18b 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu index 0f06167fc2..a55907c66f 100644 --- 
a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - float COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu index 308684a452..55fd749720 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu index 5a88bae588..4b4063652a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu index a3b58d44cf..bae83dc0fa 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace 
raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu index bb19e6bd54..99492db344 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu index 78a2ef1cd1..797142e317 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 
COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu index 54b277fdb5..9a36c35ae0 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu index 7df4430968..e0a01e84cc 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu index 5bb3ede62b..14de1b8941 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu index 11f81782ae..b1d50fb445 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half 
COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu index 03ad10016f..c189a91764 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu index 1b1b1551ab..8693ee3716 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { 
-instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu index d3ca5fa44d..216ffd1ec5 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu index 357f4e9ca4..36985d218b 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu index f50a789bf9..8d55fe2b09 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu index 
6a7e80dd92..2fdb1cbc20 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu index 2a1817d9f8..6dc3dc2ca8 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - half COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint64_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu index 4f1f51b147..21f8633033 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu index eea96f3d15..1a3867e06f 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, 
+ raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu index 14ef0c7a33..9cbb16188a 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu index a88d71554b..305a1754bc 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + 
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu index 96dde7ab99..900e1b69d9 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu index 0dcb6fbaf9..a0bb2259f0 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float 
COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu index 3facd5f91e..09d36a39a0 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu index fc4d49e207..dc9cbb2b56 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, 
- raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - int8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu index ecd7c8c918..c5508a38e2 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu index 9f94847864..7024425155 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu +++ 
b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 1024, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu index 3474092311..68687bc9cf 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu index 
49addc2bc4..60efc55a30 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 8, - 128, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu index 25f2a2ba25..b2dfaac5fe 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu index 1c1ae0e845..891e9ef7cc 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 16, - 256, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu index 8ec11f13fa..91e617204c 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 2 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, 
+ raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu index dc6b1f8ce2..a01d497676 100644 --- a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -28,11 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection( - 32, - 512, - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< - uint8_t COMMA half COMMA 8 COMMA 4 COMMA 0 COMMA float COMMA uint32_t COMMA 0>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 472886729b..6f023c39f1 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -71,7 +71,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA 0 COMMA 0 COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA {distance_t}>, 
raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 777dab40c6..0e28d7a876 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index ca69079ebd..5e5e80a5de 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index 956bd57f78..9039496968 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index 5016ca4e29..fe1c7e77e5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index f71447a84e..7ef36baf7d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index 3da7a01d2b..da51c16314 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index a91883c1a3..99a4f7feb7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(16, - 
256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index 979bf614dc..50cdc97dd7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu index 3b4e0dedd0..b2d9cdb600 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu index 490fabe5cb..d756b295b7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu index 7095a2bcbd..b1e998762c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace 
raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu index c4d7e89656..e712de6390 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu index 8976924220..282de4a851 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu index 22e120b374..71ef968575 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu index a0e52734f7..7c88406d71 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu index 1637af3973..360635dddb 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu +++ 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index bfd55f004f..3f129bd7cf 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index a4c338e6db..053b73275e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(8, - 128, - 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index 597e862bdd..a1bb20369a 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index bd2e584e75..dbbc8bdd21 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 
32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index a936c424c0..125499e319 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 8a707ccaf2..f2117c4f80 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search 
diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index 4ebe2e3b77..8e5ba0f98f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 459219c8d1..bea7d25392 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::multi_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 6fedae0fa3..0e809e4dc3 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -74,7 +74,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA 0 COMMA 0 COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index 81d242d37f..8a9fc408ee 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index f26ca71b8e..c6f7c90c69 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace 
raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 048d740dd3..2766286673 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index 99ea62960c..98ee189766 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint32_t COMMA 0 
COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index 27f06caa8c..c3ea39a729 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index 573a5688a8..a53457656c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + 
raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index cbe53429e9..52318efb85 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index e297abde56..6451fdc7f3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - float COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu index bde6407312..e927fd0878 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu index b256de71e5..3f3d22ee08 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu index 019d64b7cb..a84e5b8bd7 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu index 2da2263380..af4248865b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu index 20dd3857b1..16bd0cb647 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu @@ -28,10 +28,10 @@ #include 
namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu index a4f2d10f23..afc59c8a59 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu index 7460bd54af..147d31cf85 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 
0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu index a1983aa9ad..5624a71c3c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - half COMMA uint64_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index cd5a061c2e..761fb705ba 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + 
raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index d5845f1cc7..84b76cba53 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index e370b547dd..598fff9cdf 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index c5b2841027..e7a1a9d9c6 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - int8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index a16d3cac38..d40b9285fc 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 1024, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index ceaa625f05..073bb350da 
100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(8, - 128, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index cb8f2c16cf..29b0224b4d 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -28,10 +28,10 @@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(16, - 256, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index 72307f5c39..d9601de2ad 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -28,10 +28,10 
@@ #include namespace raft::neighbors::cagra::detail::single_cta_search { -instantiate_kernel_selection(32, - 512, - raft::neighbors::cagra::detail::standard_dataset_descriptor_t< - uint8_t COMMA uint32_t COMMA 0 COMMA 0 COMMA float>, - raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search From 317c67f3fb7b448eec43349bb66faf2904decf43 Mon Sep 17 00:00:00 2001 From: achirkin Date: Wed, 20 Mar 2024 17:09:16 +0100 Subject: [PATCH 22/30] Mark the strided_dataset::view as deleted (pure virtual) to avoid linker errors --- cpp/include/raft/neighbors/dataset.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/neighbors/dataset.hpp b/cpp/include/raft/neighbors/dataset.hpp index e7a3ba97a4..a6444775f4 100644 --- a/cpp/include/raft/neighbors/dataset.hpp +++ b/cpp/include/raft/neighbors/dataset.hpp @@ -72,7 +72,7 @@ struct strided_dataset : public dataset { return static_cast(v.stride(0) > 0 ? v.stride(0) : v.extent(1)); } /** Get the view of the data. 
*/ - [[nodiscard]] virtual auto view() const noexcept -> view_type; + [[nodiscard]] virtual auto view() const noexcept -> view_type = 0; }; template From 59033c7af6a2ea785d43b7fed876050827109dd7 Mon Sep 17 00:00:00 2001 From: achirkin Date: Wed, 20 Mar 2024 17:16:57 +0100 Subject: [PATCH 23/30] Fix the instances in the tests as well --- .../ann_cagra/search_kernel_uint64_t.cuh | 124 +++++++++--------- 1 file changed, 61 insertions(+), 63 deletions(-) diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index 67bc1a713f..5cca6d561a 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -21,37 +21,36 @@ namespace raft::neighbors::cagra::detail { namespace multi_cta_search { -#define instantiate_kernel_selection( \ - DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void select_and_run< \ - TEAM_SIZE, \ - MAX_DATASET_DIM, \ - raft::neighbors::cagra::detail::DATASET_DESCRIPTOR, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::DATASET_DESCRIPTOR \ - dataset_desc, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_kernel_selection( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, 
SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_kernel_selection(standard_dataset_descriptor_t, @@ -88,38 +87,37 @@ instantiate_kernel_selection(standard_dataset_descriptor_t, namespace single_cta_search { -#define instantiate_single_cta_select_and_run( \ - DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void select_and_run< \ - TEAM_SIZE, \ - MAX_DATASET_DIM, \ - raft::neighbors::cagra::detail::DATASET_DESCRIPTOR, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::DATASET_DESCRIPTOR \ - dataset_desc, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t 
search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, From 6567186eb62d1bcc48c67db77d90ba2c3b63884e Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Thu, 21 Mar 2024 03:32:16 +0900 Subject: [PATCH 24/30] Fix a bug in VPQ similarity compute --- .../neighbors/detail/cagra/compute_distance_vpq.cuh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index 998cf40f77..fae6ac7db9 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -107,7 +107,7 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= n_subspace) break; // Loading 4 x 8-bit PQ-codes using 32-bit load ops (from device 
memory) pq_codes[e] = *(reinterpret_cast( @@ -120,13 +120,13 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= n_subspace) break; // Loading VQ code-book raft::TxN_t vq_vals[PQ_LEN]; #pragma unroll for (std::uint32_t m = 0; m < PQ_LEN; m += 1) { - const uint32_t d = (vlen * m) + (PQ_LEN * k) + elem_offset; + const uint32_t d = (vlen * m) + (PQ_LEN * k); if (d >= dim) break; vq_vals[m].load( reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); @@ -157,13 +157,13 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t= n_subspace) break; // Loading VQ code-book raft::TxN_t vq_vals[PQ_LEN]; #pragma unroll for (std::uint32_t m = 0; m < PQ_LEN; m++) { - const std::uint32_t d = (vlen * m) + (PQ_LEN * k) + elem_offset; + const std::uint32_t d = (vlen * m) + (PQ_LEN * k); if (d >= dim) break; // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device // memory) From ecb896c40f3161ea776c6e58617081a1781c9945 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 20 Mar 2024 19:37:10 +0100 Subject: [PATCH 25/30] Disable implicit template instantiations for vpq tests --- .../raft/neighbors/detail/refine_host-ext.hpp | 1 + cpp/include/raft/neighbors/refine-ext.cuh | 21 +++++++++----- .../detail/refine_host_float_float.cpp | 1 + cpp/src/neighbors/refine_float_float.cu | 28 +++++++++++-------- cpp/test/neighbors/ann_cagra_vpq.cuh | 3 +- 5 files changed, 33 insertions(+), 21 deletions(-) mode change 100644 => 100755 cpp/test/neighbors/ann_cagra_vpq.cuh diff --git a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp index 69d2bd29b2..f5c8c73bb9 100644 --- a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp +++ b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp @@ -54,6 +54,7 @@ template distance::DistanceType metric); instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine(uint32_t, float, 
float, int64_t); instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh index fc57494b22..7948a0e4f2 100644 --- a/cpp/include/raft/neighbors/refine-ext.cuh +++ b/cpp/include/raft/neighbors/refine-ext.cuh @@ -52,7 +52,7 @@ void refine(raft::resources const& handle, #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ +#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ extern template void raft::neighbors::refine( \ raft::resources const& handle, \ raft::device_matrix_view dataset, \ @@ -60,8 +60,9 @@ void refine(raft::resources const& handle, raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - raft::distance::DistanceType metric); \ - \ + raft::distance::DistanceType metric); + +#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ extern template void raft::neighbors::refine( \ raft::resources const& handle, \ raft::host_matrix_view dataset, \ @@ -71,8 +72,14 @@ void refine(raft::resources const& handle, raft::host_matrix_view distances, \ raft::distance::DistanceType metric); -instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, int8_t, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, uint8_t, float, int64_t); + +instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, int8_t, 
float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, uint8_t, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_raft_neighbors_refine_d +#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/src/neighbors/detail/refine_host_float_float.cpp b/cpp/src/neighbors/detail/refine_host_float_float.cpp index c596200c0a..e0c8999640 100644 --- a/cpp/src/neighbors/detail/refine_host_float_float.cpp +++ b/cpp/src/neighbors/detail/refine_host_float_float.cpp @@ -25,5 +25,6 @@ distance::DistanceType metric); instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine(uint32_t, float, float, int64_t); #undef instantiate_raft_neighbors_refine diff --git a/cpp/src/neighbors/refine_float_float.cu b/cpp/src/neighbors/refine_float_float.cu index ea6892d2c5..5ce901ab61 100644 --- a/cpp/src/neighbors/refine_float_float.cu +++ b/cpp/src/neighbors/refine_float_float.cu @@ -26,7 +26,7 @@ #include -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ +#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ template void raft::neighbors::refine( \ raft::resources const& handle, \ raft::device_matrix_view dataset, \ @@ -34,17 +34,21 @@ raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - raft::distance::DistanceType metric); \ - \ - template void raft::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ raft::distance::DistanceType metric); -instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ + template void raft::neighbors::refine( \ + raft::resources const& handle, \ + 
raft::host_matrix_view dataset, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbor_candidates, \ + raft::host_matrix_view indices, \ + raft::host_matrix_view distances, \ + raft::distance::DistanceType metric); + +instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_raft_neighbors_refine_d +#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh old mode 100644 new mode 100755 index 0dc7e09d38..d5accc2150 --- a/cpp/test/neighbors/ann_cagra_vpq.cuh +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -15,8 +15,6 @@ */ #pragma once -#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Search with filter instantiation - #include "../test_utils.cuh" #include "ann_utils.cuh" @@ -28,6 +26,7 @@ #include #include #include +#include #include #include #include From 1308c61c1552e5db667c63812650c0a9c1565780 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 20 Mar 2024 20:03:12 +0100 Subject: [PATCH 26/30] cagra-vpq enable instantiation of int64 kernels --- cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu index 7f89df3af1..f60edb5ed6 100644 --- a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu +++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#undef RAFT_EXPLICIT_INSTANTIATE_ONLY #include "../ann_cagra_vpq.cuh" #include From 6d663aeccfc66a8f8149bee77905020fcf7c4cf8 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 20 Mar 2024 20:17:16 +0100 Subject: [PATCH 27/30] Correct copyright year --- cpp/src/neighbors/detail/refine_host_float_float.cpp | 2 +- cpp/src/neighbors/refine_float_float.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/neighbors/detail/refine_host_float_float.cpp b/cpp/src/neighbors/detail/refine_host_float_float.cpp index e0c8999640..09dcae9c3a 100644 --- a/cpp/src/neighbors/detail/refine_host_float_float.cpp +++ b/cpp/src/neighbors/detail/refine_host_float_float.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/neighbors/refine_float_float.cu b/cpp/src/neighbors/refine_float_float.cu index 5ce901ab61..75851eeedb 100644 --- a/cpp/src/neighbors/refine_float_float.cu +++ b/cpp/src/neighbors/refine_float_float.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 0e29876bafb4e597263d1f99fed3bc09536403eb Mon Sep 17 00:00:00 2001 From: Hiroyuki Ootomo Date: Wed, 20 Mar 2024 13:52:24 -0700 Subject: [PATCH 28/30] Update query copy from dmem to smem --- .../detail/cagra/compute_distance.hpp | 15 ++++++++++++++ .../detail/cagra/compute_distance_vpq.cuh | 20 +++++++++++++++++++ .../cagra/search_multi_cta_kernel-inl.cuh | 11 +++------- .../cagra/search_single_cta_kernel-inl.cuh | 11 +++------- 4 files changed, 41 insertions(+), 16 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 2436d0a3ca..eb2fa0905e 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -205,6 +205,21 @@ struct standard_dataset_descriptor_t static const std::uint32_t smem_buffer_size_in_byte = 0; __device__ void set_smem_ptr(void* const){}; + template + __device__ void copy_query(const DATA_T* const dmem_query_ptr, + QUERY_T* const smem_query_ptr, + const std::uint32_t query_smem_buffer_length) + { + for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { + unsigned j = device::swizzling(i); + if (i < dim) { + smem_query_ptr[j] = spatial::knn::detail::utils::mapping{}(dmem_query_ptr[i]); + } else { + smem_query_ptr[j] = 0.0; + } + } + } + template __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, const INDEX_T dataset_i, diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index fae6ac7db9..877c936067 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -88,6 +88,26 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t + __device__ void copy_query(const DATA_T* const dmem_query_ptr, + QUERY_T* const 
smem_query_ptr, + const std::uint32_t query_smem_buffer_length) + { + for (unsigned i = threadIdx.x * 2; i < dim; i += blockDim.x * 2) { + half2 buf2 = {CUDART_ZERO_FP16, CUDART_ZERO_FP16}; + if (i < dim) { buf2.x = static_cast(static_cast(dmem_query_ptr[i])); } + if (i + 1 < dim) { buf2.y = static_cast(static_cast(dmem_query_ptr[i + 1])); } + if ((PQ_BITS == 8) && (PQ_LEN % 2 == 0)) { + // Use swizzling in the condition to reduce bank conflicts in shared + // memory, which are likely to occur when pq_code_book_dim is large. + ((half2*)smem_query_ptr)[device::swizzling(i / 2)] = + buf2; + } else { + (reinterpret_cast(smem_query_ptr + i))[0] = buf2; + } + } + } + template __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, const INDEX_T node_id, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 001b007000..48c22d9d14 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -213,14 +213,9 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( } #endif const DATA_T* const query_ptr = queries_ptr + (dataset_desc.dim * query_id); - for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { - unsigned j = device::swizzling(i); - if (i < dataset_desc.dim) { - query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); - } else { - query_buffer[j] = 0.0; - } - } + dataset_desc.template copy_query( + query_ptr, query_buffer, query_smem_buffer_length); + if (threadIdx.x == 0) { terminate_flag[0] = 0; } INDEX_T* const local_visited_hashmap_ptr = visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * query_id); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 8d8dad6708..a697f9512c 
100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -549,14 +549,9 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( auto filter_flag = terminate_flag; const DATA_T* const query_ptr = queries_ptr + query_id * dataset_desc.dim; - for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { - unsigned j = device::swizzling(i); - if (i < dataset_desc.dim) { - query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); - } else { - query_buffer[j] = 0.0; - } - } + dataset_desc.template copy_query( + query_ptr, query_buffer, query_smem_buffer_length); + if (threadIdx.x == 0) { terminate_flag[0] = 0; topk_ws[0] = ~0u; From 6ebb99e331287cc15664398259fb1d3bc8e1995d Mon Sep 17 00:00:00 2001 From: achirkin Date: Wed, 20 Mar 2024 22:33:36 +0100 Subject: [PATCH 29/30] Fix query mapping type and usage of a macro that is not available on older cuda --- .../raft/neighbors/detail/cagra/compute_distance.hpp | 2 +- .../raft/neighbors/detail/cagra/compute_distance_vpq.cuh | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index eb2fa0905e..49e14be73d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -213,7 +213,7 @@ struct standard_dataset_descriptor_t for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); if (i < dim) { - smem_query_ptr[j] = spatial::knn::detail::utils::mapping{}(dmem_query_ptr[i]); + smem_query_ptr[j] = spatial::knn::detail::utils::mapping{}(dmem_query_ptr[i]); } else { smem_query_ptr[j] = 0.0; } diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh 
b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh index 877c936067..0204addba7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -93,10 +93,11 @@ struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t mapping{}; for (unsigned i = threadIdx.x * 2; i < dim; i += blockDim.x * 2) { - half2 buf2 = {CUDART_ZERO_FP16, CUDART_ZERO_FP16}; - if (i < dim) { buf2.x = static_cast(static_cast(dmem_query_ptr[i])); } - if (i + 1 < dim) { buf2.y = static_cast(static_cast(dmem_query_ptr[i + 1])); } + half2 buf2{0, 0}; + if (i < dim) { buf2.x = mapping(dmem_query_ptr[i]); } + if (i + 1 < dim) { buf2.y = mapping(dmem_query_ptr[i + 1]); } if ((PQ_BITS == 8) && (PQ_LEN % 2 == 0)) { // Use swizzling in the condition to reduce bank conflicts in shared // memory, which are likely to occur when pq_code_book_dim is large. From b2cdb6d82e301352c48ea99991e8a385dc58e706 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 21 Mar 2024 02:13:47 +0100 Subject: [PATCH 30/30] Set pq_len=2 as default, do not allow different pq_len for search --- cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 3 +-- cpp/include/raft/neighbors/detail/vpq_dataset.cuh | 2 +- cpp/test/neighbors/ann_cagra_vpq.cuh | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index b927f184e2..d30f69ddcd 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -166,8 +166,7 @@ void launch_vpq_search_main_core( CagraSampleFilterT sample_filter) { RAFT_EXPECTS(vpq_dset->pq_bits() == 8, "Only pq_bits = 8 is supported for now"); - RAFT_EXPECTS(vpq_dset->pq_len() == 2 || vpq_dset->pq_len() == 4, - "Only pq_len 2 or for are supported for now"); + RAFT_EXPECTS(vpq_dset->pq_len() == 2, 
"Only pq_len 2 is supported for now"); RAFT_EXPECTS(vpq_dset->dim() % vpq_dset->pq_dim() == 0, "dim must be a multiple of pq_dim at the moment"); diff --git a/cpp/include/raft/neighbors/detail/vpq_dataset.cuh b/cpp/include/raft/neighbors/detail/vpq_dataset.cuh index f6cd2a1ceb..f1321ba343 100644 --- a/cpp/include/raft/neighbors/detail/vpq_dataset.cuh +++ b/cpp/include/raft/neighbors/detail/vpq_dataset.cuh @@ -81,7 +81,7 @@ auto fill_missing_params_heuristics(const vpq_params& params, const DatasetT& da vpq_params r = params; double n_rows = dataset.extent(0); size_t dim = dataset.extent(1); - if (r.pq_dim == 0) { r.pq_dim = raft::div_rounding_up_safe(dim, size_t{4}); } + if (r.pq_dim == 0) { r.pq_dim = raft::div_rounding_up_safe(dim, size_t{2}); } if (r.pq_bits == 0) { r.pq_bits = 8; } if (r.vq_n_centers == 0) { r.vq_n_centers = raft::round_up_safe(std::sqrt(n_rows), 8); } if (r.vq_kmeans_trainset_fraction == 0) { diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh index d5accc2150..503b1a413a 100755 --- a/cpp/test/neighbors/ann_cagra_vpq.cuh +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -319,7 +319,7 @@ const std::vector vpq_inputs = raft::util::itertools::product {1000, 10000}, // n_rows {128, 132, 192, 256, 512, 768}, // dim {8, 12}, // k - {2, 4}, // pq_len + {2}, // pq_len {8}, // pq_bits {graph_build_algo::NN_DESCENT}, // build_algo {search_algo::SINGLE_CTA, search_algo::MULTI_CTA}, // algo