diff --git a/cpp/include/raft/distance/fused_distance_nn-ext.cuh b/cpp/include/raft/distance/fused_distance_nn-ext.cuh deleted file mode 100644 index 263bbcea81..0000000000 --- a/cpp/include/raft/distance/fused_distance_nn-ext.cuh +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include // raft::KeyValuePair -#include // raft::resources -#include // include initialize and reduce operations -#include // RAFT_EXPLICIT - -#include // int64_t - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -namespace raft { -namespace distance { - -template -void fusedDistanceNNMinReduce(OutT* min, - const DataT* x, - const DataT* y, - const DataT* xn, - const DataT* yn, - IdxT m, - IdxT n, - IdxT k, - void* workspace, - bool sqrt, - bool initOutBuffer, - bool isRowMajor, - raft::distance::DistanceType metric, - float metric_arg, - cudaStream_t stream) RAFT_EXPLICIT; - -} // namespace distance -} // namespace raft - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_raft_distance_fusedDistanceNNMinReduce(DataT, OutT, IdxT) \ - extern template void raft::distance::fusedDistanceNNMinReduce( \ - OutT * min, \ - const DataT* x, \ - const DataT* y, \ - const DataT* xn, \ - const DataT* yn, \ - IdxT m, \ - IdxT n, \ - IdxT k, \ - void* workspace, \ - bool sqrt, \ - bool initOutBuffer, \ - bool isRowMajor, \ - raft::distance::DistanceType metric, \ - float metric_arg, \ - cudaStream_t stream) - -instantiate_raft_distance_fusedDistanceNNMinReduce(float, float, int); -instantiate_raft_distance_fusedDistanceNNMinReduce(float, float, int64_t); - -// We can't have comma's in the macro expansion, so we use the COMMA macro: -#define COMMA , - -instantiate_raft_distance_fusedDistanceNNMinReduce(float, raft::KeyValuePair, int); -instantiate_raft_distance_fusedDistanceNNMinReduce(float, - raft::KeyValuePair, - int64_t); - -#undef COMMA - -#undef instantiate_raft_distance_fusedDistanceNNMinReduce diff --git a/cpp/include/raft/distance/fused_distance_nn.cuh b/cpp/include/raft/distance/fused_distance_nn.cuh index 04c42e49a1..25b1ae01ea 100755 --- a/cpp/include/raft/distance/fused_distance_nn.cuh +++ b/cpp/include/raft/distance/fused_distance_nn.cuh @@ -15,10 +15,4 @@ */ #pragma once -#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY -#include "fused_distance_nn-inl.cuh" -#endif - -#ifdef RAFT_COMPILED -#include "fused_distance_nn-ext.cuh" -#endif +#include "fused_distance_nn-inl.cuh" \ No newline at end of file diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh deleted file mode 100644 index 4800f2e3cf..0000000000 --- a/cpp/include/raft/linalg/detail/coalesced_reduction-ext.cuh +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// The explicit instantiation of raft::linalg::detail::coalescedReduction is not -// forced because there would be too many instances. Instead, we cover the most -// common instantiations with extern template instantiations below. - -#define instantiate_raft_linalg_detail_coalescedReduction( \ - InType, OutType, IdxType, MainLambda, ReduceLambda, FinalLambda) \ - extern template void raft::linalg::detail::coalescedReduction(OutType* dots, \ - const InType* data, \ - IdxType D, \ - IdxType N, \ - OutType init, \ - cudaStream_t stream, \ - bool inplace, \ - MainLambda main_op, \ - ReduceLambda reduce_op, \ - FinalLambda final_op) - -instantiate_raft_linalg_detail_coalescedReduction( - double, double, int, raft::identity_op, raft::min_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - double, double, int, raft::sq_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - double, double, int, raft::sq_op, raft::add_op, raft::sqrt_op); -instantiate_raft_linalg_detail_coalescedReduction( - double, double, int, raft::abs_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - double, double, int, raft::abs_op, raft::max_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, size_t, raft::abs_op, raft::add_op, raft::sqrt_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, int, raft::abs_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, int, raft::identity_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, int, raft::identity_op, raft::min_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, int, raft::sq_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, int, raft::sq_op, raft::add_op, raft::sqrt_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, long, raft::sq_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, size_t, raft::identity_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, size_t, raft::sq_op, raft::add_op, raft::identity_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, size_t, raft::abs_op, raft::max_op, raft::sqrt_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, size_t, raft::sq_op, raft::add_op, raft::sqrt_op); -instantiate_raft_linalg_detail_coalescedReduction( - float, float, unsigned int, raft::sq_op, raft::add_op, raft::identity_op); - -#undef instantiate_raft_linalg_detail_coalescedReduction diff --git a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh index 3e6b17978b..d24c2a7444 100644 --- a/cpp/include/raft/linalg/detail/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/detail/coalesced_reduction.cuh @@ -16,11 +16,4 @@ #pragma once -// Always include inline definitions of coalesced reduction, because we do not -// force explicit instantion. #include "coalesced_reduction-inl.cuh" - -// Do include the extern template instantiations when possible. -#ifdef RAFT_COMPILED -#include "coalesced_reduction-ext.cuh" -#endif diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh deleted file mode 100644 index 35f4f0e1c9..0000000000 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include // none_cagra_sample_filter -#include // RAFT_EXPLICIT - -#include - -namespace raft::neighbors::cagra::detail { -namespace multi_cta_search { - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -template -void select_and_run( - DATASET_DESCRIPTOR_T dataset_desc, - raft::device_matrix_view graph, - typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, - typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, - const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, - const uint32_t num_queries, - const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, - uint32_t* const num_executed_iterations, - uint32_t topk, - uint32_t block_size, - uint32_t result_buffer_size, - uint32_t smem_size, - int64_t hash_bitlen, - typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, - uint32_t num_cta_per_query, - uint32_t num_random_samplings, - uint64_t rand_xor_mask, - uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, - SAMPLE_FILTER_T sample_filter, - raft::distance::DistanceType metric, - cudaStream_t stream) RAFT_EXPLICIT; -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void select_and_run< \ - TEAM_SIZE, \ - MAX_DATASET_DIM, \ - raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ - dataset_desc, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - raft::distance::DistanceType metric, \ - cudaStream_t stream); - -instantiate_kernel_selection( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection - -#define instantiate_q_kernel_selection(TEAM_SIZE, \ - MAX_DATASET_DIM, \ - CODE_BOOK_T, \ - PQ_BITS, \ - PQ_CODE_BOOK_DIM, \ - DATA_T, \ - INDEX_T, \ - DISTANCE_T, \ - SAMPLE_FILTER_T) \ - extern template void \ - select_and_run, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset_desc, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - raft::distance::DistanceType metric, \ - cudaStream_t stream); - -instantiate_q_kernel_selection( - 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection( - 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection( - 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 2, - half, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection( - 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection( - 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection( - 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 4, - half, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_kernel_selection( - 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(16, - 256, - half, - 8, - 2, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 512, - half, - 8, - 2, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 2, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection( - 8, 128, half, 8, 4, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(16, - 256, - half, - 8, - 4, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 512, - half, - 8, - 4, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 4, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_kernel_selection(8, - 128, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(16, - 256, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 512, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(8, - 128, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(16, - 256, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 512, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_kernel_selection(8, - 128, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(16, - 256, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 512, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(8, - 128, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(16, - 256, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 512, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_kernel_selection(32, - 1024, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_q_kernel_selection -} // namespace multi_cta_search -} // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel.cuh index e003907292..3dc0745e6d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel.cuh @@ -15,10 +15,4 @@ */ #pragma once -#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY #include "search_multi_cta_kernel-inl.cuh" -#endif - -#ifdef RAFT_COMPILED -#include "search_multi_cta_kernel-ext.cuh" -#endif diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh deleted file mode 100644 index 510219ab5d..0000000000 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ /dev/null @@ -1,602 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include // RAFT_EXPLICIT - -#include - -namespace raft::neighbors::cagra::detail { -namespace single_cta_search { - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -template -void select_and_run( // raft::resources const& res, - DATASET_DESCRIPTOR_T dataset_desc, - raft::device_matrix_view graph, - typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] - typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const uint32_t num_queries, - const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] - uint32_t topk, - uint32_t num_itopk_candidates, - uint32_t block_size, - uint32_t smem_size, - int64_t hash_bitlen, - typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, - size_t small_hash_bitlen, - size_t small_hash_reset_interval, - uint32_t num_random_samplings, - uint64_t rand_xor_mask, - uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, - SAMPLE_FILTER_T sample_filter, - raft::distance::DistanceType metric, - cudaStream_t stream) RAFT_EXPLICIT; - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void select_and_run< \ - TEAM_SIZE, \ - MAX_DATASET_DIM, \ - raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ - dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - raft::distance::DistanceType metric, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_select_and_run - -#define instantiate_q_single_cta_select_and_run(TEAM_SIZE, \ - MAX_DATASET_DIM, \ - CODE_BOOK_T, \ - PQ_BITS, \ - PQ_CODE_BOOK_DIM, \ - DATA_T, \ - INDEX_T, \ - DISTANCE_T, \ - SAMPLE_FILTER_T) \ - extern template void \ - select_and_run, \ - SAMPLE_FILTER_T>( \ - raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - raft::distance::DistanceType metric, \ - cudaStream_t stream); - -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 2, - half, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 4, - half, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 2, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 2, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 2, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 4, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 4, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 4, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 4, - float, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 16, 256, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 512, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 1024, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 16, 256, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 512, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 1024, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 16, 256, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 512, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 2, - float, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 16, 256, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 32, 512, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 4, - float, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_single_cta_select_and_run(8, - 128, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 2, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(8, - 128, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 4, - uint8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_single_cta_select_and_run(8, - 128, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 2, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(8, - 128, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 4, - int8_t, - uint32_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_single_cta_select_and_run(8, - 128, - half, - 8, - 2, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 2, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 2, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 2, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(8, - 128, - half, - 8, - 4, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 4, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 4, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 4, - uint8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 2, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 2, - int8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 2, - int8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 2, - int8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run( - 8, 128, half, 8, 4, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(16, - 256, - half, - 8, - 4, - int8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 512, - half, - 8, - 4, - int8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_q_single_cta_select_and_run(32, - 1024, - half, - 8, - 4, - int8_t, - int64_t, - float, - raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_q_single_cta_select_and_run - -} // namespace single_cta_search -} // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel.cuh index 1d8fd8e30a..3e72fbf8e8 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel.cuh @@ -15,10 +15,4 @@ */ #pragma once -#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY #include "search_single_cta_kernel-inl.cuh" -#endif - -#ifdef RAFT_COMPILED -#include "search_single_cta_kernel-ext.cuh" -#endif diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh deleted file mode 100644 index 140a9f17c8..0000000000 --- a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include // raft::neighbors::ivf_flat::index -#include // none_ivf_sample_filter -#include // RAFT_EXPLICIT - -#include // rmm:cuda_stream_view - -#include - -#include // uintX_t - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -namespace raft::neighbors::ivf_flat::detail { - -auto RAFT_WEAK_FUNCTION is_local_topk_feasible(uint32_t k) -> bool; - -template -void ivfflat_interleaved_scan(const raft::neighbors::ivf_flat::index& index, - const T* queries, - const uint32_t* coarse_query_results, - const uint32_t n_queries, - const uint32_t queries_offset, - const raft::distance::DistanceType metric, - const uint32_t n_probes, - const uint32_t k, - const uint32_t max_samples, - const uint32_t* chunk_indices, - const bool select_min, - IvfSampleFilterT sample_filter, - uint32_t* neighbors, - float* distances, - uint32_t& grid_dim_x, - rmm::cuda_stream_view stream) RAFT_EXPLICIT; - -} // namespace raft::neighbors::ivf_flat::detail - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( \ - T, AccT, IdxT, IvfSampleFilterT) \ - extern template void \ - raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan( \ - const raft::neighbors::ivf_flat::index& index, \ - const T* queries, \ - const uint32_t* coarse_query_results, \ - const uint32_t n_queries, \ - const uint32_t queries_offset, \ - const raft::distance::DistanceType metric, \ - const uint32_t n_probes, \ - const uint32_t k, \ - const uint32_t max_samples, \ - const uint32_t* chunk_indices, \ - const bool select_min, \ - IvfSampleFilterT sample_filter, \ - uint32_t* neighbors, \ - float* distances, \ - uint32_t& grid_dim_x, \ - rmm::cuda_stream_view stream) - -instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( - float, float, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( - half, half, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( - int8_t, int32_t, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan( - uint8_t, uint32_t, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); - -#undef instantiate_raft_neighbors_ivf_flat_detail_ivfflat_interleaved_scan diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh index 63f341dd9a..11d7da851a 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan.cuh @@ -16,10 +16,4 @@ #pragma once -#if !defined(RAFT_EXPLICIT_INSTANTIATE_ONLY) #include "ivf_flat_interleaved_scan-inl.cuh" -#endif - -#ifdef RAFT_COMPILED -#include "ivf_flat_interleaved_scan-ext.cuh" -#endif diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh deleted file mode 100644 index c14b0e810f..0000000000 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-ext.cuh +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include // raft::neighbors::ivf_flat::index -#include // none_ivf_sample_filter -#include // RAFT_EXPLICIT - -#include - -#include - -#include // uintX_t - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -namespace raft::neighbors::ivf_flat::detail { - -template -void search(raft::resources const& handle, - const search_params& params, - const raft::neighbors::ivf_flat::index& index, - const T* queries, - uint32_t n_queries, - uint32_t k, - IdxT* neighbors, - float* distances, - rmm::device_async_resource_ref mr, - IvfSampleFilterT sample_filter = IvfSampleFilterT()) RAFT_EXPLICIT; - -} // namespace raft::neighbors::ivf_flat::detail - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_raft_neighbors_ivf_flat_detail_search(T, IdxT, IvfSampleFilterT) \ - extern template void raft::neighbors::ivf_flat::detail::search( \ - raft::resources const& handle, \ - const search_params& params, \ - const raft::neighbors::ivf_flat::index& index, \ - const T* queries, \ - uint32_t n_queries, \ - uint32_t k, \ - IdxT* neighbors, \ - float* distances, \ - rmm::device_async_resource_ref mr, \ - IvfSampleFilterT sample_filter) - -instantiate_raft_neighbors_ivf_flat_detail_search( - float, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_search( - half, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_search( - int8_t, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); -instantiate_raft_neighbors_ivf_flat_detail_search( - uint8_t, int64_t, raft::neighbors::filtering::none_ivf_sample_filter); - -#undef instantiate_raft_neighbors_ivf_flat_detail_search diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh index 7b03ebeab6..56e58bac27 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search.cuh @@ -15,10 +15,4 @@ */ #pragma once -#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY #include "ivf_flat_search-inl.cuh" -#endif - -#ifdef RAFT_COMPILED -#include "ivf_flat_search-ext.cuh" -#endif diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh deleted file mode 100644 index 5e1a9b46d6..0000000000 --- a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-ext.cuh +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include // RAFT_WEAK_FUNCTION -#include // raft::distance::DistanceType -#include // raft::neighbors::ivf_pq::detail::fp_8bit -#include // raft::neighbors::ivf_pq::codebook_gen -#include // none_ivf_sample_filter -#include // RAFT_EXPLICIT - -#include // rmm::cuda_stream_view - -#include // __half - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -namespace raft::neighbors::ivf_pq::detail { - -// is_local_topk_feasible is not inline here, because we would have to define it -// here as well. That would run the risk of the definitions here and in the -// -inl.cuh header diverging. -auto RAFT_WEAK_FUNCTION is_local_topk_feasible(uint32_t k, uint32_t n_probes, uint32_t n_queries) - -> bool; - -template -RAFT_KERNEL compute_similarity_kernel(uint32_t dim, - uint32_t n_probes, - uint32_t pq_dim, - uint32_t n_queries, - uint32_t queries_offset, - distance::DistanceType metric, - codebook_gen codebook_kind, - uint32_t topk, - uint32_t max_samples, - const float* cluster_centers, - const float* pq_centers, - const uint8_t* const* pq_dataset, - const uint32_t* cluster_labels, - const uint32_t* _chunk_indices, - const float* queries, - const uint32_t* index_list, - float* query_kths, - IvfSampleFilterT sample_filter, - LutT* lut_scores, - OutT* _out_scores, - uint32_t* _out_indices) RAFT_EXPLICIT; - -// The signature of the kernel defined by a minimal set of template parameters -template -using compute_similarity_kernel_t = - decltype(&compute_similarity_kernel); - -template -struct selected { - compute_similarity_kernel_t kernel; - dim3 grid_dim; - dim3 block_dim; - size_t smem_size; - size_t device_lut_size; -}; - -template -void compute_similarity_run(selected s, - rmm::cuda_stream_view stream, - uint32_t dim, - uint32_t n_probes, - uint32_t pq_dim, - uint32_t n_queries, - uint32_t queries_offset, - distance::DistanceType metric, - codebook_gen codebook_kind, - uint32_t topk, - uint32_t max_samples, - const float* cluster_centers, - const float* pq_centers, - const uint8_t* const* pq_dataset, - const uint32_t* cluster_labels, - const uint32_t* _chunk_indices, - const float* queries, - const uint32_t* index_list, - float* query_kths, - IvfSampleFilterT sample_filter, - LutT* lut_scores, - OutT* _out_scores, - uint32_t* _out_indices) RAFT_EXPLICIT; - -/** - * Use heuristics to choose an optimal instance of the search kernel. - * It selects among a few kernel variants (with/out using shared mem for - * lookup tables / precomputed distances) and tries to choose the block size - * to maximize kernel occupancy. - * - * @param manage_local_topk - * whether use the fused calculate+select or just calculate the distances for each - * query and probed cluster. - * - * @param locality_hint - * beyond this limit do not consider increasing the number of active blocks per SM - * would improve locality anymore. - */ -template -auto compute_similarity_select(const cudaDeviceProp& dev_props, - bool manage_local_topk, - int locality_hint, - double preferred_shmem_carveout, - uint32_t pq_bits, - uint32_t pq_dim, - uint32_t precomp_data_count, - uint32_t n_queries, - uint32_t n_probes, - uint32_t topk) - -> selected RAFT_EXPLICIT; - -} // namespace raft::neighbors::ivf_pq::detail - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( \ - OutT, LutT, IvfSampleFilterT) \ - extern template auto \ - raft::neighbors::ivf_pq::detail::compute_similarity_select( \ - const cudaDeviceProp& dev_props, \ - bool manage_local_topk, \ - int locality_hint, \ - double preferred_shmem_carveout, \ - uint32_t pq_bits, \ - uint32_t pq_dim, \ - uint32_t precomp_data_count, \ - uint32_t n_queries, \ - uint32_t n_probes, \ - uint32_t topk) \ - ->raft::neighbors::ivf_pq::detail::selected; \ - \ - extern template void \ - raft::neighbors::ivf_pq::detail::compute_similarity_run( \ - raft::neighbors::ivf_pq::detail::selected s, \ - rmm::cuda_stream_view stream, \ - uint32_t dim, \ - uint32_t n_probes, \ - uint32_t pq_dim, \ - uint32_t n_queries, \ - uint32_t queries_offset, \ - raft::distance::DistanceType metric, \ - raft::neighbors::ivf_pq::codebook_gen codebook_kind, \ - uint32_t topk, \ - uint32_t max_samples, \ - const float* cluster_centers, \ - const float* pq_centers, \ - const uint8_t* const* pq_dataset, \ - const uint32_t* cluster_labels, \ - const uint32_t* _chunk_indices, \ - const float* queries, \ - const uint32_t* index_list, \ - float* query_kths, \ - IvfSampleFilterT sample_filter, \ - LutT* lut_scores, \ - OutT* _out_scores, \ - uint32_t* _out_indices); - -#define COMMA , -instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( - half, - raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, - raft::neighbors::filtering::ivf_to_sample_filter< - int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); -instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( - half, - raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, - raft::neighbors::filtering::ivf_to_sample_filter< - int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); -instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( - half, - half, - raft::neighbors::filtering::ivf_to_sample_filter< - int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); -instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( - float, - half, - raft::neighbors::filtering::ivf_to_sample_filter< - int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); -instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( - float, - float, - raft::neighbors::filtering::ivf_to_sample_filter< - int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); -instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( - float, - raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA false>, - raft::neighbors::filtering::ivf_to_sample_filter< - int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); -instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select( - float, - raft::neighbors::ivf_pq::detail::fp_8bit<5u COMMA true>, - raft::neighbors::filtering::ivf_to_sample_filter< - int64_t COMMA raft::neighbors::filtering::none_ivf_sample_filter>); - -#undef COMMA - -#undef instantiate_raft_neighbors_ivf_pq_detail_compute_similarity_select diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh index d987c0d4ed..467d389d38 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity.cuh @@ -16,10 +16,4 @@ #pragma once -#if !defined(RAFT_EXPLICIT_INSTANTIATE_ONLY) #include "ivf_pq_compute_similarity-inl.cuh" -#endif - -#ifdef RAFT_COMPILED -#include "ivf_pq_compute_similarity-ext.cuh" -#endif diff --git a/cpp/include/raft/sparse/matrix/detail/select_k-ext.cuh b/cpp/include/raft/sparse/matrix/detail/select_k-ext.cuh deleted file mode 100644 index 01625a0ce8..0000000000 --- a/cpp/include/raft/sparse/matrix/detail/select_k-ext.cuh +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include // RAFT_EXPLICIT - -#include // __half - -#include // uint32_t - -#ifdef RAFT_EXPLICIT_INSTANTIATE_ONLY - -namespace raft::sparse::matrix::detail { - -template -void select_k(raft::resources const& handle, - raft::device_csr_matrix_view in_val, - std::optional> in_idx, - raft::device_matrix_view out_val, - raft::device_matrix_view out_idx, - bool select_min, - bool sorted = false, - raft::matrix::SelectAlgo algo = raft::matrix::SelectAlgo::kAuto) RAFT_EXPLICIT; -} // namespace raft::sparse::matrix::detail - -#endif // RAFT_EXPLICIT_INSTANTIATE_ONLY - -#define instantiate_raft_sparse_matrix_detail_select_k(T, IdxT) \ - extern template void raft::sparse::matrix::detail::select_k( \ - raft::resources const& handle, \ - raft::device_csr_matrix_view in_val, \ - std::optional> in_idx, \ - raft::device_matrix_view out_val, \ - raft::device_matrix_view out_idx, \ - bool select_min, \ - bool sorted, \ - raft::matrix::SelectAlgo algo) - -instantiate_raft_sparse_matrix_detail_select_k(__half, uint32_t); -instantiate_raft_sparse_matrix_detail_select_k(__half, int64_t); -instantiate_raft_sparse_matrix_detail_select_k(float, int64_t); -instantiate_raft_sparse_matrix_detail_select_k(float, uint32_t); -instantiate_raft_sparse_matrix_detail_select_k(float, int); -instantiate_raft_sparse_matrix_detail_select_k(double, int64_t); -instantiate_raft_sparse_matrix_detail_select_k(double, uint32_t); - -#undef instantiate_raft_sparse_matrix_detail_select_k diff --git a/cpp/include/raft/sparse/matrix/detail/select_k.cuh b/cpp/include/raft/sparse/matrix/detail/select_k.cuh index 5d52b94b2f..31a4b54a94 100644 --- a/cpp/include/raft/sparse/matrix/detail/select_k.cuh +++ b/cpp/include/raft/sparse/matrix/detail/select_k.cuh @@ -15,11 +15,4 @@ */ #pragma once -#ifndef RAFT_EXPLICIT_INSTANTIATE_ONLY #include "select_k-inl.cuh" - -#endif - -#ifdef RAFT_COMPILED -#include "select_k-ext.cuh" -#endif