From 673a5bbd5c968d7768e1677ac8e7f4dc40dfaf8b Mon Sep 17 00:00:00 2001
From: Robert Maynard
Date: Thu, 24 Oct 2024 17:25:25 -0400
Subject: [PATCH] Allow compilation with CUDA 12.6.1 (#2469)

The 12.6.1 CUDA compiler has issues with enable_if inside the template arguments of some kernels. We can simplify kernel logic and remove the usage of enable_if.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)
  - Paul Taylor (https://github.com/trxcllnt)

Approvers:
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: https://github.com/rapidsai/raft/pull/2469
---
 .../raft/matrix/detail/columnWiseSort.cuh     | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/cpp/include/raft/matrix/detail/columnWiseSort.cuh b/cpp/include/raft/matrix/detail/columnWiseSort.cuh
index a02621d054..9e94724d3d 100644
--- a/cpp/include/raft/matrix/detail/columnWiseSort.cuh
+++ b/cpp/include/raft/matrix/detail/columnWiseSort.cuh
@@ -72,12 +72,11 @@ RAFT_KERNEL devOffsetKernel(T* in, T value, int n_times)
 }
 
 // block level radix sort - can only sort as much data we can fit within shared memory
-template <
-  typename InType,
-  typename OutType,
-  int BLOCK_SIZE,
-  int ITEMS_PER_THREAD,
-  typename std::enable_if<TemplateChecker<InType, BLOCK_SIZE>::IsValid, InType>::type* = nullptr>
+template <typename InType,
+          typename OutType,
+          int BLOCK_SIZE,
+          int ITEMS_PER_THREAD,
+          std::enable_if_t<TemplateChecker<InType, BLOCK_SIZE>::IsValid, bool> = true>
 RAFT_KERNEL __launch_bounds__(1024, 1) devKeyValSortColumnPerRow(const InType* inputKeys,
                                                                  InType* outputKeys,
                                                                  OutType* inputVals,
@@ -120,12 +119,11 @@ RAFT_KERNEL __launch_bounds__(1024, 1) devKeyValSortColumnPerRow(const InType* i
   }
 }
 
-template <
-  typename InType,
-  typename OutType,
-  int BLOCK_SIZE,
-  int ITEMS_PER_THREAD,
-  typename std::enable_if<!(TemplateChecker<InType, BLOCK_SIZE>::IsValid), InType>::type* = nullptr>
+template <typename InType,
+          typename OutType,
+          int BLOCK_SIZE,
+          int ITEMS_PER_THREAD,
+          std::enable_if_t<!TemplateChecker<InType, BLOCK_SIZE>::IsValid, bool> = true>
 RAFT_KERNEL devKeyValSortColumnPerRow(const InType* inputKeys,
                                       InType* outputKeys,
                                       OutType* inputVals,