Update to CCCL 2.7.0-rc2. (#17233)

This PR updates to CCCL 2.7.0-rc2. Do not merge until all of RAPIDS is ready to update. It depends on rapidsai/rapids-cmake#710 and should be admin-merged immediately after that PR. Part of rapidsai/build-planning#115.

Co-authored-by: Michael Schellenberger Costa <[email protected]>
rapidsai · Dec 4, 2024 · 38820ff · 38820ff
1 parent 6440207
commit 38820ff
Show file tree

Hide file tree

Showing 4 changed files with 56 additions and 54 deletions.
diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json
@@ -3,11 +3,6 @@
   "packages" : {
     "CCCL" : {
       "patches" : [
-        {
-          "file" : "${current_json_dir}/cccl_symbol_visibility.diff",
-          "issue" : "Correct symbol visibility issues in libcudacxx [https://github.com/NVIDIA/cccl/pull/1832/]",
-          "fixed_in" : "2.6"
-        },
         {
           "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
           "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",

diff --git a/cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff b/cpp/cmake/thirdparty/patches/cccl_symbol_visibility.diff
diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
@@ -1,25 +1,59 @@
 diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
-index 2a3cc4e33..8fb337b26 100644
+index 971b93d62..0d6b25b07 100644
 --- a/thrust/thrust/system/cuda/detail/dispatch.h
 +++ b/thrust/thrust/system/cuda/detail/dispatch.h
-@@ -44,8 +44,7 @@
-   }                                                                                   \
-   else                                                                                \
-   {                                                                                   \
--    auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
--    status                             = call arguments;                              \
+@@ -36,16 +36,15 @@
+  * that callables for both branches consist of the same tokens, and is intended to be used with Thrust-style dispatch
+  * interfaces, that always deduce the size type from the arguments.
+  */
+-#define THRUST_INDEX_TYPE_DISPATCH(status, call, count, arguments)         \
+-  if (count <= thrust::detail::integer_traits<std::int32_t>::const_max)    \
+-  {                                                                        \
+-    auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int32_t>(count); \
+-    status                             = call arguments;                   \
+-  }                                                                        \
+-  else                                                                     \
+-  {                                                                        \
+-    auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int64_t>(count); \
+-    status                             = call arguments;                   \
++#define THRUST_INDEX_TYPE_DISPATCH(status, call, count, arguments)                                 \
++  if (count <= thrust::detail::integer_traits<std::int32_t>::const_max)                            \
++  {                                                                                                \
++    auto THRUST_PP_CAT2(count, _fixed) = static_cast<std::int32_t>(count);                         \
++    status                             = call arguments;                                           \
++  }                                                                                                \
++  else                                                                                             \
++  {                                                                                                \
 +    throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
    }
- 
+
  /**
-@@ -66,9 +65,7 @@
-   }                                                                                          \
-   else                                                                                       \
-   {                                                                                          \
--    auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1);      \
--    auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2);      \
--    status                              = call arguments;                                    \
+@@ -55,18 +54,16 @@
+  *
+  * This version of the macro supports providing two count variables, which is necessary for set algorithms.
+  */
+-#define THRUST_DOUBLE_INDEX_TYPE_DISPATCH(status, call, count1, count2, arguments) \
+-  if (count1 + count2 <= thrust::detail::integer_traits<std::int32_t>::const_max)  \
+-  {                                                                                \
+-    auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int32_t>(count1);       \
+-    auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int32_t>(count2);       \
+-    status                              = call arguments;                          \
+-  }                                                                                \
+-  else                                                                             \
+-  {                                                                                \
+-    auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int64_t>(count1);       \
+-    auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int64_t>(count2);       \
+-    status                              = call arguments;                          \
++#define THRUST_DOUBLE_INDEX_TYPE_DISPATCH(status, call, count1, count2, arguments)                        \
++  if (count1 + count2 <= thrust::detail::integer_traits<std::int32_t>::const_max)                         \
++  {                                                                                                       \
++    auto THRUST_PP_CAT2(count1, _fixed) = static_cast<std::int32_t>(count1);                              \
++    auto THRUST_PP_CAT2(count2, _fixed) = static_cast<std::int32_t>(count2);                              \
++    status                              = call arguments;                                                 \
++  }                                                                                                       \
++  else                                                                                                    \
++  {                                                                                                       \
 +    throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
    }
+
  /**
-  * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
@@ -1,20 +1,20 @@
 diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
-index eb76ebb0b..c6c529a50 100644
+index 29510db5e..cf57e5786 100644
 --- a/cub/cub/block/block_merge_sort.cuh
 +++ b/cub/cub/block/block_merge_sort.cuh
 @@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge(
    KeyT key1 = keys_shared[keys1_beg];
    KeyT key2 = keys_shared[keys2_beg];
- 
+
 -#pragma unroll
 +#pragma unroll 1
    for (int item = 0; item < ITEMS_PER_THREAD; ++item)
    {
-     bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
-@@ -376,7 +376,7 @@ public:
+     const bool p  = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
+@@ -374,7 +374,7 @@ public:
        //
        KeyT max_key = oob_default;
- 
+
 -#pragma unroll
 +#pragma unroll 1
        for (int item = 1; item < ITEMS_PER_THREAD; ++item)
@@ -27,7 +27,7 @@ index 7d9e8622f..da5627306 100644
 @@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE
  {
    constexpr bool KEYS_ONLY = ::cuda::std::is_same<ValueT, NullType>::value;
- 
+
 -#pragma unroll
 +#pragma unroll 1
    for (int i = 0; i < ITEMS_PER_THREAD; ++i)