Merge remote-tracking branch 'upstream/branch-25.02' into feat/remove_expt_constexpr
rapidsai · Dec 12, 2024 · a3b68cd · a3b68cd
2 parents a9c4a37 + 78e5c0d
commit a3b68cd
Show file tree

Hide file tree

Showing 49 changed files with 400 additions and 1,258 deletions.
diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml
@@ -12,7 +12,7 @@ jobs:
   trigger-notifier:
     if: contains(github.event.pull_request.labels.*.name, 'breaking')
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-24.12
+    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02
     with:
       sender_login: ${{ github.event.sender.login }}
       sender_avatar: ${{ github.event.sender.avatar_url }}

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -277,7 +277,7 @@ rapids_cpm_init()
 # Not using rapids-cmake since we never want to find, always download.
 CPMAddPackage(
   NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW TRUE GIT_TAG
-  14bb233d2420f7187a690f0bb528ec0420c70d48
+  c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55
 )
 rapids_make_logger(cudf EXPORT_SET cudf-exports)
 

diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp
@@ -34,6 +34,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
   cudf::size_type const num_rows    = state.get_int64("NumRows");
   auto const keep                   = get_keep(state.get_string("keep"));
   cudf::size_type const cardinality = state.get_int64("cardinality");
+  auto const null_probability       = state.get_float64("null_probability");
 
   if (cardinality > num_rows) {
     state.skip("cardinality > num_rows");
@@ -42,7 +43,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
 
   data_profile profile = data_profile_builder()
                            .cardinality(cardinality)
-                           .null_probability(0.01)
+                           .null_probability(null_probability)
                            .distribution(cudf::type_to_id<Type>(),
                                          distribution_id::UNIFORM,
                                          static_cast<Type>(0),
@@ -65,6 +66,7 @@ using data_type = nvbench::type_list<int32_t, int64_t>;
 NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type))
   .set_name("distinct")
   .set_type_axes_names({"Type"})
+  .add_float64_axis("null_probability", {0.01})
   .add_string_axis("keep", {"any", "first", "last", "none"})
   .add_int64_axis("cardinality", {100, 100'000, 10'000'000, 1'000'000'000})
   .add_int64_axis("NumRows", {100, 100'000, 10'000'000, 1'000'000'000});

diff --git a/cpp/benchmarks/text/minhash.cpp b/cpp/benchmarks/text/minhash.cpp
@@ -54,9 +54,8 @@ static void bench_minhash(nvbench::state& state)
   state.add_global_memory_writes<nvbench::int32_t>(num_rows);  // output are hashes
 
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    auto result = base64
-                    ? nvtext::minhash64_permuted(input, 0, parameters_a, parameters_b, hash_width)
-                    : nvtext::minhash_permuted(input, 0, parameters_a, parameters_b, hash_width);
+    auto result = base64 ? nvtext::minhash64(input, 0, parameters_a, parameters_b, hash_width)
+                         : nvtext::minhash(input, 0, parameters_a, parameters_b, hash_width);
   });
 }
 

diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh
@@ -44,10 +44,11 @@ __launch_bounds__(block_size) CUDF_KERNEL
                            mutable_column_device_view out,
                            size_type* __restrict__ const valid_count)
 {
-  auto tidx                      = cudf::detail::grid_1d::global_thread_id<block_size>();
-  auto const stride              = cudf::detail::grid_1d::grid_stride<block_size>();
-  int const warp_id              = tidx / cudf::detail::warp_size;
-  size_type const warps_per_grid = gridDim.x * block_size / cudf::detail::warp_size;
+  auto tidx = cudf::detail::grid_1d::global_thread_id<block_size>();
+
+  auto const stride         = cudf::detail::grid_1d::grid_stride<block_size>();
+  auto const warp_id        = tidx / cudf::detail::warp_size;
+  auto const warps_per_grid = stride / cudf::detail::warp_size;
 
   // begin/end indices for the column data
   size_type const begin = 0;
@@ -60,7 +61,7 @@ __launch_bounds__(block_size) CUDF_KERNEL
 
   // lane id within the current warp
   constexpr size_type leader_lane{0};
-  int const lane_id = threadIdx.x % cudf::detail::warp_size;
+  auto const lane_id = threadIdx.x % cudf::detail::warp_size;
 
   size_type warp_valid_count{0};
 

diff --git a/cpp/include/cudf/detail/get_value.cuh b/cpp/include/cudf/detail/get_value.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/traits.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
@@ -48,11 +49,9 @@ T get_value(column_view const& col_view, size_type element_index, rmm::cuda_stre
   CUDF_EXPECTS(data_type(type_to_id<T>()) == col_view.type(), "get_value data type mismatch");
   CUDF_EXPECTS(element_index >= 0 && element_index < col_view.size(),
                "invalid element_index value");
-  T result;
-  CUDF_CUDA_TRY(cudaMemcpyAsync(
-    &result, col_view.data<T>() + element_index, sizeof(T), cudaMemcpyDefault, stream.value()));
-  stream.synchronize();
-  return result;
+  return cudf::detail::make_host_vector_sync(
+           device_span<T const>{col_view.data<T>() + element_index, 1}, stream)
+    .front();
 }
 
 }  // namespace detail

diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh
@@ -16,6 +16,8 @@
 #pragma once
 
 #include <cudf/column/column_device_view.cuh>
+#include <cudf/detail/utilities/cuda_memcpy.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
@@ -251,7 +253,7 @@ auto contiguous_copy_column_device_views(HostTableView source_view, rmm::cuda_st
   // A buffer of CPU memory is allocated to hold the ColumnDeviceView
   // objects. Once filled, the CPU memory is then copied to device memory
   // and the pointer is set in the d_columns member.
-  std::vector<int8_t> h_buffer(padded_views_size_bytes);
+  auto h_buffer = cudf::detail::make_host_vector<int8_t>(padded_views_size_bytes, stream);
   // Each ColumnDeviceView instance may have child objects which may
   // require setting some internal device pointers before being copied
   // from CPU to device.
@@ -266,8 +268,10 @@ auto contiguous_copy_column_device_views(HostTableView source_view, rmm::cuda_st
   auto d_columns = detail::child_columns_to_device_array<ColumnDeviceView>(
     source_view.begin(), source_view.end(), h_ptr, d_ptr);
 
-  CUDF_CUDA_TRY(cudaMemcpyAsync(d_ptr, h_ptr, views_size_bytes, cudaMemcpyDefault, stream.value()));
-  stream.synchronize();
+  auto const h_span = host_span<int8_t const>{h_buffer}.subspan(
+    static_cast<int8_t const*>(h_ptr) - h_buffer.data(), views_size_bytes);
+  auto const d_span = device_span<int8_t>{static_cast<int8_t*>(d_ptr), views_size_bytes};
+  cudf::detail::cuda_memcpy(d_span, h_span, stream);
   return std::make_tuple(std::move(descendant_storage), d_columns);
 }
 

diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp
@@ -31,69 +31,6 @@ namespace CUDF_EXPORT nvtext {
  * @file
  */
 
-/**
- * @brief Returns the minhash value for each string
- *
- * Hash values are computed from substrings of each string and the
- * minimum hash value is returned for each string.
- *
- * Any null row entries result in corresponding null output rows.
- *
- * This function uses MurmurHash3_x86_32 for the hash algorithm.
- *
- * @deprecated Deprecated in 24.12
- *
- * @throw std::invalid_argument if the width < 2
- *
- * @param input Strings column to compute minhash
- * @param seed  Seed value used for the hash algorithm
- * @param width The character width used for apply substrings;
- *              Default is 4 characters.
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return Minhash values for each string in input
- */
-[[deprecated]] std::unique_ptr<cudf::column> minhash(
-  cudf::strings_column_view const& input,
-  cudf::numeric_scalar<uint32_t> seed = 0,
-  cudf::size_type width               = 4,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr   = cudf::get_current_device_resource_ref());
-
-/**
- * @brief Returns the minhash values for each string per seed
- *
- * Hash values are computed from substrings of each string and the
- * minimum hash value is returned for each string for each seed.
- * Each row of the list column are seed results for the corresponding
- * string. The order of the elements in each row match the order of
- * the seeds provided in the `seeds` parameter.
- *
- * This function uses MurmurHash3_x86_32 for the hash algorithm.
- *
- * Any null row entries result in corresponding null output rows.
- *
- * @deprecated Deprecated in 24.12 - to be replaced in a future release
- *
- * @throw std::invalid_argument if the width < 2
- * @throw std::invalid_argument if seeds is empty
- * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
- *
- * @param input Strings column to compute minhash
- * @param seeds Seed values used for the hash algorithm
- * @param width The character width used for apply substrings;
- *              Default is 4 characters.
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return List column of minhash values for each string per seed
- */
-[[deprecated]] std::unique_ptr<cudf::column> minhash(
-  cudf::strings_column_view const& input,
-  cudf::device_span<uint32_t const> seeds,
-  cudf::size_type width             = 4,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
-
 /**
  * @brief Returns the minhash values for each string
  *
@@ -132,7 +69,7 @@ namespace CUDF_EXPORT nvtext {
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @return List column of minhash values for each string per seed
  */
-std::unique_ptr<cudf::column> minhash_permuted(
+std::unique_ptr<cudf::column> minhash(
   cudf::strings_column_view const& input,
   uint32_t seed,
   cudf::device_span<uint32_t const> parameter_a,
@@ -142,67 +79,16 @@ std::unique_ptr<cudf::column> minhash_permuted(
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
 /**
- * @brief Returns the minhash value for each string
- *
- * Hash values are computed from substrings of each string and the
- * minimum hash value is returned for each string.
- *
- * Any null row entries result in corresponding null output rows.
- *
- * This function uses MurmurHash3_x64_128 for the hash algorithm.
- * The hash function returns 2 uint64 values but only the first value
- * is used with the minhash calculation.
- *
- * @deprecated Deprecated in 24.12
- *
- * @throw std::invalid_argument if the width < 2
- *
- * @param input Strings column to compute minhash
- * @param seed  Seed value used for the hash algorithm
- * @param width The character width used for apply substrings;
- *              Default is 4 characters.
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return Minhash values as UINT64 for each string in input
- */
-[[deprecated]] std::unique_ptr<cudf::column> minhash64(
-  cudf::strings_column_view const& input,
-  cudf::numeric_scalar<uint64_t> seed = 0,
-  cudf::size_type width               = 4,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr   = cudf::get_current_device_resource_ref());
-
-/**
- * @brief Returns the minhash values for each string per seed
- *
- * Hash values are computed from substrings of each string and the
- * minimum hash value is returned for each string for each seed.
- * Each row of the list column are seed results for the corresponding
- * string. The order of the elements in each row match the order of
- * the seeds provided in the `seeds` parameter.
- *
- * This function uses MurmurHash3_x64_128 for the hash algorithm.
+ * @copydoc nvtext::minhash
  *
- * Any null row entries result in corresponding null output rows.
- *
- * @deprecated Deprecated in 24.12 - to be replaced in a future release
- *
- * @throw std::invalid_argument if the width < 2
- * @throw std::invalid_argument if seeds is empty
- * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
- *
- * @param input Strings column to compute minhash
- * @param seeds Seed values used for the hash algorithm
- * @param width The character width used for apply substrings;
- *              Default is 4 characters.
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return List column of minhash values for each string per seed
+ * @deprecated Use nvtext::minhash()
  */
-[[deprecated]] std::unique_ptr<cudf::column> minhash64(
+[[deprecated]] std::unique_ptr<cudf::column> minhash_permuted(
   cudf::strings_column_view const& input,
-  cudf::device_span<uint64_t const> seeds,
-  cudf::size_type width             = 4,
+  uint32_t seed,
+  cudf::device_span<uint32_t const> parameter_a,
+  cudf::device_span<uint32_t const> parameter_b,
+  cudf::size_type width,
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
@@ -244,7 +130,7 @@ std::unique_ptr<cudf::column> minhash_permuted(
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @return List column of minhash values for each string per seed
  */
-std::unique_ptr<cudf::column> minhash64_permuted(
+std::unique_ptr<cudf::column> minhash64(
   cudf::strings_column_view const& input,
   uint64_t seed,
   cudf::device_span<uint64_t const> parameter_a,
@@ -254,64 +140,18 @@ std::unique_ptr<cudf::column> minhash64_permuted(
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
 /**
- * @brief Returns the minhash values for each row of strings per seed
- *
- * Hash values are computed from each string in each row and the
- * minimum hash value is returned for each row for each seed.
- * Each row of the output list column are seed results for the corresponding
- * input row. The order of the elements in each row match the order of
- * the seeds provided in the `seeds` parameter.
- *
- * This function uses MurmurHash3_x86_32 for the hash algorithm.
- *
- * Any null row entries result in corresponding null output rows.
+ * @copydoc nvtext::minhash64
  *
- * @deprecated Deprecated in 24.12
- *
- * @throw std::invalid_argument if seeds is empty
- * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
- *
- * @param input Lists column of strings to compute minhash
- * @param seeds Seed values used for the hash algorithm
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return List column of minhash values for each string per seed
+ * @deprecated Use nvtext::minhash64()
  */
-[[deprecated]] std::unique_ptr<cudf::column> word_minhash(
-  cudf::lists_column_view const& input,
-  cudf::device_span<uint32_t const> seeds,
+[[deprecated]] std::unique_ptr<cudf::column> minhash64_permuted(
+  cudf::strings_column_view const& input,
+  uint64_t seed,
+  cudf::device_span<uint64_t const> parameter_a,
+  cudf::device_span<uint64_t const> parameter_b,
+  cudf::size_type width,
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
-/**
- * @brief Returns the minhash values for each row of strings per seed
- *
- * Hash values are computed from each string in each row and the
- * minimum hash value is returned for each row for each seed.
- * Each row of the output list column are seed results for the corresponding
- * input row. The order of the elements in each row match the order of
- * the seeds provided in the `seeds` parameter.
- *
- * This function uses MurmurHash3_x64_128 for the hash algorithm though
- * only the first 64-bits of the hash are used in computing the output.
- *
- * Any null row entries result in corresponding null output rows.
- *
- * @deprecated Deprecated in 24.12
- *
- * @throw std::invalid_argument if seeds is empty
- * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
- *
- * @param input Lists column of strings to compute minhash
- * @param seeds Seed values used for the hash algorithm
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return List column of minhash values for each string per seed
- */
-[[deprecated]] std::unique_ptr<cudf::column> word_minhash64(
-  cudf::lists_column_view const& input,
-  cudf::device_span<uint64_t const> seeds,
-  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
-  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 /** @} */  // end of group
 }  // namespace CUDF_EXPORT nvtext