Merge remote-tracking branch 'upstream/branch-24.12' into pylibcudf/wrappers/dt
rapidsai · Oct 10, 2024 · 91782a0 · 91782a0
2 parents 5c2fac8 + 3791c8a
commit 91782a0
Show file tree

Hide file tree

Showing 91 changed files with 1,755 additions and 988 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
@@ -3,8 +3,7 @@
 
 set -euo pipefail
 
-export RAPIDS_VERSION="$(rapids-version)"
-export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
 export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR"
 
 rapids-logger "Create test conda environment"
@@ -29,7 +28,10 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  libcudf pylibcudf cudf dask-cudf
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "pylibcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}"
 
 export RAPIDS_DOCS_DIR="$(mktemp -d)"
 

diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh
@@ -5,6 +5,8 @@ set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate C++ testing dependencies"
 
 ENV_YAML_DIR="$(mktemp -d)"
@@ -31,7 +33,10 @@ rapids-print-env
 
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
-  libcudf libcudf_kafka libcudf-tests libcudf-example
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf-tests=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf-example=${RAPIDS_VERSION_MAJOR_MINOR}"
 
 rapids-logger "Check GPU usage"
 nvidia-smi
diff --git a/ci/test_java.sh b/ci/test_java.sh
@@ -5,6 +5,8 @@ set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate Java testing dependencies"
 
 ENV_YAML_DIR="$(mktemp -d)"
@@ -30,7 +32,7 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
-  libcudf
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
 
 rapids-logger "Check GPU usage"
 nvidia-smi

diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh
@@ -5,6 +5,8 @@ set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate notebook testing dependencies"
 
 ENV_YAML_DIR="$(mktemp -d)"
@@ -30,7 +32,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  cudf libcudf
+  "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
 
 NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
 pushd notebooks

diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh
@@ -7,6 +7,8 @@ set -euo pipefail
 
 . /opt/conda/etc/profile.d/conda.sh
 
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-logger "Generate Python testing dependencies"
 
 ENV_YAML_DIR="$(mktemp -d)"
@@ -38,4 +40,5 @@ rapids-print-env
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  cudf libcudf
+  "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh
@@ -7,10 +7,14 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
 # Common setup steps shared by Python test jobs
 source ./ci/test_python_common.sh test_python_other
 
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
 rapids-mamba-retry install \
   --channel "${CPP_CHANNEL}" \
   --channel "${PYTHON_CHANNEL}" \
-  dask-cudf cudf_kafka custreamz
+  "dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "cudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
+  "custreamz=${RAPIDS_VERSION_MAJOR_MINOR}"
 
 rapids-logger "Check GPU usage"
 nvidia-smi

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -245,6 +245,7 @@ ConfigureNVBench(
   REDUCTION_NVBENCH
   reduction/anyall.cpp
   reduction/dictionary.cpp
+  reduction/histogram.cpp
   reduction/minmax.cpp
   reduction/rank.cpp
   reduction/reduce.cpp
@@ -270,8 +271,13 @@ ConfigureBench(
 )
 
 ConfigureNVBench(
-  GROUPBY_NVBENCH groupby/group_max.cpp groupby/group_max_multithreaded.cpp
-  groupby/group_nunique.cpp groupby/group_rank.cpp groupby/group_struct_keys.cpp
+  GROUPBY_NVBENCH
+  groupby/group_histogram.cpp
+  groupby/group_max.cpp
+  groupby/group_max_multithreaded.cpp
+  groupby/group_nunique.cpp
+  groupby/group_rank.cpp
+  groupby/group_struct_keys.cpp
 )
 
 # ##################################################################################################

diff --git a/cpp/benchmarks/groupby/group_histogram.cpp b/cpp/benchmarks/groupby/group_histogram.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+
+#include <cudf/groupby.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+/**
+ * @brief Runs one groupby-histogram measurement on randomly generated keys and values.
+ *
+ * Keys are an int32 column and values a `Type` column; both use a UNIFORM distribution
+ * parameterized with the bounds 0 and `num_rows`. After the timed section, a "rows/s" figure
+ * derived from the "nv/cold/time/gpu/mean" summary and the peak memory usage reported by the
+ * memory stats logger are attached to `state`.
+ *
+ * @tparam Type Element type of the values column
+ * @param state nvbench state that drives the measurement and receives the results
+ * @param num_rows Number of rows in both generated columns
+ * @param cardinality Cardinality requested for the keys column
+ * @param null_probability Null probability for the values column; when it is not greater than
+ * zero the values profile is built with `no_validity()` instead
+ */
+template <typename Type>
+void groupby_histogram_helper(nvbench::state& state,
+                              cudf::size_type num_rows,
+                              cudf::size_type cardinality,
+                              double null_probability)
+{
+  auto const key_type_id   = cudf::type_to_id<int32_t>();
+  auto const value_type_id = cudf::type_to_id<Type>();
+
+  // Keys: requested cardinality, built with no_validity().
+  data_profile const key_profile =
+    data_profile_builder()
+      .cardinality(cardinality)
+      .no_validity()
+      .distribution(key_type_id, distribution_id::UNIFORM, 0, num_rows);
+  auto const keys = create_random_column(key_type_id, row_count{num_rows}, key_profile);
+
+  // Values: cardinality(0); a null probability is only applied when it is positive.
+  auto value_builder = data_profile_builder().cardinality(0).distribution(
+    value_type_id, distribution_id::UNIFORM, 0, num_rows);
+  if (!(null_probability > 0)) {
+    value_builder.no_validity();
+  } else {
+    value_builder.null_probability(null_probability);
+  }
+  auto const values =
+    create_random_column(value_type_id, row_count{num_rows}, data_profile{value_builder});
+
+  // A single request carrying the histogram aggregation over the values column.
+  std::vector<cudf::groupby::aggregation_request> requests;
+  requests.emplace_back();
+  auto& histogram_request  = requests.front();
+  histogram_request.values = values->view();
+  histogram_request.aggregations.push_back(
+    cudf::make_histogram_aggregation<cudf::groupby_aggregation>());
+
+  // The logger is created after input generation and before the timed section.
+  auto const mem_stats_logger = cudf::memory_stats_logger();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    auto grouper          = cudf::groupby::groupby(cudf::table_view({keys->view()}));
+    auto const aggregated = grouper.aggregate(requests);
+  });
+
+  auto const mean_gpu_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  auto const rows_per_second = static_cast<double>(num_rows) / mean_gpu_time;
+  state.add_element_count(rows_per_second, "rows/s");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+}
+
+/**
+ * @brief nvbench entry point for the groupby histogram benchmark.
+ *
+ * Reads the "cardinality", "num_rows" and "null_probability" axis values from `state` and
+ * forwards them to `groupby_histogram_helper`. Combinations whose cardinality exceeds the row
+ * count are skipped.
+ *
+ * @tparam Type Element type of the values column
+ * @param state nvbench state supplying the axis values
+ */
+template <typename Type>
+void bench_groupby_histogram(nvbench::state& state, nvbench::type_list<Type>)
+{
+  auto const key_cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const n_rows          = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const null_prob       = state.get_float64("null_probability");
+
+  if (key_cardinality <= n_rows) {
+    groupby_histogram_helper<Type>(state, n_rows, key_cardinality, null_prob);
+  } else {
+    // More distinct keys than rows were requested; report the combination as skipped.
+    state.skip("cardinality > num_rows");
+  }
+}
+
+NVBENCH_BENCH_TYPES(bench_groupby_histogram,  // one instance per value type listed below
+                    NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, int64_t, float, double>))
+  .set_name("groupby_histogram")  // cardinality > num_rows combinations are skipped at run time
+  .add_float64_axis("null_probability", {0, 0.1, 0.9})  // 0 selects the no_validity() profile
+  .add_int64_axis("cardinality", {100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000})
+  .add_int64_axis("num_rows", {100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000});
diff --git a/cpp/benchmarks/reduction/histogram.cpp b/cpp/benchmarks/reduction/histogram.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cudf/aggregation.hpp"
+#include "cudf/detail/aggregation/aggregation.hpp"
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/common/nvbench_utilities.hpp>
+#include <benchmarks/common/table_utilities.hpp>
+
+#include <cudf/column/column_view.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/reduction.hpp>
+#include <cudf/reduction/detail/histogram.hpp>
+#include <cudf/types.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+/**
+ * @brief nvbench entry point measuring `cudf::reduce` with a histogram aggregation.
+ *
+ * Generates one random column from the "cardinality", "num_rows" and "null_probability" axis
+ * values, reduces it on the stream supplied by the nvbench launch object, and reports the
+ * column size as the element count. Combinations whose cardinality exceeds the row count are
+ * skipped.
+ *
+ * @tparam ValueType Element type of the generated input column
+ * @param state nvbench state supplying the axis values and receiving the results
+ */
+template <typename ValueType>
+static void nvbench_reduction_histogram(nvbench::state& state, nvbench::type_list<ValueType>)
+{
+  auto const max_distinct = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const n_rows       = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const null_prob    = state.get_float64("null_probability");
+
+  bool const too_many_distinct = max_distinct > n_rows;
+  if (too_many_distinct) {
+    state.skip("cardinality > num_rows");
+    return;
+  }
+
+  auto const column_type_id = cudf::type_to_id<ValueType>();
+
+  // Assemble the generation profile step by step, in the same order as the setters are applied.
+  auto profile_builder = data_profile_builder();
+  profile_builder.null_probability(null_prob);
+  profile_builder.cardinality(max_distinct);
+  profile_builder.distribution(column_type_id, distribution_id::UNIFORM, 0, n_rows);
+  data_profile const profile{profile_builder};
+
+  auto const input = create_random_column(column_type_id, row_count{n_rows}, profile);
+  auto histogram   = cudf::make_histogram_aggregation<cudf::reduce_aggregation>();
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    // Run the reduction on the stream nvbench hands to this launch.
+    auto reduced = cudf::reduce(
+      *input, *histogram, input->type(), rmm::cuda_stream_view{launch.get_stream()});
+  });
+
+  state.add_element_count(input->size());
+}
+
+using data_type = nvbench::type_list<int32_t, int64_t>;  // element types the benchmark runs on
+
+NVBENCH_BENCH_TYPES(nvbench_reduction_histogram, NVBENCH_TYPE_AXES(data_type))
+  .set_name("histogram")  // cardinality > num_rows combinations are skipped at run time
+  .add_float64_axis("null_probability", {0.1})
+  .add_int64_axis("cardinality",  // NOTE(review): 0 presumably means "no limit" - confirm
+                  {0, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000, 50'000'000})
+  .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000});