Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into perf-minhash-highmem
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Oct 10, 2024
2 parents f747163 + 31423d0 commit 72acce3
Show file tree
Hide file tree
Showing 231 changed files with 3,754 additions and 1,887 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/pr_issue_status_automation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,33 @@ jobs:
UPDATE_ITEM: true
UPDATE_LINKED_ISSUES: true
secrets: inherit

process-branch-name:
if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
needs: get-project-id
runs-on: ubuntu-latest
outputs:
branch-name: ${{ steps.process-branch-name.outputs.branch-name }}
steps:
- name: Extract branch name
id: process-branch-name
run: |
branch=${{ github.event.pull_request.base.ref }}
release=${branch#branch-}
echo "branch-name=$release" >> "$GITHUB_OUTPUT"
update-release:
# This job sets the PR and its linked issues to the release they are targeting
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
needs: [get-project-id, process-branch-name]
with:
PROJECT_ID: "PVT_kwDOAp2shc4AiNzl"
SINGLE_SELECT_FIELD_ID: "PVTSSF_lADOAp2shc4AiNzlzgg52UQ"
SINGLE_SELECT_FIELD_NAME: "Release"
SINGLE_SELECT_OPTION_VALUE: "${{ needs.process-branch-name.outputs.branch-name }}"
ITEM_PROJECT_ID: "${{ needs.get-project-id.outputs.ITEM_PROJECT_ID }}"
ITEM_NODE_ID: "${{ github.event.pull_request.node_id }}"
UPDATE_ITEM: true
UPDATE_LINKED_ISSUES: true
secrets: inherit
296 changes: 296 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

set -euo pipefail

export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR"

rapids-logger "Create test conda environment"
Expand All @@ -29,7 +28,10 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
libcudf pylibcudf cudf dask-cudf
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"pylibcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}"

export RAPIDS_DOCS_DIR="$(mktemp -d)"

Expand Down
7 changes: 6 additions & 1 deletion ci/test_cpp_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate C++ testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand All @@ -31,7 +33,10 @@ rapids-print-env

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
libcudf libcudf_kafka libcudf-tests libcudf-example
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf-tests=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf-example=${RAPIDS_VERSION_MAJOR_MINOR}"

rapids-logger "Check GPU usage"
nvidia-smi
4 changes: 3 additions & 1 deletion ci/test_java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate Java testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand All @@ -30,7 +32,7 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
libcudf
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"

rapids-logger "Check GPU usage"
nvidia-smi
Expand Down
5 changes: 4 additions & 1 deletion ci/test_notebooks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate notebook testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand All @@ -30,7 +32,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
cudf libcudf
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"

NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
pushd notebooks
Expand Down
5 changes: 4 additions & 1 deletion ci/test_python_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate Python testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand Down Expand Up @@ -38,4 +40,5 @@ rapids-print-env
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
cudf libcudf
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
2 changes: 1 addition & 1 deletion ci/test_python_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ source ./ci/test_python_common.sh test_python_cudf

rapids-logger "Check GPU usage"
nvidia-smi

rapids-print-env
EXITCODE=0
trap "EXITCODE=1" ERR
set +e
Expand Down
6 changes: 5 additions & 1 deletion ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
# Common setup steps shared by Python test jobs
source ./ci/test_python_common.sh test_python_other

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
dask-cudf cudf_kafka custreamz
"dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"cudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
"custreamz=${RAPIDS_VERSION_MAJOR_MINOR}"

rapids-logger "Check GPU usage"
nvidia-smi
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ dependencies:
- openpyxl
- packaging
- pandas
- pandas>=2.0,<2.2.3dev0
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.8,<1.9
- pre-commit
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ dependencies:
- openpyxl
- packaging
- pandas
- pandas>=2.0,<2.2.3dev0
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.8,<1.9
- pre-commit
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ requirements:
run:
- python
- typing_extensions >=4.0.0
- pandas >=2.0,<2.2.3dev0
- pandas >=2.0,<2.2.4dev0
- cupy >=12.0.0
- numba-cuda >=0.0.13
- numpy >=1.23,<3.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/pylibcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ requirements:
run:
- python
- typing_extensions >=4.0.0
- pandas >=2.0,<2.2.3dev0
- pandas >=2.0,<2.2.4dev0
- numpy >=1.23,<3.0a0
- pyarrow>=14.0.0,<18.0.0a0
- {{ pin_compatible('rmm', max_pin='x.x') }}
Expand Down
43 changes: 36 additions & 7 deletions cpp/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,18 +1,47 @@
---
# Notes on disabled checks
# ------------------------
# modernize-use-equals-default:
# auto-fix is broken (doesn't insert =default correctly)
# modernize-concat-nested-namespaces:
# auto-fix is broken (can delete code)
# modernize-use-trailing-return-type:
# Purely stylistic, no benefit to rewriting everything
# modernize-return-braced-init-list:
# Stylistically we prefer to see the return type at the return site.
# See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
# for more information.
# modernize-use-bool-literals:
# Our tests use int flags for validity masks extensively and we prefer that
# clang-analyzer-cplusplus.NewDeleteLeaks:
# This check has numerous bugs, see
# https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
# We encounter at least
# https://github.com/llvm/llvm-project/issues/60896
# https://github.com/llvm/llvm-project/issues/69602
# clang-analyzer-optin.core.EnumCastOutOfRange:
# We use enums as flags in multiple cases and this check makes ORing flags invalid
# clang-analyzer-optin.cplusplus.UninitializedObject:
# There is an error in nanoarrow that none of the clang-tidy filters (i.e.
# header-filter and exclude-header-filter) are able to properly avoid. This
# merits further investigation.
#
# We need to verify that broken checks are still broken
Checks:
'modernize-*,
-modernize-use-equals-default,
-modernize-concat-nested-namespaces,
-modernize-use-trailing-return-type,
-modernize-use-bool-literals'

# -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
# -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
# -modernize-use-trailing-return-type # just a preference
-modernize-return-braced-init-list,
-modernize-use-bool-literals,
clang-analyzer-*,
-clang-analyzer-cplusplus.NewDeleteLeaks,
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
CheckOptions:
- key: modernize-loop-convert.MaxCopySize
Expand Down
14 changes: 10 additions & 4 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ ConfigureNVBench(
REDUCTION_NVBENCH
reduction/anyall.cpp
reduction/dictionary.cpp
reduction/histogram.cpp
reduction/minmax.cpp
reduction/rank.cpp
reduction/reduce.cpp
Expand All @@ -270,8 +271,13 @@ ConfigureBench(
)

ConfigureNVBench(
GROUPBY_NVBENCH groupby/group_max.cpp groupby/group_max_multithreaded.cpp
groupby/group_nunique.cpp groupby/group_rank.cpp groupby/group_struct_keys.cpp
GROUPBY_NVBENCH
groupby/group_histogram.cpp
groupby/group_max.cpp
groupby/group_max_multithreaded.cpp
groupby/group_nunique.cpp
groupby/group_rank.cpp
groupby/group_struct_keys.cpp
)

# ##################################################################################################
Expand Down Expand Up @@ -330,11 +336,11 @@ ConfigureNVBench(CSV_WRITER_NVBENCH io/csv/csv_writer.cpp)

# ##################################################################################################
# * ast benchmark ---------------------------------------------------------------------------------
ConfigureBench(AST_BENCH ast/transform.cpp)
ConfigureNVBench(AST_NVBENCH ast/transform.cpp)

# ##################################################################################################
# * binaryop benchmark ----------------------------------------------------------------------------
ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp)
ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp)

# ##################################################################################################
# * nvtext benchmark -------------------------------------------------------------------
Expand Down
51 changes: 17 additions & 34 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,14 +15,16 @@
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/transform.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <nvbench/nvbench.cuh>

#include <algorithm>
#include <list>
#include <memory>
Expand All @@ -35,13 +37,10 @@ enum class TreeType {
};

template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
class AST : public cudf::benchmark {};

template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(benchmark::State& state)
static void BM_ast_transform(nvbench::state& state)
{
auto const table_size{static_cast<cudf::size_type>(state.range(0))};
auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};
auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
Expand Down Expand Up @@ -86,38 +85,22 @@ static void BM_ast_transform(benchmark::State& state)

auto const& expression_tree_root = expressions.back();

// Execute benchmark
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf::compute_column(table, expression_tree_root);
}

// Use the number of bytes read from global memory
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0) *
(tree_levels + 1) * sizeof(key_type));
}
state.add_global_memory_reads<key_type>(table_size * (tree_levels + 1));

static void CustomRanges(benchmark::internal::Benchmark* b)
{
auto row_counts = std::vector<cudf::size_type>{100'000, 1'000'000, 10'000'000, 100'000'000};
auto operation_counts = std::vector<cudf::size_type>{1, 5, 10};
for (auto const& row_count : row_counts) {
for (auto const& operation_count : operation_counts) {
b->Args({row_count, operation_count});
}
}
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
}

#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable) \
(::benchmark::State & st) \
static void name(::nvbench::state& st) \
{ \
BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st); \
::BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st); \
} \
BENCHMARK_REGISTER_F(AST, name) \
->Apply(CustomRanges) \
->Unit(benchmark::kMillisecond) \
->UseManualTime();
NVBENCH_BENCH(name) \
.set_name(#name) \
.add_int64_axis("tree_levels", {1, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})

AST_TRANSFORM_BENCHMARK_DEFINE(
ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false);
Expand Down
Loading

0 comments on commit 72acce3

Please sign in to comment.