Skip to content

Commit

Permalink
Merge branch 'perf-minhash-highmem' of github.com:davidwendt/cudf int…
Browse files Browse the repository at this point in the history
…o perf-minhash-highmem
  • Loading branch information
davidwendt committed Oct 11, 2024
2 parents b1363ee + aa6f3e0 commit 660641e
Show file tree
Hide file tree
Showing 186 changed files with 2,910 additions and 1,389 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/pr_issue_status_automation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,33 @@ jobs:
UPDATE_ITEM: true
UPDATE_LINKED_ISSUES: true
secrets: inherit

process-branch-name:
  # Derives the release label from the pull request's base branch by stripping a
  # leading "branch-" prefix, and exposes it as the `branch-name` job output.
  # Runs only for open pull requests that have a project item id.
  if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
  needs: get-project-id
  runs-on: ubuntu-latest
  outputs:
    branch-name: ${{ steps.process-branch-name.outputs.branch-name }}
  steps:
    - name: Extract branch name
      id: process-branch-name
      env:
        # Pass the ref through the environment instead of expanding it inside the
        # script text, so shell metacharacters in a branch name are never executed.
        BASE_REF: ${{ github.event.pull_request.base.ref }}
      run: |
        # Remove the "branch-" prefix if present; other names pass through unchanged.
        release="${BASE_REF#branch-}"
        echo "branch-name=$release" >> "$GITHUB_OUTPUT"
update-release:
# This job sets the PR and its linked issues to the release they are targeting
# It delegates to a reusable workflow from rapidsai/shared-workflows and selects the
# "Release" single-select option whose value is the `branch-name` output of the
# process-branch-name job. Like that job, it only runs for open pull requests that
# have a non-empty ITEM_PROJECT_ID.
# NOTE(review): the workflow file name and ref in `uses:` below appear to have been
# replaced by an e-mail-obfuscation placeholder ("[email protected]") in this copy —
# restore the real `<workflow>.yaml@<ref>` from the repository before relying on it.
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
needs: [get-project-id, process-branch-name]
with:
PROJECT_ID: "PVT_kwDOAp2shc4AiNzl"
SINGLE_SELECT_FIELD_ID: "PVTSSF_lADOAp2shc4AiNzlzgg52UQ"
SINGLE_SELECT_FIELD_NAME: "Release"
# Release value computed from the PR's base branch by process-branch-name
SINGLE_SELECT_OPTION_VALUE: "${{ needs.process-branch-name.outputs.branch-name }}"
ITEM_PROJECT_ID: "${{ needs.get-project-id.outputs.ITEM_PROJECT_ID }}"
ITEM_NODE_ID: "${{ github.event.pull_request.node_id }}"
# Update both the PR's own project item and the issues linked to it
UPDATE_ITEM: true
UPDATE_LINKED_ISSUES: true
secrets: inherit
296 changes: 296 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

set -euo pipefail

export RAPIDS_VERSION="$(rapids-version)"
export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR"

rapids-logger "Create test conda environment"
Expand All @@ -29,7 +28,10 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
libcudf pylibcudf cudf dask-cudf
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"pylibcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}"

export RAPIDS_DOCS_DIR="$(mktemp -d)"

Expand Down
7 changes: 6 additions & 1 deletion ci/test_cpp_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate C++ testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand All @@ -31,7 +33,10 @@ rapids-print-env

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
libcudf libcudf_kafka libcudf-tests libcudf-example
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf-tests=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf-example=${RAPIDS_VERSION_MAJOR_MINOR}"

rapids-logger "Check GPU usage"
nvidia-smi
4 changes: 3 additions & 1 deletion ci/test_java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate Java testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand All @@ -30,7 +32,7 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
libcudf
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"

rapids-logger "Check GPU usage"
nvidia-smi
Expand Down
5 changes: 4 additions & 1 deletion ci/test_notebooks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate notebook testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand All @@ -30,7 +32,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
cudf libcudf
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"

NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")"
pushd notebooks
Expand Down
5 changes: 4 additions & 1 deletion ci/test_python_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ set -euo pipefail

. /opt/conda/etc/profile.d/conda.sh

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-logger "Generate Python testing dependencies"

ENV_YAML_DIR="$(mktemp -d)"
Expand Down Expand Up @@ -38,4 +40,5 @@ rapids-print-env
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
cudf libcudf
"cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"libcudf=${RAPIDS_VERSION_MAJOR_MINOR}"
6 changes: 5 additions & 1 deletion ci/test_python_other.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
# Common setup steps shared by Python test jobs
source ./ci/test_python_common.sh test_python_other

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
dask-cudf cudf_kafka custreamz
"dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \
"cudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \
"custreamz=${RAPIDS_VERSION_MAJOR_MINOR}"

rapids-logger "Check GPU usage"
nvidia-smi
Expand Down
43 changes: 36 additions & 7 deletions cpp/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,18 +1,47 @@
---
# Notes on disabled checks
# ------------------------
# modernize-use-equals-default:
# auto-fix is broken (doesn't insert =default correctly)
# modernize-concat-nested-namespaces:
# auto-fix is broken (can delete code)
# modernize-use-trailing-return-type:
# Purely stylistic, no benefit to rewriting everything
# modernize-return-braced-init-list:
# Stylistically we prefer to see the return type at the return site.
# See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
# for more information.
# modernize-use-bool-literals:
# Our tests use int flags for validity masks extensively and we prefer that
# clang-analyzer-cplusplus.NewDeleteLeaks:
# This check has numerous bugs, see
# https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
# We encounter at least
# https://github.com/llvm/llvm-project/issues/60896
# https://github.com/llvm/llvm-project/issues/69602
# clang-analyzer-optin.core.EnumCastOutOfRange:
# We use enums as flags in multiple cases and this check makes ORing flags invalid
# clang-analyzer-optin.cplusplus.UninitializedObject:
# There is an error in nanoarrow that none of the clang-tidy filters (i.e.
# header-filter and exclude-header-filter) are able to properly avoid. This
# merits further investigation.
#
# We need to verify that broken checks are still broken
Checks:
'modernize-*,
-modernize-use-equals-default,
-modernize-concat-nested-namespaces,
-modernize-use-trailing-return-type,
-modernize-use-bool-literals'

# -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
# -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
# -modernize-use-trailing-return-type # just a preference
-modernize-return-braced-init-list,
-modernize-use-bool-literals,
clang-analyzer-*,
-clang-analyzer-cplusplus.NewDeleteLeaks,
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
CheckOptions:
- key: modernize-loop-convert.MaxCopySize
Expand Down
10 changes: 8 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ ConfigureNVBench(
REDUCTION_NVBENCH
reduction/anyall.cpp
reduction/dictionary.cpp
reduction/histogram.cpp
reduction/minmax.cpp
reduction/rank.cpp
reduction/reduce.cpp
Expand All @@ -270,8 +271,13 @@ ConfigureBench(
)

ConfigureNVBench(
GROUPBY_NVBENCH groupby/group_max.cpp groupby/group_max_multithreaded.cpp
groupby/group_nunique.cpp groupby/group_rank.cpp groupby/group_struct_keys.cpp
GROUPBY_NVBENCH
groupby/group_histogram.cpp
groupby/group_max.cpp
groupby/group_max_multithreaded.cpp
groupby/group_nunique.cpp
groupby/group_rank.cpp
groupby/group_struct_keys.cpp
)

# ##################################################################################################
Expand Down
90 changes: 90 additions & 0 deletions cpp/benchmarks/groupby/group_histogram.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>

#include <cudf/groupby.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief Benchmarks a single groupby HISTOGRAM aggregation for one configuration.
 *
 * Generates a random int32 key column (no nulls, limited to `cardinality`) and a
 * random value column of `Type`, then times `groupby::aggregate` with a single
 * histogram request. Reports a "rows/s" element count and the peak memory usage
 * observed by a `memory_stats_logger` created after the inputs were generated.
 *
 * @tparam Type Element type of the value column
 * @param state nvbench state used for execution and reporting
 * @param num_rows Number of rows in the key and value columns
 * @param cardinality Cardinality setting applied to the key column profile
 * @param null_probability Null probability of the value column; values that are
 *        not greater than zero produce a value column without validity
 */
template <typename Type>
void groupby_histogram_helper(nvbench::state& state,
                              cudf::size_type num_rows,
                              cudf::size_type cardinality,
                              double null_probability)
{
  auto const key_type_id   = cudf::type_to_id<int32_t>();
  auto const value_type_id = cudf::type_to_id<Type>();

  // Keys: uniformly distributed int32 in [0, num_rows], never null.
  data_profile const key_profile =
    data_profile_builder()
      .cardinality(cardinality)
      .no_validity()
      .distribution(key_type_id, distribution_id::UNIFORM, 0, num_rows);
  auto const keys = create_random_column(key_type_id, row_count{num_rows}, key_profile);

  // Values: uniformly distributed `Type` in [0, num_rows].
  // NOTE(review): cardinality(0) presumably lifts the cardinality limit — confirm
  // against data_profile.
  auto value_builder = data_profile_builder().cardinality(0).distribution(
    value_type_id, distribution_id::UNIFORM, 0, num_rows);
  if (null_probability > 0) {
    value_builder.null_probability(null_probability);
  } else {
    value_builder.no_validity();
  }
  auto const values =
    create_random_column(value_type_id, row_count{num_rows}, data_profile{value_builder});

  // A single aggregation request holding one histogram aggregation.
  std::vector<cudf::groupby::aggregation_request> requests(1);
  auto& request  = requests.front();
  request.values = values->view();
  request.aggregations.push_back(cudf::make_histogram_aggregation<cudf::groupby_aggregation>());

  auto const mem_stats_logger = cudf::memory_stats_logger();
  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    // The groupby object is rebuilt on every run, so its construction is timed too.
    auto grouper      = cudf::groupby::groupby(cudf::table_view({keys->view()}));
    auto const result = grouper.aggregate(requests);
  });

  // Throughput is derived from the mean time in the "nv/cold/time/gpu/mean" summary.
  auto const mean_gpu_time   = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
  auto const rows_per_second = static_cast<double>(num_rows) / mean_gpu_time;
  state.add_element_count(rows_per_second, "rows/s");
  state.add_buffer_size(
    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}

/**
 * @brief nvbench entry point for the groupby histogram benchmark.
 *
 * Reads the "cardinality", "num_rows" and "null_probability" axes from `state`
 * and forwards them to `groupby_histogram_helper`. Configurations whose
 * cardinality exceeds the row count are skipped.
 *
 * @tparam Type Element type of the value column
 * @param state nvbench state carrying the axis values for this run
 */
template <typename Type>
void bench_groupby_histogram(nvbench::state& state, nvbench::type_list<Type>)
{
  auto const key_cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  auto const row_total       = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const null_fraction   = state.get_float64("null_probability");

  if (key_cardinality <= row_total) {
    groupby_histogram_helper<Type>(state, row_total, key_cardinality, null_fraction);
  } else {
    state.skip("cardinality > num_rows");
  }
}

// Registers bench_groupby_histogram with nvbench under the name "groupby_histogram"
// for the value types int32_t, int64_t, float and double.
// The axis names below are the ones bench_groupby_histogram reads from the state;
// combinations where cardinality > num_rows are skipped by that function.
NVBENCH_BENCH_TYPES(bench_groupby_histogram,
NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, int64_t, float, double>))
.set_name("groupby_histogram")
.add_float64_axis("null_probability", {0, 0.1, 0.9})
.add_int64_axis("cardinality", {100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000})
.add_int64_axis("num_rows", {100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000});
68 changes: 68 additions & 0 deletions cpp/benchmarks/reduction/histogram.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cudf/aggregation.hpp"
#include "cudf/detail/aggregation/aggregation.hpp"

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/common/nvbench_utilities.hpp>
#include <benchmarks/common/table_utilities.hpp>

#include <cudf/column/column_view.hpp>
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/reduction.hpp>
#include <cudf/reduction/detail/histogram.hpp>
#include <cudf/types.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief nvbench entry point for the HISTOGRAM reduction benchmark.
 *
 * Reads the "cardinality", "num_rows" and "null_probability" axes, generates one
 * random column of `type` with a uniform distribution over [0, num_rows], and
 * times `cudf::reduce` with a histogram aggregation on the stream supplied by
 * nvbench. Configurations whose cardinality exceeds the row count are skipped.
 *
 * @tparam type Element type of the input column
 * @param state nvbench state carrying the axis values for this run
 */
template <typename type>
static void nvbench_reduction_histogram(nvbench::state& state, nvbench::type_list<type>)
{
  auto const input_type_id = cudf::type_to_id<type>();

  auto const key_cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
  auto const row_total       = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const null_fraction   = state.get_float64("null_probability");

  if (key_cardinality > row_total) {
    state.skip("cardinality > num_rows");
    return;
  }

  // Build the profile step by step; the setters are applied in the same order as before.
  auto profile_builder = data_profile_builder();
  profile_builder.null_probability(null_fraction);
  profile_builder.cardinality(key_cardinality);
  profile_builder.distribution(input_type_id, distribution_id::UNIFORM, 0, row_total);
  data_profile const profile{profile_builder};

  auto const input   = create_random_column(input_type_id, row_count{row_total}, profile);
  auto histogram_agg = cudf::make_histogram_aggregation<cudf::reduce_aggregation>();

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    // Run on the stream nvbench provides for this launch.
    auto output = cudf::reduce(
      *input, *histogram_agg, input->type(), rmm::cuda_stream_view{launch.get_stream()});
  });

  state.add_element_count(input->size());
}

// Input element types the reduction histogram benchmark is instantiated for.
using data_type = nvbench::type_list<int32_t, int64_t>;

// Registers nvbench_reduction_histogram with nvbench under the name "histogram".
// The axis names below are the ones the benchmark function reads from the state;
// combinations where cardinality > num_rows are skipped by that function.
NVBENCH_BENCH_TYPES(nvbench_reduction_histogram, NVBENCH_TYPE_AXES(data_type))
.set_name("histogram")
.add_float64_axis("null_probability", {0.1})
.add_int64_axis("cardinality",
{0, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000, 50'000'000})
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000});
Loading

0 comments on commit 660641e

Please sign in to comment.