Skip to content

Commit

Permalink
Merge branch 'branch-24.08' of github.com:rapidsai/cudf into polars-s…
Browse files Browse the repository at this point in the history
…tr-replace
  • Loading branch information
lithomas1 committed Jul 5, 2024
2 parents 46f0f35 + a583c97 commit 6e084bc
Show file tree
Hide file tree
Showing 126 changed files with 3,379 additions and 701 deletions.
16 changes: 13 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ jobs:
- docs-build
- wheel-build-cudf
- wheel-tests-cudf
- test-cudf-polars
- wheel-build-cudf-polars
- wheel-tests-cudf-polars
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- devcontainer
Expand Down Expand Up @@ -133,17 +134,26 @@ jobs:
with:
build_type: pull-request
script: ci/test_wheel_cudf.sh
test-cudf-polars:
wheel-build-cudf-polars:
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: "ci/build_wheel_cudf_polars.sh"
wheel-tests-cudf-polars:
needs: wheel-build-cudf-polars
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
# This always runs, but only fails if this PR touches code in
# pylibcudf or cudf_polars
script: "ci/test_cudf_polars.sh"
script: "ci/test_wheel_cudf_polars.sh"
wheel-build-dask-cudf:
needs: wheel-build-cudf
secrets: inherit
Expand Down
13 changes: 7 additions & 6 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,6 @@ repos:
.*test.*|
^CHANGELOG.md$
)
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
hooks:
Expand All @@ -149,7 +144,7 @@ repos:
- id: ruff-format
files: python/.*$
- repo: https://github.com/rapidsai/pre-commit-hooks
rev: v0.0.3
rev: v0.2.0
hooks:
- id: verify-copyright
exclude: |
Expand All @@ -158,6 +153,12 @@ repos:
cpp/src/io/parquet/ipc/Message_generated[.]h$|
cpp/src/io/parquet/ipc/Schema_generated[.]h$
)
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.13.11
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]

default_language_version:
python: python3
9 changes: 8 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)

VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats"
VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings"
HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"<args>\\\"]
clean - remove all existing build artifacts and configuration (start
over)
Expand All @@ -39,6 +39,7 @@ HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [li
--opensource_nvcomp - disable use of proprietary nvcomp extensions
--show_depr_warn - show cmake deprecation warnings
--ptds - enable per-thread default stream
--disable_large_strings - disable large strings support
--build_metrics - generate build metrics report for libcudf
--incl_cache_stats - include cache statistics in build metrics report
--cmake-args=\\\"<args>\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
Expand Down Expand Up @@ -69,6 +70,7 @@ BUILD_DISABLE_DEPRECATION_WARNINGS=ON
BUILD_PER_THREAD_DEFAULT_STREAM=OFF
BUILD_REPORT_METRICS=OFF
BUILD_REPORT_INCL_CACHE_STATS=OFF
BUILD_DISABLE_LARGE_STRINGS=OFF
USE_PROPRIETARY_NVCOMP=ON
PYTHON_ARGS_FOR_INSTALL="-m pip install --no-build-isolation --no-deps --config-settings rapidsai.disable-cuda=true"

Expand Down Expand Up @@ -153,6 +155,7 @@ function buildLibCudfJniInDocker {
-DCUDF_ENABLE_ARROW_S3=OFF \
-DBUILD_TESTS=OFF \
-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON \
-DCUDF_LARGE_STRINGS_DISABLED=ON \
-DRMM_LOGGING_LEVEL=OFF \
-DBUILD_SHARED_LIBS=OFF && \
cmake --build . --parallel ${PARALLEL_LEVEL} && \
Expand Down Expand Up @@ -239,6 +242,9 @@ if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON"
fi

if hasArg --disable_large_strings; then
BUILD_DISABLE_LARGE_STRINGS="ON"
fi

# If clean given, run it prior to any other steps
if hasArg clean; then
Expand Down Expand Up @@ -292,6 +298,7 @@ if buildAll || hasArg libcudf; then
-DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
-DDISABLE_DEPRECATION_WARNINGS=${BUILD_DISABLE_DEPRECATION_WARNINGS} \
-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \
-DCUDF_LARGE_STRINGS_DISABLED=${BUILD_DISABLE_LARGE_STRINGS} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
${EXTRA_CMAKE_ARGS}

Expand Down
11 changes: 11 additions & 0 deletions ci/build_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

set -euo pipefail

package_dir="python/cudf_polars"

./ci/build_wheel.sh ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist
11 changes: 11 additions & 0 deletions ci/run_cudf_polars_pytests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -euo pipefail

# It is essential to cd into python/cudf_polars as `pytest-xdist` + `coverage` seem to work only at this directory level.

# Support invoking run_cudf_polars_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/

python -m pytest --cache-clear "$@" tests
3 changes: 3 additions & 0 deletions ci/test_java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ EXITCODE=0
trap "EXITCODE=1" ERR
set +e

# disable large strings
export LIBCUDF_LARGE_STRINGS_ENABLED=0

rapids-logger "Run Java tests"
pushd java
mvn test -B -DCUDF_JNI_ENABLE_PROFILING=OFF
Expand Down
25 changes: 8 additions & 17 deletions ci/test_cudf_polars.sh → ci/test_wheel_cudf_polars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,14 @@ else
fi

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist

RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"}
RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

rapids-logger "Install cudf wheel"
# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/cudf*.whl)[test]

rapids-logger "Install polars (allow pre-release versions)"
python -m pip install 'polars>=1.0.0a0'
# Download the cudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install ./local-cudf-dep/cudf*.whl

rapids-logger "Install cudf_polars"
python -m pip install --no-deps python/cudf_polars
python -m pip install $(echo ./dist/cudf_polars*.whl)[test]

rapids-logger "Run cudf_polars tests"

Expand All @@ -44,13 +37,11 @@ EXITCODE=0
trap set_exitcode ERR
set +e

python -m pytest \
--cache-clear \
./ci/run_cudf_polars_pytests.sh \
--cov cudf_polars \
--cov-fail-under=100 \
--cov-config=python/cudf_polars/pyproject.toml \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf_polars.xml" \
python/cudf_polars/tests
--cov-config=./pyproject.toml \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml"

trap ERR
set -e
Expand Down
2 changes: 1 addition & 1 deletion ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="

# Download the cudf built in the previous step
RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
python -m pip install --no-deps ./local-cudf-dep/cudf*.whl
python -m pip install ./local-cudf-dep/cudf*.whl

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/dask_cudf*.whl)[test]
Expand Down
5 changes: 2 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.3
- dask-cuda==24.8.*
- dask-cuda==24.8.*,>=0.0.0a0
- dlpack>=0.8,<1.0
- doxygen=1.9.1
Expand All @@ -44,10 +43,10 @@ dependencies:
- libcufile=1.4.0.31
- libcurand-dev=10.3.0.86
- libcurand=10.3.0.86
- libkvikio==24.8.*
- libkvikio==24.8.*,>=0.0.0a0
- libparquet==16.1.0.*
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.8.*
- librmm==24.8.*,>=0.0.0a0
- make
- moto>=4.0.8
- msgpack-python
Expand Down
7 changes: 3 additions & 4 deletions conda/environments/all_cuda-122_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.3
- dask-cuda==24.8.*
- dask-cuda==24.8.*,>=0.0.0a0
- dlpack>=0.8,<1.0
- doxygen=1.9.1
Expand All @@ -43,10 +42,10 @@ dependencies:
- libarrow==16.1.0.*
- libcufile-dev
- libcurand-dev
- libkvikio==24.8.*
- libkvikio==24.8.*,>=0.0.0a0
- libparquet==16.1.0.*
- librdkafka>=1.9.0,<1.10.0a0
- librmm==24.8.*
- librmm==24.8.*,>=0.0.0a0
- make
- moto>=4.0.8
- msgpack-python
Expand All @@ -66,7 +65,7 @@ dependencies:
- pre-commit
- pyarrow==16.1.0.*
- pydata-sphinx-theme!=0.14.2
- pynvjitlink
- pynvjitlink>=0.0.0a0
- pytest-benchmark
- pytest-cases>=3.8.2
- pytest-cov
Expand Down
8 changes: 8 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON)
option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON)
mark_as_advanced(CUDF_BUILD_TESTUTIL)
option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON)
option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF)
mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED)
option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF)
Expand Down Expand Up @@ -365,6 +367,7 @@ add_library(
src/interop/to_arrow_device.cu
src/interop/from_arrow_device.cu
src/interop/from_arrow_host.cu
src/interop/from_arrow_stream.cu
src/interop/to_arrow_schema.cpp
src/interop/detail/arrow_allocator.cpp
src/io/avro/avro.cpp
Expand Down Expand Up @@ -782,6 +785,11 @@ if(NOT USE_NVTX)
target_compile_definitions(cudf PUBLIC NVTX_DISABLE)
endif()

# Disable large strings support
if(CUDF_LARGE_STRINGS_DISABLED)
target_compile_definitions(cudf PRIVATE CUDF_LARGE_STRINGS_DISABLED)
endif()

# Define RMM logging level
target_compile_definitions(cudf PRIVATE "RMM_LOGGING_LEVEL=LIBCUDF_LOGGING_LEVEL")

Expand Down
9 changes: 7 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ target_include_directories(

# Use an OBJECT library so we only compile these helper source files only once
add_library(
cudf_benchmark_common OBJECT "${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp"
synchronization/synchronization.cpp io/cuio_common.cpp
cudf_benchmark_common OBJECT
"${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp"
synchronization/synchronization.cpp
io/cuio_common.cpp
common/table_utilities.cpp
common/benchmark_utilities.cpp
common/nvbench_utilities.cpp
)
target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $<TARGET_NAME_IF_EXISTS:conda_env>)
add_custom_command(
Expand Down
27 changes: 27 additions & 0 deletions cpp/benchmarks/common/benchmark_utilities.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "benchmark_utilities.hpp"

void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration)
{
state.SetItemsProcessed(state.iterations() * items_processed_per_iteration);
}

void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration)
{
state.SetBytesProcessed(state.iterations() * bytes_processed_per_iteration);
}
41 changes: 41 additions & 0 deletions cpp/benchmarks/common/benchmark_utilities.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <benchmark/benchmark.h>

/**
* @brief Sets the number of items processed during the benchmark.
*
* This function could be used instead of ::benchmark::State.SetItemsProcessed()
* to avoid repeatedly computing ::benchmark::State.iterations() * items_processed_per_iteration.
*
* @param state the benchmark state
* @param items_processed_per_iteration number of items processed per iteration
*/
void set_items_processed(::benchmark::State& state, int64_t items_processed_per_iteration);

/**
* @brief Sets the number of bytes processed during the benchmark.
*
* This function could be used instead of ::benchmark::State.SetItemsProcessed()
* to avoid repeatedly computing ::benchmark::State.iterations() * bytes_processed_per_iteration.
*
* @param state the benchmark state
* @param bytes_processed_per_iteration number of bytes processed per iteration
*/
void set_bytes_processed(::benchmark::State& state, int64_t bytes_processed_per_iteration);
Loading

0 comments on commit 6e084bc

Please sign in to comment.