Skip to content

Commit

Permalink
Merge pull request #14372 from bdice/branch-23.12-merge-23.10
Browse files Browse the repository at this point in the history
Forward-merge branch-23.10 to branch-23.12
  • Loading branch information
raydouglass authored Nov 8, 2023
2 parents d3dcc75 + 8cdedd8 commit a35f90c
Show file tree
Hide file tree
Showing 128 changed files with 9,508 additions and 531 deletions.
22 changes: 22 additions & 0 deletions .github/ISSUE_TEMPLATE/pandas_function_request.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
name: Request a Missing Pandas Function
about: Request GPU support for a function executed on the CPU in pandas accelerator mode.
title: "[FEA]"
labels: "? - Needs Triage, feature request"
assignees: ''

---

This issue template is intended to be used primarily for requests related to pandas accelerator mode. If you'd like to file a general cuDF feature request, please [click here](https://github.com/rapidsai/cudf/issues/new?assignees=&labels=%3F+-+Needs+Triage%2C+feature+request&projects=&template=feature_request.md&title=%5BFEA%5D).


**Missing Pandas Feature Request**
A clear and concise summary of the pandas function(s) you'd like to be able to run with cuDF.


**Profiler Output**
If you used the profiler in pandas accelerator mode, please provide the full output of your profiling report.


**Additional context**
Add any other context, code examples, or references to existing implementations about the feature request here.
53 changes: 53 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ jobs:
- wheel-tests-cudf
- wheel-build-dask-cudf
- wheel-tests-dask-cudf
- unit-tests-cudf-pandas
- pandas-tests
#- pandas-tests-diff
#- pandas-tests-diff-comment
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
checks:
Expand Down Expand Up @@ -126,3 +130,52 @@ jobs:
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))
build_type: pull-request
script: ci/test_wheel_dask_cudf.sh
unit-tests-cudf-pandas:
  # Runs ci/cudf_pandas_scripts/run_tests.sh through the shared reusable
  # workflow; starts only after the wheel-build-cudf job has completed.
  uses: rapidsai/shared-workflows/.github/workflows/[email protected]
  needs: wheel-build-cudf
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/cudf_pandas_scripts/run_tests.sh
    # Keep only the amd64 / Python 3.10 matrix entries whose CUDA version is
    # 11.8.0 or 12.0.1.
    matrix_filter: 'map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1")))'
pandas-tests:
  # Run the Pandas unit tests using the PR branch ("pr" is the label passed
  # to run.sh); starts only after the wheel-build-cudf job has completed.
  uses: rapidsai/shared-workflows/.github/workflows/[email protected]
  needs: wheel-build-cudf
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
    # Keep the single amd64 matrix entry with the highest CUDA_VER, wrapped
    # back into a one-element list.
    matrix_filter: 'map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]'
#pandas-tests-diff:
# # diff the results of running the Pandas unit tests and publish a job summary
# needs: [pandas-tests-main, pandas-tests-pr]
# secrets: inherit
# # This branch exports a `job_output` output that the downstream job reads.
# uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@wence/fea/custom-job-output
# with:
# node_type: cpu4
# build_type: pull-request
# run_script: ci/cudf_pandas_scripts/pandas-tests/diff.sh
#pandas-tests-diff-comment:
# # Post comment of pass/fail rate on PR
# runs-on: ubuntu-latest
# needs: pandas-tests-diff
# steps:
# - uses: actions/github-script@v6
# with:
# script: |
# const branch = process.env.GITHUB_REF_NAME;
# const prBranchPattern = new RegExp("^pull-request/[0-9]+$");
# if (!branch.match(prBranchPattern)) {
# throw new Error(`${branch} does not match PR branch pattern.`);
# }
# const summary_url = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
# const prNumber = branch.split("/")[1];
# const summary_comment = `${{ needs.pandas-tests-diff.outputs.job_output }}`;
# github.rest.issues.createComment({
# issue_number: prNumber,
# owner: context.repo.owner,
# repo: context.repo.repo,
# body: `${summary_comment}\n\nHere is [a link to the full test summary](${summary_url}).\n`
# })
22 changes: 22 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,25 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/test_wheel_dask_cudf.sh
unit-tests-cudf-pandas:
  # Nightly run of the cudf.pandas unit tests.
  # No `needs:` here: like the sibling nightly `pandas-tests` job, the test
  # script downloads the cudf wheel from S3 rather than depending on a build
  # job in this workflow. A `needs:` entry naming a job that is not defined
  # in the same workflow file makes the whole workflow invalid.
  secrets: inherit
  uses: rapidsai/shared-workflows/.github/workflows/[email protected]
  with:
    build_type: nightly
    branch: ${{ inputs.branch }}
    date: ${{ inputs.date }}
    sha: ${{ inputs.sha }}
    script: ci/cudf_pandas_scripts/run_tests.sh
pandas-tests:
  # Nightly run of the Pandas unit tests.
  uses: rapidsai/shared-workflows/.github/workflows/[email protected]
  secrets: inherit
  with:
    build_type: nightly
    branch: ${{ inputs.branch }}
    date: ${{ inputs.date }}
    sha: ${{ inputs.sha }}
    # Keep the single amd64 matrix entry with the highest CUDA_VER, wrapped
    # back into a one-element list.
    matrix_filter: 'map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]'
    # pr mode uses the HEAD of the branch, which is also correct for nightlies
    script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ dask-worker-space/

# Sphinx docs & build artifacts
docs/cudf/source/api_docs/generated/*
docs/cudf/source/api_docs/api/*
docs/cudf/source/user_guide/api_docs/api/*
docs/cudf/source/user_guide/example_output/*
docs/cudf/source/user_guide/cudf.*Dtype.*.rst
_html
Expand Down
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ repos:
# https://github.com/PyCQA/pydocstyle/issues/603
additional_dependencies: [toml]
args: ["--config=pyproject.toml"]
exclude: |
(?x)^(
^python/cudf/cudf/pandas/scripts/.*|
^python/cudf/cudf_pandas_tests/.*
)
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.6.3
hooks:
Expand Down
2 changes: 1 addition & 1 deletion ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
# Patch project metadata files to include the CUDA version suffix and version override.
pyproject_file="${package_dir}/pyproject.toml"

sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
sed -i "s/^name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
echo "${version}" > VERSION
sed -i "/^__git_commit__/ s/= .*/= \"${commit}\"/g" "${package_dir}/${package_name}/_version.py"

Expand Down
1 change: 1 addition & 0 deletions ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"

python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*


RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
RAPIDS_PY_WHEEL_NAME="cudf_${AUDITWHEEL_POLICY}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist
24 changes: 24 additions & 0 deletions ci/cudf_pandas_scripts/pandas-tests/diff.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Compare the summarized Pandas test results of the main branch and the PR
# branch, append the comparison to the GitHub job summary, and export its
# first line as the `job_output` step output.

# Abort on the first failure (including a failure on the left-hand side of a
# pipeline) so that a missing artifact or a crashed summary script does not
# silently produce an empty `job_output`.
set -euo pipefail

# Download the summarized results of running the Pandas tests on both the main
# branch and the PR branch.
# The "cuda12_<arch>_py310" prefix is hard-coded and needs to match the name
# deduced by rapids-upload-artifacts-dir.
MAIN_ARTIFACT="$(rapids-s3-path)cuda12_$(arch)_py310.main-results.json"
PR_ARTIFACT="$(rapids-s3-path)cuda12_$(arch)_py310.pr-results.json"
aws s3 cp "${MAIN_ARTIFACT}" main-results.json
aws s3 cp "${PR_ARTIFACT}" pr-results.json

# Compute the diff and prepare job summary:
python -m pip install pandas tabulate
python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json | tee summary.txt >> "$GITHUB_STEP_SUMMARY"

# Only the first line of the summary (the one-sentence comment) is exported.
COMMENT=$(head -1 summary.txt)

echo "$COMMENT"

# Magic name that the custom-job.yaml workflow reads and re-exports
echo "job_output=${COMMENT}" >> "${GITHUB_OUTPUT}"
100 changes: 100 additions & 0 deletions ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import json
import sys

import pandas as pd


def get_total_and_passed(results):
    """Aggregate per-module test counts into overall totals.

    Parameters
    ----------
    results : dict
        Maps a module name to a dict of counts. Only the "failed",
        "errored" and "passed" entries are read; a missing entry counts
        as 0.

    Returns
    -------
    tuple
        ``(total_tests, total_passed)`` where ``total_tests`` is the sum of
        failed, errored and passed counts over all modules.
    """
    rows = list(results.values())
    failed = sum(row.get("failed", 0) for row in rows)
    errored = sum(row.get("errored", 0) for row in rows)
    passed = sum(row.get("passed", 0) for row in rows)
    return failed + errored + passed, passed


# Command line: job-summary.py <main-results.json> <pr-results.json>
main_json = sys.argv[1]
pr_json = sys.argv[2]

# read the results of summarize-test-results.py --summary
with open(main_json) as f:
    main_results = json.load(f)
main_total, main_passed = get_total_and_passed(main_results)

with open(pr_json) as f:
    pr_results = json.load(f)
pr_total, pr_passed = get_total_and_passed(pr_results)

# Percentage of the PR's counted tests that passed. An empty result set
# (pr_total == 0) is reported as 0% instead of raising ZeroDivisionError.
passing_percentage = pr_passed / pr_total * 100 if pr_total else 0.0

# Relative change in the number of passing tests, measured against main.
if main_passed:
    pass_rate_change = abs(pr_passed - main_passed) / main_passed * 100
else:
    # With no passing tests on main the relative change is undefined:
    # report 0 when the PR passes none either, otherwise infinity.
    pass_rate_change = 0.0 if pr_passed == 0 else float("inf")
rate_change_type = "a decrease" if pr_passed < main_passed else "an increase"

# One-sentence summary; it is printed as the first line of the output.
comment = (
    "Merging this PR would result in "
    f"{pr_passed}/{pr_total} ({passing_percentage:.2f}%) "
    "Pandas tests passing, "
    f"{rate_change_type} in the test pass rate by "
    f"{pass_rate_change:.2f}%. "
    f"Trunk stats: {main_passed}/{main_total}."
)


def emoji_passed(x):
    """Format a change in passed tests: a gain gets ✅, a loss gets ❌.

    Any other value (zero, or one that compares neither above nor below
    zero) is rendered without a marker.
    """
    marker = "✅" if x > 0 else ("❌" if x < 0 else "")
    return f"{x}{marker}"


def emoji_failed(x):
    """Format a change in failed tests: an increase gets ❌, a drop gets ✅.

    Any other value (zero, or one that compares neither above nor below
    zero) is rendered without a marker.
    """
    marker = "❌" if x > 0 else ("✅" if x < 0 else "")
    return f"{x}{marker}"


# Build one table per branch (a row per test module, sorted by module name),
# then render the PR counts next to their deltas against main as markdown.
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()

count_columns = ["total", "passed", "failed", "skipped"]

# PR minus main, computed before either table is trimmed to the count columns.
diff_df = (pr_df - main_df)[count_columns]
diff_df.columns = [f"{name}_diff" for name in count_columns]
pr_df = pr_df[count_columns]

# Decorate the deltas; skipped tests use the same markers as failed tests.
for column, decorate in (
    ("passed_diff", emoji_passed),
    ("failed_diff", emoji_failed),
    ("skipped_diff", emoji_failed),
):
    diff_df[column] = diff_df[column].map(decorate)

df = pd.concat([pr_df, diff_df], axis=1).rename_axis("Test module")

display_names = {
    "total": "Total tests",
    "passed": "Passed tests",
    "failed": "Failed tests",
    "skipped": "Skipped tests",
    "total_diff": "Total delta",
    "passed_diff": "Passed delta",
    "failed_diff": "Failed delta",
    "skipped_diff": "Skipped delta",
}
df = df.rename(columns=display_names)

# Modules with the most failures (then the most skips) come first.
df = df.sort_values(by=["Failed tests", "Skipped tests"], ascending=False)

for line in (
    comment,
    "",
    "Here are the results of running the Pandas tests against this PR:",
    "",
    df.to_markdown(),
):
    print(line)
42 changes: 42 additions & 0 deletions ci/cudf_pandas_scripts/pandas-tests/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Usage: run.sh <label>
#
# Installs the cudf wheel downloaded from S3, runs
# python/cudf/cudf/pandas/scripts/run-pandas-tests.sh, and moves the
# summarized results (<label>-results.json) into RAPIDS_ARTIFACTS_DIR.
# <label> is used for the log message and the report file names.

PANDAS_TESTS_BRANCH=${1}

rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch"
rapids-logger "PR number: $RAPIDS_REF_NAME"

# Set the manylinux version used for downloading the wheels so that we test the
# newer ABI wheels on the newer images that support their installation.
# Need to disable pipefail for the head not to fail, see
# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
set +o pipefail
glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
set -o pipefail
manylinux_version="2_17"
# Fall back to 0 (i.e. manylinux 2_17) if no version could be parsed, instead
# of letting the comparison fail on an empty operand.
if [[ "${glibc_minor_version:-0}" -ge 28 ]]; then
    manylinux_version="2_28"
fi
manylinux="manylinux_${manylinux_version}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
# Quoted so that the "[test,pandas_tests]" extras suffix is passed to pip
# verbatim rather than being treated as a glob bracket expression.
python -m pip install "$(ls ./local-cudf-dep/cudf*.whl)[test,pandas_tests]"

# NOTE(review): COMMIT is not assigned anywhere in this script; presumably it
# is expected from the environment. Only switch revisions when it is set.
if [[ -n "${COMMIT:-}" ]]; then
    git checkout "${COMMIT}"
fi

bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
    -n 10 \
    --tb=line \
    --skip-slow \
    --max-worker-restart=3 \
    --import-mode=importlib \
    --report-log="${PANDAS_TESTS_BRANCH}.json" 2>&1

# summarize the results and save them to artifacts:
python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json "pandas-testing/${PANDAS_TESTS_BRANCH}.json" > "pandas-testing/${PANDAS_TESTS_BRANCH}-results.json"
RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
mkdir -p "${RAPIDS_ARTIFACTS_DIR}"
mv "pandas-testing/${PANDAS_TESTS_BRANCH}-results.json" "${RAPIDS_ARTIFACTS_DIR}/"
52 changes: 52 additions & 0 deletions ci/cudf_pandas_scripts/run_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Runs the tests in python/cudf/cudf_pandas_tests/ with the cudf.pandas pytest
# plugin loaded. Unless --no-cudf is given, the cudf wheel is first downloaded
# from S3 and installed with its test extras.

set -eoxu pipefail

# Function to display script usage
function display_usage {
    echo "Usage: $0 [--no-cudf]"
}

# Default value for the --no-cudf option
no_cudf=false

# Parse command-line arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --no-cudf)
            no_cudf=true
            shift
            ;;
        *)
            # Diagnostics go to stderr so they are not mixed into stdout.
            echo "Error: Unknown option $1" >&2
            display_usage >&2
            exit 1
            ;;
    esac
done

if [ "$no_cudf" = true ]; then
    echo "Skipping cudf install"
else
    # Set the manylinux version used for downloading the wheels so that we test the
    # newer ABI wheels on the newer images that support their installation.
    # Need to disable pipefail for the head not to fail, see
    # https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
    set +o pipefail
    glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
    set -o pipefail
    manylinux_version="2_17"
    # Fall back to 0 (i.e. manylinux 2_17) if no version could be parsed,
    # instead of letting the comparison fail on an empty operand.
    if [[ "${glibc_minor_version:-0}" -ge 28 ]]; then
        manylinux_version="2_28"
    fi
    manylinux="manylinux_${manylinux_version}"

    RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
    RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
    # Quoted so that the "[test,cudf_pandas_tests]" extras suffix is passed to
    # pip verbatim rather than being treated as a glob bracket expression.
    python -m pip install "$(ls ./local-cudf-dep/cudf*.whl)[test,cudf_pandas_tests]"
fi

python -m pytest -p cudf.pandas ./python/cudf/cudf_pandas_tests/
1 change: 1 addition & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ dependencies:
- python-snappy>=0.6.0
- python>=3.9,<3.11
- pytorch<1.12.0
- rich
- rmm==23.12.*
- s3fs>=2022.3.0
- scikit-build>=0.13.1
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ dependencies:
- python-snappy>=0.6.0
- python>=3.9,<3.11
- pytorch<1.12.0
- rich
- rmm==23.12.*
- s3fs>=2022.3.0
- scikit-build>=0.13.1
Expand Down
1 change: 1 addition & 0 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ requirements:
- nvtx >=0.2.1
- packaging
- cachetools
- rich

test:
requires:
Expand Down
Loading

0 comments on commit a35f90c

Please sign in to comment.