-
Notifications
You must be signed in to change notification settings - Fork 917
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #14372 from bdice/branch-23.12-merge-23.10
Forward-merge branch-23.10 to branch-23.12
- Loading branch information
Showing
128 changed files
with
9,508 additions
and
531 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
--- | ||
name: Request a Missing Pandas Function | ||
about: Request GPU support for a function executed on the CPU in pandas accelerator mode. | ||
title: "[FEA]" | ||
labels: "? - Needs Triage, feature request" | ||
assignees: '' | ||
|
||
--- | ||
|
||
This issue template is intended to be used primarily for requests related to pandas accelerator mode. If you'd like to file a general cuDF feature request, please [click here](https://github.com/rapidsai/cudf/issues/new?assignees=&labels=%3F+-+Needs+Triage%2C+feature+request&projects=&template=feature_request.md&title=%5BFEA%5D). | ||
|
||
|
||
**Missing Pandas Feature Request** | ||
A clear and concise summary of the pandas function(s) you'd like to be able to run with cuDF. | ||
|
||
|
||
**Profiler Output** | ||
If you used the profiler in pandas accelerator mode, please provide the full output of your profiling report. | ||
|
||
|
||
**Additional context** | ||
Add any other context, code examples, or references to existing implementations about the feature request here. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,10 @@ jobs: | |
- wheel-tests-cudf | ||
- wheel-build-dask-cudf | ||
- wheel-tests-dask-cudf | ||
- unit-tests-cudf-pandas | ||
- pandas-tests | ||
#- pandas-tests-diff | ||
#- pandas-tests-diff-comment | ||
secrets: inherit | ||
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-23.12 | ||
checks: | ||
|
@@ -126,3 +130,52 @@ jobs: | |
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) | ||
build_type: pull-request | ||
script: ci/test_wheel_dask_cudf.sh | ||
unit-tests-cudf-pandas: | ||
needs: wheel-build-cudf | ||
secrets: inherit | ||
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 | ||
with: | ||
matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.10" and (.CUDA_VER == "11.8.0" or .CUDA_VER == "12.0.1"))) | ||
build_type: pull-request | ||
script: ci/cudf_pandas_scripts/run_tests.sh | ||
pandas-tests: | ||
# run the Pandas unit tests using PR branch | ||
needs: wheel-build-cudf | ||
secrets: inherit | ||
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 | ||
with: | ||
matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.] | ||
build_type: pull-request | ||
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr | ||
#pandas-tests-diff: | ||
# # diff the results of running the Pandas unit tests and publish a job summary | ||
# needs: [pandas-tests-main, pandas-tests-pr] | ||
# secrets: inherit | ||
# # This branch exports a `job_output` output that the downstream job reads. | ||
# uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@wence/fea/custom-job-output | ||
# with: | ||
# node_type: cpu4 | ||
# build_type: pull-request | ||
# run_script: ci/cudf_pandas_scripts/pandas-tests/diff.sh | ||
#pandas-tests-diff-comment: | ||
# # Post comment of pass/fail rate on PR | ||
# runs-on: ubuntu-latest | ||
# needs: pandas-tests-diff | ||
# steps: | ||
# - uses: actions/github-script@v6 | ||
# with: | ||
# script: | | ||
# const branch = process.env.GITHUB_REF_NAME; | ||
# const prBranchPattern = new RegExp("^pull-request/[0-9]+$"); | ||
# if (!branch.match(prBranchPattern)) { | ||
# throw new Error(`${branch} does not match PR branch pattern.`); | ||
# } | ||
# const summary_url = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; | ||
# const prNumber = branch.split("/")[1]; | ||
# const summary_comment = `${{ needs.pandas-tests-diff.outputs.job_output }}`; | ||
# github.rest.issues.createComment({ | ||
# issue_number: prNumber, | ||
# owner: context.repo.owner, | ||
# repo: context.repo.repo, | ||
# body: `${summary_comment}\n\nHere is [a link to the full test summary](${summary_url}).\n` | ||
# }) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,3 +96,25 @@ jobs: | |
date: ${{ inputs.date }} | ||
sha: ${{ inputs.sha }} | ||
script: ci/test_wheel_dask_cudf.sh | ||
unit-tests-cudf-pandas: | ||
needs: wheel-build-cudf | ||
secrets: inherit | ||
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 | ||
with: | ||
build_type: nightly | ||
branch: ${{ inputs.branch }} | ||
date: ${{ inputs.date }} | ||
sha: ${{ inputs.sha }} | ||
script: ci/cudf_pandas_scripts/run_tests.sh | ||
pandas-tests: | ||
# run the Pandas unit tests | ||
secrets: inherit | ||
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-23.12 | ||
with: | ||
matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.] | ||
build_type: nightly | ||
branch: ${{ inputs.branch }} | ||
date: ${{ inputs.date }} | ||
sha: ${{ inputs.sha }} | ||
# pr mode uses the HEAD of the branch, which is also correct for nightlies | ||
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#!/usr/bin/env bash | ||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
# All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Download the summarized results of running the Pandas tests on both the main
# branch and the PR branch, diff them, and publish the diff as a job summary.

# Stop at the first failing command, including failures inside a pipeline.
# Without this, a failed download or a crash in job-summary.py is hidden by
# the `| tee` below and the job publishes an empty summary.
set -euo pipefail

# The artifact file names are hard-coded and need to match the names deduced
# by rapids-upload-artifacts-dir.
MAIN_ARTIFACT="$(rapids-s3-path)cuda12_$(arch)_py310.main-results.json"
PR_ARTIFACT="$(rapids-s3-path)cuda12_$(arch)_py310.pr-results.json"
aws s3 cp "${MAIN_ARTIFACT}" main-results.json
aws s3 cp "${PR_ARTIFACT}" pr-results.json

# Compute the diff and prepare job summary:
python -m pip install pandas tabulate
python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json | tee summary.txt >> "${GITHUB_STEP_SUMMARY}"

# Only the first line of the summary is forwarded as the job output.
COMMENT=$(head -1 summary.txt)

echo "${COMMENT}"

# Magic name that the custom-job.yaml workflow reads and re-exports
echo "job_output=${COMMENT}" >> "${GITHUB_OUTPUT}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
# All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
import json | ||
import sys | ||
|
||
import pandas as pd | ||
|
||
|
||
def get_total_and_passed(results):
    """Add up the per-module test counts.

    Parameters
    ----------
    results : dict
        Maps a module name to a dict of counts. Only the ``"failed"``,
        ``"errored"`` and ``"passed"`` entries are read; a missing entry
        counts as 0.

    Returns
    -------
    tuple
        ``(total_tests, total_passed)`` where ``total_tests`` is
        failed + errored + passed. Any other entry (for example
        ``"skipped"``) is not part of the total.
    """
    rows = list(results.values())
    total_failed = sum(row.get("failed", 0) for row in rows)
    total_errored = sum(row.get("errored", 0) for row in rows)
    total_passed = sum(row.get("passed", 0) for row in rows)
    total_tests = total_failed + total_errored + total_passed
    return total_tests, total_passed
|
||
|
||
def _percentage(numerator, denominator):
    """Return ``numerator / denominator`` as a percentage.

    A zero denominator does not raise: the result is 0.0 when the numerator
    is also zero and ``inf`` otherwise.
    """
    if denominator == 0:
        return 0.0 if numerator == 0 else float("inf")
    return numerator / denominator * 100


# Command line: <main results JSON> <PR results JSON>
main_json = sys.argv[1]
pr_json = sys.argv[2]

# Each file holds per-module test counts written by summarize-test-results.py.
with open(main_json) as f:
    main_results = json.load(f)
main_total, main_passed = get_total_and_passed(main_results)

with open(pr_json) as f:
    pr_results = json.load(f)
pr_total, pr_passed = get_total_and_passed(pr_results)

# Share of the PR's (failed + errored + passed) tests that passed.
passing_percentage = _percentage(pr_passed, pr_total)
# Change in the number of passing tests, relative to the number passing on
# main. NOTE(review): the message below calls this the "pass rate"; confirm
# that a relative change in the passed count is what is intended.
pass_rate_change = _percentage(abs(pr_passed - main_passed), main_passed)
rate_change_type = "a decrease" if pr_passed < main_passed else "an increase"

# One-sentence headline; it is printed as the first line of the summary.
comment = (
    "Merging this PR would result in "
    f"{pr_passed}/{pr_total} ({passing_percentage:.2f}%) "
    "Pandas tests passing, "
    f"{rate_change_type} in the test pass rate by "
    f"{pass_rate_change:.2f}%. "
    f"Trunk stats: {main_passed}/{main_total}."
)
|
||
|
||
def _with_emoji(x, positive_mark, negative_mark):
    """Format ``x`` followed by a mark chosen by its sign.

    Values that are neither greater nor less than zero (zero itself, or NaN)
    are formatted without a mark.
    """
    if x > 0:
        return f"{x}{positive_mark}"
    if x < 0:
        return f"{x}{negative_mark}"
    return f"{x}"


def emoji_passed(x):
    """Format a delta where an increase is good (✅) and a decrease is bad (❌)."""
    return _with_emoji(x, "✅", "❌")


def emoji_failed(x):
    """Format a delta where an increase is bad (❌) and a decrease is good (✅)."""
    return _with_emoji(x, "❌", "✅")
|
||
|
||
# Build one table per run (one row per test module), subtract them, and
# render the PR counts next to their deltas as a markdown table.
count_columns = ["total", "passed", "failed", "skipped"]

pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
diff_df = (pr_df - main_df)[count_columns]
pr_df = pr_df[count_columns]

# Mark the delta columns so they stay distinct after concatenation, then
# decorate them. Skipped deltas use the same marks as failed deltas.
diff_df.columns = diff_df.columns + "_diff"
for delta_column, formatter in (
    ("passed_diff", emoji_passed),
    ("failed_diff", emoji_failed),
    ("skipped_diff", emoji_failed),
):
    diff_df[delta_column] = diff_df[delta_column].map(formatter)

display_names = {
    "total": "Total tests",
    "passed": "Passed tests",
    "failed": "Failed tests",
    "skipped": "Skipped tests",
    "total_diff": "Total delta",
    "passed_diff": "Passed delta",
    "failed_diff": "Failed delta",
    "skipped_diff": "Skipped delta",
}
df = (
    pd.concat([pr_df, diff_df], axis=1)
    .rename_axis("Test module")
    .rename(columns=display_names)
    # Modules with the most failures (then the most skips) come first.
    .sort_values(by=["Failed tests", "Skipped tests"], ascending=False)
)

# The headline comes first, then the table, with blank lines between them.
for line in (
    comment,
    "",
    "Here are the results of running the Pandas tests against this PR:",
    "",
):
    print(line)
print(df.to_markdown())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/usr/bin/env bash | ||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
# All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Run the Pandas test suite against the cudf wheel built for this job and
# store a JSON summary of the results in the artifacts directory.
#
# Usage: run.sh <label>
#   <label> names the report (<label>.json) and the summary
#   (<label>-results.json); the CI workflows pass "pr".
#
# NOTE(review): errexit is not enabled in this script, so the summary step
# below still runs when the test run exits non-zero. This looks intentional.

PANDAS_TESTS_BRANCH=${1:?"Usage: $0 <label>"}

rapids-logger "Running Pandas tests using ${PANDAS_TESTS_BRANCH} branch"
rapids-logger "PR number: ${RAPIDS_REF_NAME}"

# Set the manylinux version used for downloading the wheels so that we test the
# newer ABI wheels on the newer images that support their installation.
# Need to disable pipefail for the head not to fail, see
# https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
set +o pipefail
glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
set -o pipefail
manylinux_version="2_17"
if [[ ${glibc_minor_version} -ge 28 ]]; then
    manylinux_version="2_28"
fi
manylinux="manylinux_${manylinux_version}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
# Left unquoted on purpose: quoting would change behavior if more than one
# wheel were downloaded.
python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,pandas_tests]

# COMMIT is not assigned anywhere in this script, so only switch revisions
# when the environment provides one.
if [[ -n "${COMMIT:-}" ]]; then
    git checkout "${COMMIT}"
fi

bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
    -n 10 \
    --tb=line \
    --skip-slow \
    --max-worker-restart=3 \
    --import-mode=importlib \
    --report-log="${PANDAS_TESTS_BRANCH}.json" 2>&1

# summarize the results and save them to artifacts:
python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json "pandas-testing/${PANDAS_TESTS_BRANCH}.json" > "pandas-testing/${PANDAS_TESTS_BRANCH}-results.json"
RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
mkdir -p "${RAPIDS_ARTIFACTS_DIR}"
mv "pandas-testing/${PANDAS_TESTS_BRANCH}-results.json" "${RAPIDS_ARTIFACTS_DIR}/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/bin/bash | ||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
# All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# Install the cudf wheel for this job (unless --no-cudf is given) and run the
# cudf.pandas unit tests.
set -euxo pipefail

# Print a one-line synopsis of the accepted command line.
display_usage() {
    echo "Usage: $0 [--no-cudf]"
}

# The wheel is installed by default; --no-cudf turns that off.
install_cudf=true

# --no-cudf is the only recognized argument; anything else is an error.
for arg in "$@"; do
    if [[ "${arg}" == "--no-cudf" ]]; then
        install_cudf=false
    else
        echo "Error: Unknown option ${arg}"
        display_usage
        exit 1
    fi
done

if [[ "${install_cudf}" == false ]]; then
    echo "Skipping cudf install"
else
    # Set the manylinux version used for downloading the wheels so that we test the
    # newer ABI wheels on the newer images that support their installation.
    # Need to disable pipefail for the head not to fail, see
    # https://stackoverflow.com/questions/19120263/why-exit-code-141-with-grep-q
    set +o pipefail
    glibc_minor_version=$(ldd --version | head -1 | grep -o "[0-9]\.[0-9]\+" | tail -1 | cut -d '.' -f2)
    set -o pipefail
    manylinux_version="2_17"
    if [[ ${glibc_minor_version} -ge 28 ]]; then
        manylinux_version="2_28"
    fi
    manylinux="manylinux_${manylinux_version}"

    RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
    RAPIDS_PY_WHEEL_NAME="cudf_${manylinux}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep
    python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf_pandas_tests]
fi

python -m pytest -p cudf.pandas ./python/cudf/cudf_pandas_tests/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -103,6 +103,7 @@ requirements: | |
- nvtx >=0.2.1 | ||
- packaging | ||
- cachetools | ||
- rich | ||
|
||
test: | ||
requires: | ||
|
Oops, something went wrong.