Skip to content

Commit

Permalink
Enable test-reporting for pandas pytests in CI (#15369)
Browse files Browse the repository at this point in the history
This PR enables test reporting for the pandas pytest suite in CI, using the most recent results available from the nightlies as the comparison baseline.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)

URL: #15369
  • Loading branch information
galipremsagar authored Apr 8, 2024
1 parent c5eb324 commit 102d564
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 44 deletions.
43 changes: 9 additions & 34 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ jobs:
- devcontainer
- unit-tests-cudf-pandas
- pandas-tests
#- pandas-tests-diff
#- pandas-tests-diff-comment
- pandas-tests-diff
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.06
checks:
Expand Down Expand Up @@ -180,35 +179,11 @@ jobs:
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
test_summary_show: "none"
#pandas-tests-diff:
# # diff the results of running the Pandas unit tests and publish a job summary
# needs: [pandas-tests-main, pandas-tests-pr]
# secrets: inherit
# # This branch exports a `job_output` output that the downstream job reads.
# uses: rapidsai/shared-workflows/.github/workflows/[email protected]
# with:
# node_type: cpu4
# build_type: pull-request
# run_script: ci/cudf_pandas_scripts/pandas-tests/diff.sh
#pandas-tests-diff-comment:
# # Post comment of pass/fail rate on PR
# runs-on: ubuntu-latest
# needs: pandas-tests-diff
# steps:
# - uses: actions/github-script@v6
# with:
# script: |
# const branch = process.env.GITHUB_REF_NAME;
# const prBranchPattern = new RegExp("^pull-request/[0-9]+$");
# if (!branch.match(prBranchPattern)) {
# throw new Error(`${branch} does not match PR branch pattern.`);
# }
# const summary_url = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
# const prNumber = branch.split("/")[1];
# const summary_comment = `${{ needs.pandas-tests-diff.outputs.job_output }}`;
# github.rest.issues.createComment({
# issue_number: prNumber,
# owner: context.repo.owner,
# repo: context.repo.repo,
# body: `${summary_comment}\n\nHere is [a link to the full test summary](${summary_url}).\n`
# })
pandas-tests-diff:
# diff the results of running the Pandas unit tests and publish a job summary
needs: pandas-tests
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@patch-1
with:
node_type: cpu4
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
29 changes: 21 additions & 8 deletions ci/cudf_pandas_scripts/pandas-tests/diff.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Download the summarized results of running the Pandas tests: the latest
# nightly results (used as the main-branch baseline) and the PR branch results:

# Hard-coded; this needs to match the version deduced by rapids-upload-artifacts-dir
GH_JOB_NAME="pandas-tests-diff / build"
rapids-logger "Github job name: ${GH_JOB_NAME}"

MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.main-results.json
PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.pr-results.json
aws s3 cp $MAIN_ARTIFACT main-results.json
PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py39.pr-results.json

rapids-logger "Fetching latest available results from nightly"
aws s3api list-objects-v2 --bucket rapids-downloads --prefix "nightly/" --query "sort_by(Contents[?ends_with(Key, '.main-results.json')], &LastModified)[::-1].[Key]" --output text > s3_output.txt
cat s3_output.txt
read -r COMPARE_ENV < s3_output.txt
export COMPARE_ENV
rapids-logger "Latest available results from nightly: ${COMPARE_ENV}"

aws s3 cp "s3://rapids-downloads/${COMPARE_ENV}" main-results.json
aws s3 cp $PR_ARTIFACT pr-results.json

# Compute the diff and prepare job summary:
python -m pip install pandas tabulate
python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json | tee summary.txt >> "$GITHUB_STEP_SUMMARY"

COMMENT=$(head -1 summary.txt)

COMMENT=$(head -1 summary.txt | grep -oP '\d+/\d+ \(\d+\.\d+%\).*?(a decrease by|an increase by) \d+\.\d+%')
echo "$COMMENT"

# Magic name that the custom-job.yaml workflow reads and re-exports
echo "job_output=${COMMENT}" >> "${GITHUB_OUTPUT}"
jq --arg COMMENT "$COMMENT" --arg GH_JOB_NAME "$GH_JOB_NAME" -n \
'{"context": "Pandas tests",
"description": $COMMENT,
"state":"success",
"job_name": $GH_JOB_NAME}' \
> gh-status.json
4 changes: 2 additions & 2 deletions ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

Expand Down Expand Up @@ -40,7 +40,7 @@ def get_total_and_passed(results):
"Merging this PR would result in "
f"{pr_passed}/{pr_total} ({passing_percentage:.2f}%) "
"Pandas tests passing, "
f"{rate_change_type} in the test pass rate by "
f"{rate_change_type} by "
f"{pass_rate_change:.2f}%. "
f"Trunk stats: {main_passed}/{main_total}."
)
Expand Down

0 comments on commit 102d564

Please sign in to comment.