From 102d564db21df1d805c2d06571e75a96fa6d822f Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Mon, 8 Apr 2024 07:21:21 -0500 Subject: [PATCH] Enable test-reporting for pandas pytests in CI (#15369) This PR enables pandas test-reporting for pandas pytests in CI by comparing against the results available in nightlies as a baseline. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/15369 --- .github/workflows/pr.yaml | 43 ++++--------------- ci/cudf_pandas_scripts/pandas-tests/diff.sh | 29 +++++++++---- .../pandas-tests/job-summary.py | 4 +- 3 files changed, 32 insertions(+), 44 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2d7ebb62fa8..345ccbea45b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -30,8 +30,7 @@ jobs: - devcontainer - unit-tests-cudf-pandas - pandas-tests - #- pandas-tests-diff - #- pandas-tests-diff-comment + - pandas-tests-diff secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.06 checks: @@ -180,35 +179,11 @@ jobs: script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit. test_summary_show: "none" - #pandas-tests-diff: - # # diff the results of running the Pandas unit tests and publish a job summary - # needs: [pandas-tests-main, pandas-tests-pr] - # secrets: inherit - # # This branch exports a `job_output` output that the downstream job reads. - # uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 - # with: - # node_type: cpu4 - # build_type: pull-request - # run_script: ci/cudf_pandas_scripts/pandas-tests/diff.sh - #pandas-tests-diff-comment: - # # Post comment of pass/fail rate on PR - # runs-on: ubuntu-latest - # needs: pandas-tests-diff - # steps: - # - uses: actions/github-script@v6 - # with: - # script: | - # const branch = process.env.GITHUB_REF_NAME; - # const prBranchPattern = new RegExp("^pull-request/[0-9]+$"); - # if (!branch.match(prBranchPattern)) { - # throw new Error(`${branch} does not match PR branch pattern.`); - # } - # const summary_url = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; - # const prNumber = branch.split("/")[1]; - # const summary_comment = `${{ needs.pandas-tests-diff.outputs.job_output }}`; - # github.rest.issues.createComment({ - # issue_number: prNumber, - # owner: context.repo.owner, - # repo: context.repo.repo, - # body: `${summary_comment}\n\nHere is [a link to the full test summary](${summary_url}).\n` - # }) + pandas-tests-diff: + # diff the results of running the Pandas unit tests and publish a job summary + needs: pandas-tests + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@patch-1 + with: + node_type: cpu4 + build_type: pull-request + run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" diff --git a/ci/cudf_pandas_scripts/pandas-tests/diff.sh b/ci/cudf_pandas_scripts/pandas-tests/diff.sh index 37adabdb9c6..ae5a249bcbd 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/diff.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/diff.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -7,18 +7,31 @@ # branch and the PR branch: # Hard-coded needs to match the version deduced by rapids-upload-artifacts-dir +GH_JOB_NAME="pandas-tests-diff / build" +rapids-logger "Github job name: ${GH_JOB_NAME}" + MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.main-results.json -PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py310.pr-results.json -aws s3 cp $MAIN_ARTIFACT main-results.json +PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py39.pr-results.json + +rapids-logger "Fetching latest available results from nightly" +aws s3api list-objects-v2 --bucket rapids-downloads --prefix "nightly/" --query "sort_by(Contents[?ends_with(Key, '.main-results.json')], &LastModified)[::-1].[Key]" --output text > s3_output.txt +cat s3_output.txt +read -r COMPARE_ENV < s3_output.txt +export COMPARE_ENV +rapids-logger "Latest available results from nightly: ${COMPARE_ENV}" + +aws s3 cp "s3://rapids-downloads/${COMPARE_ENV}" main-results.json aws s3 cp $PR_ARTIFACT pr-results.json # Compute the diff and prepare job summary: python -m pip install pandas tabulate python ci/cudf_pandas_scripts/pandas-tests/job-summary.py main-results.json pr-results.json | tee summary.txt >> "$GITHUB_STEP_SUMMARY" -COMMENT=$(head -1 summary.txt) - +COMMENT=$(head -1 summary.txt | grep -oP '\d+/\d+ \(\d+\.\d+%\).*?(a decrease by|an increase by) \d+\.\d+%') echo "$COMMENT" - -# Magic name that the custom-job.yaml workflow reads and re-exports -echo "job_output=${COMMENT}" >> "${GITHUB_OUTPUT}" +jq --arg COMMENT "$COMMENT" --arg GH_JOB_NAME "$GH_JOB_NAME" -n \ + '{"context": "Pandas tests", + "description": $COMMENT, + "state":"success", + "job_name": $GH_JOB_NAME}' \ + > gh-status.json diff --git a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py index 1e83e51ab04..93a815838b7 100644 --- a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py +++ b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -40,7 +40,7 @@ def get_total_and_passed(results): "Merging this PR would result in " f"{pr_passed}/{pr_total} ({passing_percentage:.2f}%) " "Pandas tests passing, " - f"{rate_change_type} in the test pass rate by " + f"{rate_change_type} by " f"{pass_rate_change:.2f}%. " f"Trunk stats: {main_passed}/{main_total}." )