From f7c5d32a833dcc6b9b35756b89a0eb19b8bc9a40 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 25 Sep 2024 14:37:37 -0500 Subject: [PATCH] Display deltas for `cudf.pandas` test summary (#16864) This PR displays delta's for CPU and GPU usage metrics that are extracted from `cudf.pandas` pytests. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cudf/pull/16864 --- .github/workflows/pr.yaml | 18 +++++- .../pandas-tests/job-summary.py | 64 +++++++++++++++---- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b4c449ce5d8..766df59594b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -50,6 +50,7 @@ jobs: test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }} test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }} test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }} + test_cudf_pandas: ${{ steps.changed-files.outputs.cudf_pandas_any_changed == 'true' }} steps: - name: Get PR info id: get-pr-info @@ -82,6 +83,7 @@ jobs: - '!java/**' - '!notebooks/**' - '!python/**' + - '!ci/cudf_pandas_scripts/**' java: - '**' - '!CONTRIBUTING.md' @@ -90,11 +92,13 @@ jobs: - '!img/**' - '!notebooks/**' - '!python/**' + - '!ci/cudf_pandas_scripts/**' notebooks: - '**' - '!CONTRIBUTING.md' - '!README.md' - '!java/**' + - '!ci/cudf_pandas_scripts/**' python: - '**' - '!CONTRIBUTING.md' @@ -103,6 +107,16 @@ jobs: - '!img/**' - '!java/**' - '!notebooks/**' + - '!ci/cudf_pandas_scripts/**' + cudf_pandas: + - '**' + - 'ci/cudf_pandas_scripts/**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!java/**' + - '!notebooks/**' checks: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 @@ -289,7 +303,7 @@ jobs: needs: [wheel-build-cudf, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 - if: needs.changed-files.outputs.test_python == 'true' + if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -300,7 +314,7 @@ jobs: needs: [wheel-build-cudf, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 - if: needs.changed-files.outputs.test_python == 'true' + if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) diff --git a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py index 7a12db927e5..485b2ac8a51 100644 --- a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py +++ b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py @@ -67,20 +67,33 @@ def emoji_failed(x): # convert pr_results to a pandas DataFrame and then a markdown table pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index() main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index() -diff_df = pr_df - main_df -total_usage = pr_df['_slow_function_call'] + pr_df['_fast_function_call'] -pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/total_usage)*100.0).round(1) -pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/total_usage)*100.0).round(1) +total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"] +main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1) +main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1) + +total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"] +pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1) +pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1) + +cpu_usage_mean = pr_df["CPU Usage"].mean().round(2) +gpu_usage_mean = pr_df["GPU Usage"].mean().round(2) + +gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean()) +pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0) +pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0) +main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0) +main_df["GPU Usage"] = main_df["GPU Usage"].fillna(0) -cpu_usage_mean = pr_df['CPU Usage'].mean().round(2) -gpu_usage_mean = pr_df['GPU Usage'].mean().round(2) +diff_df = pr_df - main_df +diff_df["CPU Usage"] = diff_df["CPU Usage"].round(1).fillna(0) +diff_df["GPU Usage"] = diff_df["GPU Usage"].round(1).fillna(0) -# Add '%' suffix to 'CPU Usage' and 'GPU Usage' columns -pr_df['CPU Usage'] = pr_df['CPU Usage'].fillna(0).astype(str) + '%' -pr_df['GPU Usage'] = pr_df['GPU Usage'].fillna(0).astype(str) + '%' +# Add '%' suffix to "CPU Usage" and "GPU Usage" columns +pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%" +pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%" -pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']] -diff_df = diff_df[["total", "passed", "failed", "skipped"]] +pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]] +diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]] diff_df.columns = diff_df.columns + "_diff" diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed) diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed) @@ -99,13 +112,36 @@ def emoji_failed(x): "passed_diff": "Passed delta", "failed_diff": "Failed delta", "skipped_diff": "Skipped delta", + "CPU Usage_diff": "CPU Usage delta", + "GPU Usage_diff": "GPU Usage delta", } ) -df = df.sort_values(by=["Failed tests", "Skipped tests"], ascending=False) - +df = df.sort_values(by=["CPU Usage delta", "Total tests"], ascending=False) +df["CPU Usage delta"] = df["CPU Usage delta"].map(emoji_failed) +df["GPU Usage delta"] = df["GPU Usage delta"].map(emoji_passed) +df = df[ + [ + "Total tests", + "CPU Usage delta", + "GPU Usage delta", + "Passed tests", + "Failed tests", + "Skipped tests", + "CPU Usage", + "GPU Usage", + "Total delta", + "Passed delta", + "Failed delta", + "Skipped delta", + ] +] print(comment) print() -print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%") +print( + f"Average GPU usage: {gpu_usage_mean}% {'an increase' if gpu_usage_rate_change > 0 else 'a decrease'} by {gpu_usage_rate_change}%" +) +print() +print(f"Average CPU usage: {cpu_usage_mean}%") print() print("Here are the results of running the Pandas tests against this PR:") print()