From dafb3e7559710d5af7118a206312f250eb671558 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 19 Sep 2024 12:06:53 -0500
Subject: [PATCH] Generate GPU vs CPU usage metrics per pytest file in pandas
 testsuite for `cudf.pandas` (#16739)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adds GPU and CPU usage reporting to the `cudf.pandas` pytest suite. The generated metrics are available for viewing in the existing pandas pytest summary page:
https://github.com/rapidsai/cudf/actions/runs/10886370333/attempts/1#summary-30220192117

![Screenshot 2024-09-16 at 2 39 07 PM](https://github.com/user-attachments/assets/6d31c7d2-8a27-4f02-bf9d-c1b40ad1d756)


Note: I'm aware of cases where both GPU and CPU usage show 0%; this happens for various reasons that I'm working on addressing in a follow-up PR.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/16739
---
 .../pandas-tests/job-summary.py               | 14 ++++-
 python/cudf/cudf/pandas/fast_slow_proxy.py    | 16 +++++
 .../cudf/pandas/scripts/conftest-patch.py     | 59 ++++++++++++++++++-
 .../cudf/pandas/scripts/run-pandas-tests.sh   |  5 +-
 .../pandas/scripts/summarize-test-results.py  | 40 +++++++++++++
 5 files changed, 128 insertions(+), 6 deletions(-)

diff --git a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py
index 93a815838b7..7a12db927e5 100644
--- a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py
+++ b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py
@@ -68,8 +68,18 @@ def emoji_failed(x):
 pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
 main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
 diff_df = pr_df - main_df
+total_usage = pr_df['_slow_function_call'] + pr_df['_fast_function_call']
+pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/total_usage)*100.0).round(1)
+pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/total_usage)*100.0).round(1)
 
-pr_df = pr_df[["total", "passed", "failed", "skipped"]]
+cpu_usage_mean = pr_df['CPU Usage'].mean().round(2)
+gpu_usage_mean = pr_df['GPU Usage'].mean().round(2)
+
+# Format as percent strings; NaN (no recorded calls, 0/0) is shown as 0%
+pr_df['CPU Usage'] = pr_df['CPU Usage'].fillna(0).astype(str) + '%'
+pr_df['GPU Usage'] = pr_df['GPU Usage'].fillna(0).astype(str) + '%'
+
+pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']]
 diff_df = diff_df[["total", "passed", "failed", "skipped"]]
 diff_df.columns = diff_df.columns + "_diff"
 diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
@@ -95,6 +105,8 @@ def emoji_failed(x):
 
 print(comment)
 print()
+print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%")
+print()
 print("Here are the results of running the Pandas tests against this PR:")
 print()
 print(df.to_markdown())
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index afa1ce5f86c..bf2ee6ae624 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -881,6 +881,20 @@ def _assert_fast_slow_eq(left, right):
         assert_eq(left, right)
 
 
+def _fast_function_call():
+    """
+    No-op marker: conftest-patch.py counts calls to it by name (fast path).
+    """
+    return None
+
+
+def _slow_function_call():
+    """
+    No-op marker: conftest-patch.py counts calls to it by name (slow path).
+    """
+    return None
+
+
 def _fast_slow_function_call(
     func: Callable,
     /,
@@ -910,6 +924,7 @@ def _fast_slow_function_call(
                 # try slow path
                 raise Exception()
             fast = True
+            _fast_function_call()
             if _env_get_bool("CUDF_PANDAS_DEBUGGING", False):
                 try:
                     with nvtx.annotate(
@@ -952,6 +967,7 @@ def _fast_slow_function_call(
                 from ._logger import log_fallback
 
                 log_fallback(slow_args, slow_kwargs, err)
+            _slow_function_call()
             with disable_module_accelerator():
                 result = func(*slow_args, **slow_kwargs)
     return _maybe_wrap_result(result, func, *args, **kwargs), fast
diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py
index 505a40b0bfa..d12d2697729 100644
--- a/python/cudf/cudf/pandas/scripts/conftest-patch.py
+++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py
@@ -1,10 +1,13 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
 import contextlib
+import json
 import os
 import sys
+import traceback
+from collections import defaultdict
 from functools import wraps
 
 import pytest
@@ -36,4 +39,58 @@ def patch_testing_functions():
     pytest.raises = replace_kwargs({"match": None})(pytest.raises)
 
 
+# Call counts collected by trace_calls: {test file: {function name: count}}
+function_call_counts = {}  # type: ignore
+
+# Names (code.co_name) of the marker functions whose calls are counted
+FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"}
+
+
+def find_pytest_file(frame):
+    """Return the outermost pandas test file on frame's call stack, or None."""
+    marker, found = "pandas-testing/pandas-tests/tests", None
+    # walk_stack follows f_back lazily: no FrameSummary/linecache work per call
+    for caller, _ in traceback.walk_stack(frame):
+        path = caller.f_code.co_filename
+        if marker in path and path.rsplit("/", 1)[-1].startswith("test_"):
+            found = path.rsplit("pandas-tests/", 1)[-1]  # keep outermost hit
+    return found
+
+
+def trace_calls(frame, event, arg):
+    """Profile hook: tally marker-function calls per originating test file."""
+    if event != "call":
+        return
+    called = frame.f_code.co_name
+    if called not in FUNCTION_NAME:
+        return
+    test_file = find_pytest_file(frame)
+    if test_file is None:
+        # Marker was not reached from a pandas test module; nothing to record.
+        return
+    per_file = function_call_counts.setdefault(test_file, defaultdict(int))
+    per_file[called] += 1
+
+
+def pytest_sessionstart(session):
+    # Install trace_calls as the profiler so marker-function calls are counted
+    sys.setprofile(trace_calls)
+
+
+def pytest_sessionfinish(session, exitstatus):
+    # Uninstall the profiler; counts are dumped later in pytest_unconfigure
+    sys.setprofile(None)
+
+
+@pytest.hookimpl(trylast=True)
+def pytest_unconfigure(config):
+    if hasattr(config, "workerinput"):
+        # xdist worker only (non-xdist runs write nothing): dump counts to cwd
+        worker_id = config.workerinput["workerid"]
+        output_file = f"function_call_counts_worker_{worker_id}.json"
+        with open(output_file, "w") as f:
+            json.dump(function_call_counts, f, indent=4)
+        print(f"Function call counts have been written to {output_file}")
+
+
 sys.path.append(os.path.dirname(__file__))
diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
index 9c65b74d081..9b9ce026571 100755
--- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
+++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
@@ -64,8 +64,6 @@ markers = [
   "skip_ubsan: Tests known to fail UBSAN check",
 ]
 EOF
-    # append the contents of patch-confest.py to conftest.py
-    cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py
 
     # Substitute `pandas.tests` with a relative import.
     # This will depend on the location of the test module relative to
@@ -137,7 +135,7 @@ and not test_eof_states \
 and not test_array_tz"
 
 # TODO: Remove "not db" once a postgres & mysql container is set up on the CI
-PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \
+PANDAS_CI="1" timeout 90m python -m pytest -p cudf.pandas \
     -v -m "not single_cpu and not db" \
     -k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS and not test_groupby_raises_category_on_category and not test_constructor_no_pandas_array and not test_is_monotonic_na and not test_index_contains and not test_index_contains and not test_frame_op_subclass_nonclass_constructor and not test_round_trip_current" \
     --import-mode=importlib \
@@ -146,5 +144,4 @@ PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \
 
 mv *.json ..
 cd ..
-
 rm -rf pandas-testing/pandas-tests/
diff --git a/python/cudf/cudf/pandas/scripts/summarize-test-results.py b/python/cudf/cudf/pandas/scripts/summarize-test-results.py
index ffd2abb960d..4ea0b3b4413 100644
--- a/python/cudf/cudf/pandas/scripts/summarize-test-results.py
+++ b/python/cudf/cudf/pandas/scripts/summarize-test-results.py
@@ -12,7 +12,9 @@
 """
 
 import argparse
+import glob
 import json
+import os
 
 from rich.console import Console
 from rich.table import Table
@@ -57,6 +59,44 @@ def get_per_module_results(log_file_name):
                 per_module_results[module_name].setdefault(outcome, 0)
                 per_module_results[module_name]["total"] += 1
                 per_module_results[module_name][outcome] += 1
+
+    directory = os.path.dirname(log_file_name)
+    pattern = os.path.join(directory, "function_call_counts_worker_*.json")
+    matching_files = glob.glob(pattern)
+    function_call_counts = {}
+
+    for file in matching_files:
+        with open(file) as f:
+            function_call_count = json.load(f)
+        if not function_call_counts:
+            function_call_counts.update(function_call_count)
+        else:
+            for key, value in function_call_count.items():
+                if key not in function_call_counts:
+                    function_call_counts[key] = value
+                else:
+                    if "_slow_function_call" not in function_call_counts[key]:
+                        function_call_counts[key]["_slow_function_call"] = 0
+                    if "_fast_function_call" not in function_call_counts[key]:
+                        function_call_counts[key]["_fast_function_call"] = 0
+                    function_call_counts[key]["_slow_function_call"] += (
+                        value.get("_slow_function_call", 0)
+                    )
+                    function_call_counts[key]["_fast_function_call"] += (
+                        value.get("_fast_function_call", 0)
+                    )
+
+    for key, value in per_module_results.items():
+        if key in function_call_counts:
+            per_module_results[key]["_slow_function_call"] = (
+                function_call_counts[key].get("_slow_function_call", 0)
+            )
+            per_module_results[key]["_fast_function_call"] = (
+                function_call_counts[key].get("_fast_function_call", 0)
+            )
+        else:
+            per_module_results[key]["_slow_function_call"] = 0
+            per_module_results[key]["_fast_function_call"] = 0
     return per_module_results