Skip to content

Commit

Permalink
Merge pull request #16841 from rapidsai/branch-24.10
Browse files Browse the repository at this point in the history
Forward-merge branch-24.10 into branch-24.12
  • Loading branch information
GPUtester authored Sep 19, 2024
2 parents 83f9d2b + dafb3e7 commit a38ef96
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 6 deletions.
14 changes: 13 additions & 1 deletion ci/cudf_pandas_scripts/pandas-tests/job-summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,18 @@ def emoji_failed(x):
# Build one row per key of the PR and main result dicts (orient="index"),
# sorted by index so the two frames line up for the subtraction below.
pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index()
main_df = pd.DataFrame.from_dict(main_results, orient="index").sort_index()
# The diff is taken before the usage columns are added to pr_df, so it only
# contains the columns present in the raw results.
diff_df = pr_df - main_df
# Total number of tracked calls per row: slow-path plus fast-path calls.
total_usage = pr_df['_slow_function_call'] + pr_df['_fast_function_call']
# Share of slow-path ("CPU") and fast-path ("GPU") calls as a percentage,
# rounded to one decimal. Rows with zero tracked calls produce NaN here
# (0/0); the fillna(0) further down turns those into "0%".
pr_df['CPU Usage'] = ((pr_df['_slow_function_call']/total_usage)*100.0).round(1)
pr_df['GPU Usage'] = ((pr_df['_fast_function_call']/total_usage)*100.0).round(1)

# NOTE(review): this selection drops the 'CPU Usage'/'GPU Usage' columns that
# are read right below; it looks like the pre-change line of the diff that
# the wider selection further down replaces -- confirm against the real file.
pr_df = pr_df[["total", "passed", "failed", "skipped"]]
# Averages are taken before fillna(0), so NaN rows (no tracked calls) are
# skipped by pandas' default mean() rather than counted as 0%.
cpu_usage_mean = pr_df['CPU Usage'].mean().round(2)
gpu_usage_mean = pr_df['GPU Usage'].mean().round(2)

# Add '%' suffix to 'CPU Usage' and 'GPU Usage' columns
# (this converts both columns to strings, so it must come after the means).
pr_df['CPU Usage'] = pr_df['CPU Usage'].fillna(0).astype(str) + '%'
pr_df['GPU Usage'] = pr_df['GPU Usage'].fillna(0).astype(str) + '%'

# Keep only the columns that are reported, in display order.
pr_df = pr_df[["total", "passed", "failed", "skipped", 'CPU Usage', 'GPU Usage']]
diff_df = diff_df[["total", "passed", "failed", "skipped"]]
# Suffix the diff columns so they do not clash with the PR columns.
diff_df.columns = diff_df.columns + "_diff"
# Render the passed-count delta through emoji_passed (defined elsewhere in
# this script).
diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed)
Expand All @@ -95,6 +105,8 @@ def emoji_failed(x):

print(comment)
print()
print(f"Average CPU and GPU usage for the tests: {cpu_usage_mean}% and {gpu_usage_mean}%")
print()
print("Here are the results of running the Pandas tests against this PR:")
print()
print(df.to_markdown())
16 changes: 16 additions & 0 deletions python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -881,6 +881,20 @@ def _assert_fast_slow_eq(left, right):
assert_eq(left, right)


def _fast_function_call():
    """Do nothing and return ``None``.

    Placeholder for the fast path: it exists only so that a profile hook
    running under pytest can count calls to it by function name.
    """


def _slow_function_call():
    """Do nothing and return ``None``.

    Placeholder for the slow path: it exists only so that a profile hook
    running under pytest can count calls to it by function name.
    """


def _fast_slow_function_call(
func: Callable,
/,
Expand Down Expand Up @@ -910,6 +924,7 @@ def _fast_slow_function_call(
# try slow path
raise Exception()
fast = True
_fast_function_call()
if _env_get_bool("CUDF_PANDAS_DEBUGGING", False):
try:
with nvtx.annotate(
Expand Down Expand Up @@ -952,6 +967,7 @@ def _fast_slow_function_call(
from ._logger import log_fallback

log_fallback(slow_args, slow_kwargs, err)
_slow_function_call()
with disable_module_accelerator():
result = func(*slow_args, **slow_kwargs)
return _maybe_wrap_result(result, func, *args, **kwargs), fast
Expand Down
59 changes: 58 additions & 1 deletion python/cudf/cudf/pandas/scripts/conftest-patch.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import contextlib
import json
import os
import sys
import traceback
from collections import defaultdict
from functools import wraps

import pytest
Expand Down Expand Up @@ -36,4 +39,58 @@ def patch_testing_functions():
pytest.raises = replace_kwargs({"match": None})(pytest.raises)


# Call tallies collected by trace_calls(), keyed first by the test file
# path returned by find_pytest_file() and then by tracked function name:
# {test file path: {function name: number of calls}}.
function_call_counts = {} # type: ignore

# Names of the functions whose calls are counted; trace_calls() compares
# them against the called code object's co_name.
FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"}


def find_pytest_file(frame):
    """Return the pandas test file that ``frame`` was called from, if any.

    The call stack is walked from ``frame`` outwards through ``f_back``.
    A frame matches when its source file path contains
    ``pandas-testing/pandas-tests/tests`` and its base name starts with
    ``test_``. When several frames match, the outermost one wins, which is
    the same file the previous ``traceback.extract_stack()`` based
    implementation reported.

    Parameters
    ----------
    frame : frame object
        Frame to start from, e.g. the one handed to a ``sys.setprofile``
        callback.

    Returns
    -------
    str or None
        The part of the matching path after the last ``pandas-tests/``
        (for example ``tests/frame/test_api.py``), or ``None`` when no
        frame in the stack belongs to a pandas test file.
    """
    # Reading co_filename straight off the frames avoids building a full
    # traceback summary on every tracked call; this runs inside a profile
    # hook, so it is on a hot path.
    outermost_match = None
    while frame is not None:
        path = frame.f_code.co_filename
        if "pandas-testing/pandas-tests/tests" in path and path.rsplit(
            "/", 1
        )[-1].startswith("test_"):
            # Keep overwriting: the last hit is the outermost frame.
            outermost_match = path
        frame = frame.f_back
    if outermost_match is None:
        return None
    return outermost_match.rsplit("pandas-tests/", 1)[-1]


def trace_calls(frame, event, arg):
    """Profile callback that tallies calls to the tracked functions.

    Only ``"call"`` events for functions whose name is in
    ``FUNCTION_NAME`` are considered. Each such call is attributed to the
    test file reported by ``find_pytest_file`` and counted in
    ``function_call_counts``; calls that cannot be attributed to a test
    file are ignored. ``arg`` is unused. Always returns ``None``.
    """
    if event != "call":
        return

    called_name = frame.f_code.co_name
    if called_name not in FUNCTION_NAME:
        return

    test_file = find_pytest_file(frame)
    if test_file is None:
        return

    # Create the per-file counter the first time a test file is seen.
    per_file_counts = function_call_counts.setdefault(
        test_file, defaultdict(int)
    )
    per_file_counts[called_name] += 1


def pytest_sessionstart(session):
    """Install ``trace_calls`` as the profile function at session start.

    ``session`` is not used.
    """
    sys.setprofile(trace_calls)


def pytest_sessionfinish(session, exitstatus):
    """Remove the profile function once the test session has finished.

    Neither ``session`` nor ``exitstatus`` is used.
    """
    sys.setprofile(None)


@pytest.hookimpl(trylast=True)
def pytest_unconfigure(config):
    """Write the collected call counts to a per-worker JSON file.

    Nothing happens unless ``config`` has a ``workerinput`` attribute,
    i.e. unless this runs inside an xdist worker. In that case
    ``function_call_counts`` is dumped to
    ``function_call_counts_worker_<workerid>.json`` in the current working
    directory and a confirmation line is printed.
    """
    if not hasattr(config, "workerinput"):
        return

    # Running in xdist worker, write the counts before exiting
    worker_id = config.workerinput["workerid"]
    output_file = f"function_call_counts_worker_{worker_id}.json"
    with open(output_file, "w") as counts_file:
        json.dump(function_call_counts, counts_file, indent=4)
    print(f"Function call counts have been written to {output_file}")


sys.path.append(os.path.dirname(__file__))
5 changes: 1 addition & 4 deletions python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ markers = [
"skip_ubsan: Tests known to fail UBSAN check",
]
EOF
# Append the contents of conftest-patch.py to conftest.py
cat ../python/cudf/cudf/pandas/scripts/conftest-patch.py >> pandas-tests/conftest.py

# Substitute `pandas.tests` with a relative import.
# This will depend on the location of the test module relative to
Expand Down Expand Up @@ -137,7 +135,7 @@ and not test_eof_states \
and not test_array_tz"

# TODO: Remove "not db" once a postgres & mysql container is set up on the CI
PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \
PANDAS_CI="1" timeout 90m python -m pytest -p cudf.pandas \
-v -m "not single_cpu and not db" \
-k "$TEST_THAT_NEED_MOTO_SERVER and $TEST_THAT_CRASH_PYTEST_WORKERS and not test_groupby_raises_category_on_category and not test_constructor_no_pandas_array and not test_is_monotonic_na and not test_index_contains and not test_index_contains and not test_frame_op_subclass_nonclass_constructor and not test_round_trip_current" \
--import-mode=importlib \
Expand All @@ -146,5 +144,4 @@ PANDAS_CI="1" timeout 60m python -m pytest -p cudf.pandas \

mv *.json ..
cd ..

rm -rf pandas-testing/pandas-tests/
40 changes: 40 additions & 0 deletions python/cudf/cudf/pandas/scripts/summarize-test-results.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
"""

import argparse
import glob
import json
import os

from rich.console import Console
from rich.table import Table
Expand Down Expand Up @@ -57,6 +59,44 @@ def get_per_module_results(log_file_name):
per_module_results[module_name].setdefault(outcome, 0)
per_module_results[module_name]["total"] += 1
per_module_results[module_name][outcome] += 1

directory = os.path.dirname(log_file_name)
pattern = os.path.join(directory, "function_call_counts_worker_*.json")
matching_files = glob.glob(pattern)
function_call_counts = {}

for file in matching_files:
with open(file) as f:
function_call_count = json.load(f)
if not function_call_counts:
function_call_counts.update(function_call_count)
else:
for key, value in function_call_count.items():
if key not in function_call_counts:
function_call_counts[key] = value
else:
if "_slow_function_call" not in function_call_counts[key]:
function_call_counts[key]["_slow_function_call"] = 0
if "_fast_function_call" not in function_call_counts[key]:
function_call_counts[key]["_fast_function_call"] = 0
function_call_counts[key]["_slow_function_call"] += (
value.get("_slow_function_call", 0)
)
function_call_counts[key]["_fast_function_call"] += (
value.get("_fast_function_call", 0)
)

for key, value in per_module_results.items():
if key in function_call_counts:
per_module_results[key]["_slow_function_call"] = (
function_call_counts[key].get("_slow_function_call", 0)
)
per_module_results[key]["_fast_function_call"] = (
function_call_counts[key].get("_fast_function_call", 0)
)
else:
per_module_results[key]["_slow_function_call"] = 0
per_module_results[key]["_fast_function_call"] = 0
return per_module_results


Expand Down

0 comments on commit a38ef96

Please sign in to comment.