Skip to content

Commit

Permalink
test
Browse files: browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Sep 18, 2024
1 parent 0404129 commit 13890fd
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 79 deletions.
2 changes: 1 addition & 1 deletion ci/cudf_pandas_scripts/pandas-tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
-n 5 \
--tb=no \
-m "not slow" \
--max-worker-restart=3 \
--max-worker-restart=0 \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
--dist worksteal \
--report-log=${PANDAS_TESTS_BRANCH}.json 2>&1
Expand Down
51 changes: 3 additions & 48 deletions python/cudf/cudf/pandas/scripts/conftest-patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import contextlib
import json
import multiprocessing
import os
import sys
import traceback
Expand Down Expand Up @@ -41,8 +40,7 @@ def patch_testing_functions():


# Dictionary to store function call counts
manager = multiprocessing.Manager()
function_call_counts = defaultdict(int) # type: ignore
function_call_counts = {} # type: ignore

# The specific function to track
FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"}
Expand All @@ -55,14 +53,8 @@ def find_pytest_file(frame):
if "pandas-testing/pandas-tests/tests" in file and file.rsplit("/", 1)[
-1
].startswith("test_"):
return file
return str(file).rsplit("pandas-testing/", 1)[-1]
return None
# new_f = frame
# while new_f:
# if "pandas-testing/pandas-tests/tests" in new_f.f_globals.get("__file__", ""):
# return os.path.abspath(new_f.f_globals.get("__file__", ""))
# new_f = new_f.f_back
# return None


def trace_calls(frame, event, arg):
Expand Down Expand Up @@ -90,43 +82,6 @@ def pytest_sessionfinish(session, exitstatus):
sys.setprofile(None)


# @pytest.hookimpl(tryfirst=True)
# def pytest_runtest_setup(item):
# # Check if this is the first test in the file
# if item.nodeid.split("::")[0] != getattr(
# pytest_runtest_setup, "current_file", None
# ):
# # If it's a new file, reset the function call counts
# global function_call_counts
# function_call_counts = defaultdict(int)
# pytest_runtest_setup.current_file = item.nodeid.split("::")[0]


# @pytest.hookimpl(trylast=True)
# def pytest_runtest_teardown(item, nextitem):
# # Check if this is the last test in the file
# if (
# nextitem is None
# or nextitem.nodeid.split("::")[0] != item.nodeid.split("::")[0]
# ):
# # Write the function call counts to a file
# worker_id = os.getenv("PYTEST_XDIST_WORKER", "master")
# output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics.json'
# # if os.path.exists(output_file):
# # output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics_1.json'
# with open(output_file, "w") as f:
# json.dump(dict(function_call_counts), f, indent=4)
# print(f"Function call counts have been written to {output_file}")


# @pytest.hookimpl(tryfirst=True)
# def pytest_configure(config):
# if hasattr(config, "workerinput"):
# # Running in xdist worker
# global function_call_counts
# function_call_counts = defaultdict(int)


@pytest.hookimpl(trylast=True)
def pytest_unconfigure(config):
if hasattr(config, "workerinput"):
Expand All @@ -135,7 +90,7 @@ def pytest_unconfigure(config):
output_file = f"function_call_counts_worker_{worker_id}.json"
with open(output_file, "w") as f:
json.dump(dict(function_call_counts), f, indent=4)
# print(f"Function call counts have been written to {output_file}")
print(f"Function call counts have been written to {output_file}")


sys.path.append(os.path.dirname(__file__))
6 changes: 4 additions & 2 deletions python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ PANDAS_VERSION=$(python -c "import pandas; print(pandas.__version__)")

# tests/io/test_clipboard.py::TestClipboard crashes pytest workers (possibly due to fixture patching clipboard functionality)
PYTEST_IGNORES="--ignore=tests/io/parser/common/test_read_errors.py \
--ignore=tests/io/test_clipboard.py"
--ignore=tests/io/test_clipboard.py \
--ignore=tests/groupby/test_raises.py"

mkdir -p pandas-testing
cd pandas-testing
Expand Down Expand Up @@ -135,7 +136,8 @@ and not test_large_string_pyarrow \
and not test_interchange_from_corrected_buffer_dtypes \
and not test_eof_states \
and not test_array_tz \
and not test_groupby_raises_category"
and not test_groupby_raises_category \
and not test_groupby_raises_datetime"

# TODO: Remove "not db" once a postgres & mysql container is set up on the CI
PANDAS_CI="1" timeout 900m python -m pytest -p cudf.pandas \
Expand Down
52 changes: 24 additions & 28 deletions python/cudf/cudf/pandas/scripts/summarize-test-results.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,35 +71,31 @@ def get_per_module_results(log_file_name):
function_call_counts.update(function_call_count)
else:
for key, value in function_call_count.items():
function_call_counts[key]["_slow_function_call"] += value.get(
"_slow_function_call", 0
)
function_call_counts[key]["_fast_function_call"] += value.get(
"_fast_function_call", 0
)
# per_module_results[key]["_slow_function_call"] = (
# per_module_results[key].get("_slow_function_call", 0)
# + function_call_counts.get("_slow_function_call", 0)
# )
# per_module_results[key]["_fast_function_call"] = (
# per_module_results[key].get("_fast_function_call", 0)
# + function_call_counts.get("_fast_function_call", 0)
# )
if key not in function_call_counts:
function_call_counts[key] = value
else:
if "_slow_function_call" not in function_call_counts[key]:
function_call_counts[key]["_slow_function_call"] = 0
if "_fast_function_call" not in function_call_counts[key]:
function_call_counts[key]["_fast_function_call"] = 0
function_call_counts[key]["_slow_function_call"] += (
value.get("_slow_function_call", 0)
)
function_call_counts[key]["_fast_function_call"] += (
value.get("_fast_function_call", 0)
)

for key, value in per_module_results.items():
# processed_name = key.replace("/", "__") + "_*_metrics.json"
# # Assuming the directory is the same as the module name's directory
# directory = os.path.dirname(log_file_name)
# pattern = os.path.join(directory, processed_name)
# matching_files = glob.glob(pattern)
# for file in matching_files:
# with open(file) as f:
# function_call_counts = json.load(f)
per_module_results[key]["_slow_function_call"] = function_call_counts[
key
].get("_slow_function_call", 0)
per_module_results[key]["_fast_function_call"] = function_call_counts[
key
].get("_fast_function_call", 0)
if key in function_call_counts:
per_module_results[key]["_slow_function_call"] = (
function_call_counts[key].get("_slow_function_call", 0)
)
per_module_results[key]["_fast_function_call"] = (
function_call_counts[key].get("_fast_function_call", 0)
)
else:
per_module_results[key]["_slow_function_call"] = 0
per_module_results[key]["_fast_function_call"] = 0
return per_module_results


Expand Down

0 comments on commit 13890fd

Please sign in to comment.