diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index e5cd4436a3a..09ceba366b6 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -31,7 +31,7 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \ -n 5 \ --tb=no \ -m "not slow" \ - --max-worker-restart=3 \ + --max-worker-restart=0 \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \ --dist worksteal \ --report-log=${PANDAS_TESTS_BRANCH}.json 2>&1 diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py index 52a6fa89bef..05b42ecc610 100644 --- a/python/cudf/cudf/pandas/scripts/conftest-patch.py +++ b/python/cudf/cudf/pandas/scripts/conftest-patch.py @@ -4,7 +4,6 @@ import contextlib import json -import multiprocessing import os import sys import traceback @@ -41,8 +40,7 @@ def patch_testing_functions(): # Dictionary to store function call counts -manager = multiprocessing.Manager() -function_call_counts = defaultdict(int) # type: ignore +function_call_counts = {} # type: ignore # The specific function to track FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"} @@ -55,14 +53,8 @@ def find_pytest_file(frame): if "pandas-testing/pandas-tests/tests" in file and file.rsplit("/", 1)[ -1 ].startswith("test_"): - return file + return str(file).rsplit("pandas-testing/", 1)[-1] return None - # new_f = frame - # while new_f: - # if "pandas-testing/pandas-tests/tests" in new_f.f_globals.get("__file__", ""): - # return os.path.abspath(new_f.f_globals.get("__file__", "")) - # new_f = new_f.f_back - # return None def trace_calls(frame, event, arg): @@ -90,43 +82,6 @@ def pytest_sessionfinish(session, exitstatus): sys.setprofile(None) -# @pytest.hookimpl(tryfirst=True) -# def pytest_runtest_setup(item): -# # Check if this is the first test in the file -# if item.nodeid.split("::")[0] != getattr( -# pytest_runtest_setup, "current_file", None -# ): -# # If it's a new file, reset the function call counts -# global function_call_counts -# function_call_counts = defaultdict(int) -# pytest_runtest_setup.current_file = item.nodeid.split("::")[0] - - -# @pytest.hookimpl(trylast=True) -# def pytest_runtest_teardown(item, nextitem): -# # Check if this is the last test in the file -# if ( -# nextitem is None -# or nextitem.nodeid.split("::")[0] != item.nodeid.split("::")[0] -# ): -# # Write the function call counts to a file -# worker_id = os.getenv("PYTEST_XDIST_WORKER", "master") -# output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics.json' -# # if os.path.exists(output_file): -# # output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics_1.json' -# with open(output_file, "w") as f: -# json.dump(dict(function_call_counts), f, indent=4) -# print(f"Function call counts have been written to {output_file}") - - -# @pytest.hookimpl(tryfirst=True) -# def pytest_configure(config): -# if hasattr(config, "workerinput"): -# # Running in xdist worker -# global function_call_counts -# function_call_counts = defaultdict(int) - - @pytest.hookimpl(trylast=True) def pytest_unconfigure(config): if hasattr(config, "workerinput"): @@ -135,7 +90,7 @@ def pytest_unconfigure(config): output_file = f"function_call_counts_worker_{worker_id}.json" with open(output_file, "w") as f: json.dump(dict(function_call_counts), f, indent=4) - # print(f"Function call counts have been written to {output_file}") + print(f"Function call counts have been written to {output_file}") sys.path.append(os.path.dirname(__file__)) diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh index be83086a7dd..517b990b319 100755 --- a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh +++ b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh @@ -24,7 +24,8 @@ PANDAS_VERSION=$(python -c "import pandas; print(pandas.__version__)") # tests/io/test_clipboard.py::TestClipboard crashes pytest workers (possibly due to fixture patching clipboard functionality) PYTEST_IGNORES="--ignore=tests/io/parser/common/test_read_errors.py \ ---ignore=tests/io/test_clipboard.py" +--ignore=tests/io/test_clipboard.py \ +--ignore=tests/groupby/test_raises.py" mkdir -p pandas-testing cd pandas-testing @@ -135,7 +136,8 @@ and not test_large_string_pyarrow \ and not test_interchange_from_corrected_buffer_dtypes \ and not test_eof_states \ and not test_array_tz \ -and not test_groupby_raises_category" +and not test_groupby_raises_category \ +and not test_groupby_raises_datetime" # TODO: Remove "not db" once a postgres & mysql container is set up on the CI PANDAS_CI="1" timeout 900m python -m pytest -p cudf.pandas \ diff --git a/python/cudf/cudf/pandas/scripts/summarize-test-results.py b/python/cudf/cudf/pandas/scripts/summarize-test-results.py index 347118b290d..fd76c12f78e 100644 --- a/python/cudf/cudf/pandas/scripts/summarize-test-results.py +++ b/python/cudf/cudf/pandas/scripts/summarize-test-results.py @@ -71,35 +71,31 @@ def get_per_module_results(log_file_name): function_call_counts.update(function_call_count) else: for key, value in function_call_count.items(): - function_call_counts[key]["_slow_function_call"] += value.get( - "_slow_function_call", 0 - ) - function_call_counts[key]["_fast_function_call"] += value.get( - "_fast_function_call", 0 - ) - # per_module_results[key]["_slow_function_call"] = ( - # per_module_results[key].get("_slow_function_call", 0) - # + function_call_counts.get("_slow_function_call", 0) - # ) - # per_module_results[key]["_fast_function_call"] = ( - # per_module_results[key].get("_fast_function_call", 0) - # + function_call_counts.get("_fast_function_call", 0) - # ) + if key not in function_call_counts: + function_call_counts[key] = value + else: + if "_slow_function_call" not in function_call_counts[key]: + function_call_counts[key]["_slow_function_call"] = 0 + if "_fast_function_call" not in function_call_counts[key]: + function_call_counts[key]["_fast_function_call"] = 0 + function_call_counts[key]["_slow_function_call"] += ( + value.get("_slow_function_call", 0) + ) + function_call_counts[key]["_fast_function_call"] += ( + value.get("_fast_function_call", 0) + ) + for key, value in per_module_results.items(): - # processed_name = key.replace("/", "__") + "_*_metrics.json" - # # Assuming the directory is the same as the module name's directory - # directory = os.path.dirname(log_file_name) - # pattern = os.path.join(directory, processed_name) - # matching_files = glob.glob(pattern) - # for file in matching_files: - # with open(file) as f: - # function_call_counts = json.load(f) - per_module_results[key]["_slow_function_call"] = function_call_counts[ - key - ].get("_slow_function_call", 0) - per_module_results[key]["_fast_function_call"] = function_call_counts[ - key - ].get("_fast_function_call", 0) + if key in function_call_counts: + per_module_results[key]["_slow_function_call"] = ( + function_call_counts[key].get("_slow_function_call", 0) + ) + per_module_results[key]["_fast_function_call"] = ( + function_call_counts[key].get("_fast_function_call", 0) + ) + else: + per_module_results[key]["_slow_function_call"] = 0 + per_module_results[key]["_fast_function_call"] = 0 return per_module_results