test

rapidsai · Sep 18, 2024 · 13890fd
1 parent 0404129
commit 13890fd
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 79 deletions.
diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -31,7 +31,7 @@ bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
   -n 5 \
   --tb=no \
   -m "not slow" \
-  --max-worker-restart=3 \
+  --max-worker-restart=0 \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
   --dist worksteal \
   --report-log=${PANDAS_TESTS_BRANCH}.json 2>&1

diff --git a/python/cudf/cudf/pandas/scripts/conftest-patch.py b/python/cudf/cudf/pandas/scripts/conftest-patch.py
@@ -4,7 +4,6 @@
 
 import contextlib
 import json
-import multiprocessing
 import os
 import sys
 import traceback
@@ -41,8 +40,7 @@ def patch_testing_functions():
 
 
 # Dictionary to store function call counts
-manager = multiprocessing.Manager()
-function_call_counts = defaultdict(int)  # type: ignore
+function_call_counts = {}  # type: ignore
 
 # The specific function to track
 FUNCTION_NAME = {"_slow_function_call", "_fast_function_call"}
@@ -55,14 +53,8 @@ def find_pytest_file(frame):
         if "pandas-testing/pandas-tests/tests" in file and file.rsplit("/", 1)[
             -1
         ].startswith("test_"):
-            return file
+            return str(file).rsplit("pandas-testing/", 1)[-1]
     return None
-    # new_f = frame
-    # while new_f:
-    #     if "pandas-testing/pandas-tests/tests" in new_f.f_globals.get("__file__", ""):
-    #         return os.path.abspath(new_f.f_globals.get("__file__", ""))
-    #     new_f = new_f.f_back
-    # return None
 
 
 def trace_calls(frame, event, arg):
@@ -90,43 +82,6 @@ def pytest_sessionfinish(session, exitstatus):
     sys.setprofile(None)
 
 
-# @pytest.hookimpl(tryfirst=True)
-# def pytest_runtest_setup(item):
-#     # Check if this is the first test in the file
-#     if item.nodeid.split("::")[0] != getattr(
-#         pytest_runtest_setup, "current_file", None
-#     ):
-#         # If it's a new file, reset the function call counts
-#         global function_call_counts
-#         function_call_counts = defaultdict(int)
-#         pytest_runtest_setup.current_file = item.nodeid.split("::")[0]
-
-
-# @pytest.hookimpl(trylast=True)
-# def pytest_runtest_teardown(item, nextitem):
-#     # Check if this is the last test in the file
-#     if (
-#         nextitem is None
-#         or nextitem.nodeid.split("::")[0] != item.nodeid.split("::")[0]
-#     ):
-#         # Write the function call counts to a file
-#         worker_id = os.getenv("PYTEST_XDIST_WORKER", "master")
-#         output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics.json'
-#         # if os.path.exists(output_file):
-#         #     output_file = f'{item.nodeid.split("::")[0].replace("/", "__")}_{worker_id}_metrics_1.json'
-#         with open(output_file, "w") as f:
-#             json.dump(dict(function_call_counts), f, indent=4)
-#         print(f"Function call counts have been written to {output_file}")
-
-
-# @pytest.hookimpl(tryfirst=True)
-# def pytest_configure(config):
-#     if hasattr(config, "workerinput"):
-#         # Running in xdist worker
-#         global function_call_counts
-#         function_call_counts = defaultdict(int)
-
-
 @pytest.hookimpl(trylast=True)
 def pytest_unconfigure(config):
     if hasattr(config, "workerinput"):
@@ -135,7 +90,7 @@ def pytest_unconfigure(config):
         output_file = f"function_call_counts_worker_{worker_id}.json"
         with open(output_file, "w") as f:
             json.dump(dict(function_call_counts), f, indent=4)
-        # print(f"Function call counts have been written to {output_file}")
+        print(f"Function call counts have been written to {output_file}")
 
 
 sys.path.append(os.path.dirname(__file__))
diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
@@ -24,7 +24,8 @@ PANDAS_VERSION=$(python -c "import pandas; print(pandas.__version__)")
 
 # tests/io/test_clipboard.py::TestClipboard crashes pytest workers (possibly due to fixture patching clipboard functionality)
 PYTEST_IGNORES="--ignore=tests/io/parser/common/test_read_errors.py \
---ignore=tests/io/test_clipboard.py"
+--ignore=tests/io/test_clipboard.py \
+--ignore=tests/groupby/test_raises.py"
 
 mkdir -p pandas-testing
 cd pandas-testing
@@ -135,7 +136,8 @@ and not test_large_string_pyarrow \
 and not test_interchange_from_corrected_buffer_dtypes \
 and not test_eof_states \
 and not test_array_tz \
-and not test_groupby_raises_category"
+and not test_groupby_raises_category \
+and not test_groupby_raises_datetime"
 
 # TODO: Remove "not db" once a postgres & mysql container is set up on the CI
 PANDAS_CI="1" timeout 900m python -m pytest -p cudf.pandas \

diff --git a/python/cudf/cudf/pandas/scripts/summarize-test-results.py b/python/cudf/cudf/pandas/scripts/summarize-test-results.py
@@ -71,35 +71,31 @@ def get_per_module_results(log_file_name):
             function_call_counts.update(function_call_count)
         else:
             for key, value in function_call_count.items():
-                function_call_counts[key]["_slow_function_call"] += value.get(
-                    "_slow_function_call", 0
-                )
-                function_call_counts[key]["_fast_function_call"] += value.get(
-                    "_fast_function_call", 0
-                )
-            # per_module_results[key]["_slow_function_call"] = (
-            #     per_module_results[key].get("_slow_function_call", 0)
-            #     + function_call_counts.get("_slow_function_call", 0)
-            # )
-            # per_module_results[key]["_fast_function_call"] = (
-            #     per_module_results[key].get("_fast_function_call", 0)
-            #     + function_call_counts.get("_fast_function_call", 0)
-            # )
+                if key not in function_call_counts:
+                    function_call_counts[key] = value
+                else:
+                    if "_slow_function_call" not in function_call_counts[key]:
+                        function_call_counts[key]["_slow_function_call"] = 0
+                    if "_fast_function_call" not in function_call_counts[key]:
+                        function_call_counts[key]["_fast_function_call"] = 0
+                    function_call_counts[key]["_slow_function_call"] += (
+                        value.get("_slow_function_call", 0)
+                    )
+                    function_call_counts[key]["_fast_function_call"] += (
+                        value.get("_fast_function_call", 0)
+                    )
+
     for key, value in per_module_results.items():
-        # processed_name = key.replace("/", "__") + "_*_metrics.json"
-        # # Assuming the directory is the same as the module name's directory
-        # directory = os.path.dirname(log_file_name)
-        # pattern = os.path.join(directory, processed_name)
-        # matching_files = glob.glob(pattern)
-        # for file in matching_files:
-        #     with open(file) as f:
-        #         function_call_counts = json.load(f)
-        per_module_results[key]["_slow_function_call"] = function_call_counts[
-            key
-        ].get("_slow_function_call", 0)
-        per_module_results[key]["_fast_function_call"] = function_call_counts[
-            key
-        ].get("_fast_function_call", 0)
+        if key in function_call_counts:
+            per_module_results[key]["_slow_function_call"] = (
+                function_call_counts[key].get("_slow_function_call", 0)
+            )
+            per_module_results[key]["_fast_function_call"] = (
+                function_call_counts[key].get("_fast_function_call", 0)
+            )
+        else:
+            per_module_results[key]["_slow_function_call"] = 0
+            per_module_results[key]["_fast_function_call"] = 0
     return per_module_results