From 4fddb30f61f027747b12919888d3e3a66e08b623 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Fri, 23 Aug 2024 07:42:53 -0700
Subject: [PATCH 01/24] Move tests and ci files to cudf

---
 .../ci/check_style.sh                         |  18 +
 .../ci/ci_run_library_tests.sh                |  65 ++++
 .../ci/extract_lib.sh                         |  28 ++
 .../ci/release/update-version.sh              |  41 ++
 .../third_party_integration_tests/ci/test.sh  |  55 +++
 .../third_party_integration_tests/conftest.py | 173 +++++++++
 .../third_party_integration_tests/pytest.ini  |   7 +
 .../run_library_tests.sh                      |  11 +
 .../test_cugraph.py                           |  94 +++++
 .../test_cuml.py                              | 152 ++++++++
 .../test_dask.py                              |  10 +
 .../test_featureengine.py                     |  47 +++
 .../test_holoviews.py                         |  79 ++++
 .../test_hvplot.py                            |  72 ++++
 .../test_ibis.py                              | 169 ++++++++
 .../test_matplotlib.py                        |  70 ++++
 .../test_numpy.py                             |  59 +++
 .../test_plotly.py                            |  67 ++++
 .../test_pytorch.py                           | 126 ++++++
 .../test_scipy.py                             |  65 ++++
 .../test_seaborn.py                           |  60 +++
 .../test_sklearn.py                           |  82 ++++
 .../test_stumpy.py                            |  94 +++++
 .../test_stumpy_distributed.py                |  48 +++
 .../test_tensorflow.py                        | 367 ++++++++++++++++++
 .../test_xgboost.py                           | 135 +++++++
 26 files changed, 2194 insertions(+)
 create mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh
 create mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
 create mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh
 create mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh
 create mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/conftest.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/pytest.ini
 create mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/run_library_tests.sh
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cugraph.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cuml.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_dask.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_featureengine.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_holoviews.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_hvplot.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_ibis.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_matplotlib.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_numpy.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_plotly.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_pytorch.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_scipy.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_seaborn.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_sklearn.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy_distributed.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_tensorflow.py
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/test_xgboost.py

diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh
new file mode 100755
index 00000000000..b81b36ddb45
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Builds the "checks" conda environment and runs every pre-commit hook in it.
+set -euo pipefail
+
+rapids-logger "Create checks conda environment"
+. /opt/conda/etc/profile.d/conda.sh
+
+rapids-dependency-file-generator \
+  --output conda \
+  --file-key checks \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
+
+rapids-mamba-retry env create --yes -f env.yaml -n checks
+# Allow unbound variables only while activating, as ci/test.sh does.
+set +u; conda activate checks; set -u
+# Run pre-commit checks
+pre-commit run --hook-stage manual --all-files --show-diff-on-failure
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
new file mode 100755
index 00000000000..d1627d6436b
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+cleanup() {
+    # Drop the pickled results; -f keeps the EXIT trap quiet when none exist.
+    rm -f -- "${TEST_DIR:-tests}"/results-*.pickle
+}
+trap cleanup EXIT
+
+runtest_gold() {
+    local lib=$1
+    local test_keys=${@:2}
+
+    pytest \
+    -v \
+    --continue-on-collection-errors \
+    --cache-clear \
+    --junitxml="${RAPIDS_TESTS_DIR}/junit-${lib}-gold.xml" \
+    --numprocesses=${NUM_PROCESSES} \
+    --dist=worksteal \
+    ${TEST_DIR}/test_${lib}*.py \
+    ${test_keys}
+}
+
+runtest_cudf_pandas() {
+    local lib=$1
+    local test_keys=${@:2}
+
+    pytest \
+    -p cudf.pandas \
+    -v \
+    --continue-on-collection-errors \
+    --cache-clear \
+    --junitxml="${RAPIDS_TESTS_DIR}/junit-${lib}-cudf-pandas.xml" \
+    --numprocesses=${NUM_PROCESSES} \
+    --dist=worksteal \
+    ${TEST_DIR}/test_${lib}*.py \
+    ${test_keys}
+}
+
+main() {
+    local lib=$1
+    local test_keys=${@:2}
+
+    # generation phase
+    runtest_gold ${lib} ${test_keys}
+    runtest_cudf_pandas ${lib} ${test_keys}
+
+    # assertion phase
+    pytest \
+    --compare \
+    -p cudf.pandas \
+    -v \
+    --continue-on-collection-errors \
+    --cache-clear \
+    --junitxml="${RAPIDS_TESTS_DIR}/junit-${lib}-assertion.xml" \
+    --numprocesses=${NUM_PROCESSES} \
+    --dist=worksteal \
+    ${TEST_DIR}/test_${lib}*.py \
+    ${test_keys}
+}
+
+main $@
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh
new file mode 100755
index 00000000000..4511363146e
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+write_output() {
+  # Print "<key>=<value>" and append it to $GITHUB_OUTPUT (/dev/null if unset).
+  local name="$1" val="$2"
+  echo "$name=$val" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
+}
+
+extract_lib_from_dependencies_yaml() {
+    # Emit a compact JSON array naming every library that has a key starting
+    # with "test_" under "files" in the dependencies file $1 (prefix removed).
+    local file=$1 extracted_libs
+    # Assigned apart from `local` so that a yq/jq failure is not masked.
+    extracted_libs="$(yq -o json "$file" | jq -rc '.files | with_entries( select(.key | startswith("test_")) ) | keys | map(sub("^test_"; ""))')"
+    echo "$extracted_libs"
+    write_output "LIBS" "$extracted_libs"
+}
+
+
+main() {
+    # $1: path of the dependencies.yaml file to scan.
+    extract_lib_from_dependencies_yaml "$1"
+}
+
+main "$@"
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh
new file mode 100755
index 00000000000..5b6f8f5ce1c
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+###########################################
+# cudf.pandas integration Version Updater #
+###########################################
+
+## Usage
+# bash update-version.sh <new_version>
+
+
+# Format is YY.MM.PP - no leading 'v' or trailing 'a'
+NEXT_FULL_TAG=$1
+
+# Get <major>.<minor> for next version
+NEXT_MAJOR=$(echo "$NEXT_FULL_TAG" | awk '{split($0, a, "."); print a[1]}')
+NEXT_MINOR=$(echo "$NEXT_FULL_TAG" | awk '{split($0, a, "."); print a[2]}')
+NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
+# Normalize to PEP 440. Prefer the standalone `packaging` module and fall back to
+# the copy under setuptools.extern, which recent setuptools releases have dropped.
+NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))" 2>/dev/null || python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
+
+# In-place sed: run expression $1 on file $2; `-i.bak` suits Linux and Mac sed.
+function sed_runner() {
+    sed -i.bak "$1" "$2" && rm -f "${2}.bak"
+}
+
+# CI files: point every shared-workflows reference at branch-<major>.<minor>
+for FILE in .github/workflows/*.yaml; do
+  sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
+done
+
+DEPENDENCIES=(
+  cugraph
+  cudf
+  cuml
+)
+for DEP in "${DEPENDENCIES[@]}"; do  # rewrite each "<dep>[-cuNN]==..." pin
+  for FILE in dependencies.yaml; do
+    sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*/g" "${FILE}"
+  done
+done
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
new file mode 100755
index 00000000000..a012513b93a
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+# Runs the third-party integration tests of one library: ci/test.sh <library>
+# Fail fast when the library is missing; `set -u` below comes too late for $1.
+LIB=${1:?usage: ci/test.sh <library>}
+
+set -euo pipefail
+
+. /opt/conda/etc/profile.d/conda.sh
+
+rapids-logger "Generate Python testing dependencies"
+rapids-dependency-file-generator \
+  --output conda \
+  --file-key "test_${LIB}" \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
+
+rapids-mamba-retry env create --yes -f env.yaml -n test
+
+# Temporarily allow unbound variables for conda activation.
+set +u
+conda activate test
+set -u
+
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
+mkdir -p "${RAPIDS_TESTS_DIR}"
+
+repo_root=$(git rev-parse --show-toplevel)
+TEST_DIR="${repo_root}/tests"
+
+rapids-print-env
+
+rapids-logger "Check GPU usage"
+nvidia-smi
+
+EXITCODE=0
+trap "EXITCODE=1" ERR  # any failing command from here on flips the exit code
+set +e  # keep going after a failure; the ERR trap records it instead
+
+rapids-logger "pytest ${LIB}"
+
+NUM_PROCESSES=8  # default value handed to pytest's --numprocesses
+serial_libraries=(
+    "tensorflow"
+)
+for serial_library in "${serial_libraries[@]}"; do
+    if [ "${LIB}" = "${serial_library}" ]; then
+        NUM_PROCESSES=1  # libraries listed above run with a single process
+    fi
+done
+
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/ci_run_library_tests.sh ${LIB}
+
+rapids-logger "Test script exiting with value: ${EXITCODE}"
+exit ${EXITCODE}
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/conftest.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/conftest.py
new file mode 100644
index 00000000000..33b6ffdbd5c
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/conftest.py
@@ -0,0 +1,173 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import os
+import pickle
+from typing import TYPE_CHECKING, BinaryIO
+
+import _pytest
+import _pytest.config
+import _pytest.nodes
+import pytest
+
+if TYPE_CHECKING:
+    import _pytest.python
+
+from _pytest.stash import StashKey
+
+from cudf.pandas.module_accelerator import disable_module_accelerator
+
+file_handle_key = StashKey[BinaryIO]()
+basename_key = StashKey[str]()
+test_folder_key = StashKey[str]()
+results = StashKey[tuple[dict, dict]]()
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--compare",
+        action="store_true",
+        default=False,
+        help="Run comparison step?",
+    )
+
+
+def read_results(f):
+    while True:
+        try:
+            yield pickle.load(f)
+        except EOFError:
+            return
+
+
+def pytest_collection_modifyitems(
+    session, config: _pytest.config.Config, items: list[_pytest.nodes.Item]
+):
+    if config.getoption("--compare"):
+        current_pass = "compare"
+    elif "cudf.pandas" in config.option.plugins:
+        current_pass = "cudf_pandas"
+    else:
+        current_pass = "gold"
+
+    def swap_xfail(item: _pytest.nodes.Item, name: str):
+        """Replace custom `xfail_**` mark with a `xfail` mark having the same kwargs."""
+
+        old_mark = item.keywords[name]
+        new_mark = pytest.mark.xfail(**old_mark.kwargs)
+
+        # Replace all "xfail_**" mark in the node chain with the "xfail" mark
+        # if not found, the node chain is not modified.
+        for node, mark in item.iter_markers_with_node(name):
+            idx = node.own_markers.index(mark)
+            node.own_markers[idx] = new_mark
+
+    for item in items:
+        if current_pass == "gold" and "xfail_gold" in item.keywords:
+            swap_xfail(item, "xfail_gold")
+        elif (
+            current_pass == "cudf_pandas"
+            and "xfail_cudf_pandas" in item.keywords
+        ):
+            swap_xfail(item, "xfail_cudf_pandas")
+        elif current_pass == "compare" and "xfail_compare" in item.keywords:
+            swap_xfail(item, "xfail_compare")
+
+
+def pytest_configure(config: _pytest.config.Config):
+    gold_basename = "results-gold"
+    cudf_basename = "results-cudf-pandas"
+    test_folder = os.path.join(os.path.dirname(__file__))
+
+    if config.getoption("--compare"):
+        # Everyone reads everything
+        gold_path = os.path.join(test_folder, f"{gold_basename}.pickle")
+        cudf_path = os.path.join(test_folder, f"{cudf_basename}.pickle")
+        with disable_module_accelerator():
+            with open(gold_path, "rb") as f:
+                gold_results = dict(read_results(f))
+        with open(cudf_path, "rb") as f:
+            cudf_results = dict(read_results(f))
+        config.stash[results] = (gold_results, cudf_results)
+    else:
+        if "cudf.pandas" in config.option.plugins:
+            basename = cudf_basename
+        else:
+            basename = gold_basename
+
+        if hasattr(config, "workerinput"):
+            # If we're on an xdist worker, open a worker-unique pickle file.
+            worker = config.workerinput["workerid"]
+            filename = f"{basename}-{worker}.pickle"
+        else:
+            filename = f"{basename}.pickle"
+
+        pickle_path = os.path.join(test_folder, filename)
+        config.stash[file_handle_key] = open(pickle_path, "wb")
+        config.stash[test_folder_key] = test_folder
+        config.stash[basename_key] = basename
+
+
+def pytest_pyfunc_call(pyfuncitem: _pytest.python.Function):
+    if pyfuncitem.config.getoption("--compare"):
+        gold_results, cudf_results = pyfuncitem.config.stash[results]
+        key = pyfuncitem.nodeid
+        try:
+            gold = gold_results[key]
+        except KeyError:
+            assert False, "pickled gold result is not available"
+        try:
+            cudf = cudf_results[key]
+        except KeyError:
+            assert False, "pickled cudf result is not available"
+        if gold is None and cudf is None:
+            raise ValueError(f"Integration test {key} did not return a value")
+        asserter = pyfuncitem.get_closest_marker("assert_eq")
+        if asserter is None:
+            assert gold == cudf, "Test failed"
+        else:
+            asserter.kwargs["fn"](gold, cudf)
+    else:
+        # Replace default call of test function with one that captures the
+        # result
+        testfunction = pyfuncitem.obj
+        funcargs = pyfuncitem.funcargs
+        testargs = {
+            arg: funcargs[arg] for arg in pyfuncitem._fixtureinfo.argnames
+        }
+        result = testfunction(**testargs)
+        # Tuple-based key-value pairs, key is the node-id
+        try:
+            pickle.dump(
+                (pyfuncitem.nodeid, result),
+                pyfuncitem.config.stash[file_handle_key],
+            )
+        except pickle.PicklingError:
+            pass
+    return True
+
+
+def pytest_unconfigure(config):
+    if config.getoption("--compare"):
+        return
+    if file_handle_key not in config.stash:
+        # We didn't open a pickle file
+        return
+    if not hasattr(config, "workerinput"):
+        # If we're the controlling process
+        if (
+            hasattr(config.option, "numprocesses")
+            and config.option.numprocesses is not None
+        ):
+            # Concat the worker partial pickle results and remove them
+            for i in range(config.option.numprocesses):
+                worker_result = os.path.join(
+                    config.stash[test_folder_key],
+                    f"{config.stash[basename_key]}-gw{i}.pickle",
+                )
+                with open(worker_result, "rb") as f:
+                    config.stash[file_handle_key].write(f.read())
+                os.remove(worker_result)
+    # Close our file
+    del config.stash[file_handle_key]
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/pytest.ini b/python/cudf/cudf_pandas_tests/third_party_integration_tests/pytest.ini
new file mode 100644
index 00000000000..817d98e6ba2
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/pytest.ini
@@ -0,0 +1,7 @@
+[pytest]
+xfail_strict=true
+markers=
+    assert_eq: custom binary asserter for a test
+    xfail_gold: this test is expected to fail in the gold pass
+    xfail_cudf_pandas: this test is expected to fail in the cudf_pandas pass
+    xfail_compare: this test is expected to fail in the comparison pass
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/run_library_tests.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/run_library_tests.sh
new file mode 100755
index 00000000000..dafd2e77761
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/run_library_tests.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Local entry point: forwards all arguments to ci/ci_run_library_tests.sh.
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"$(dirname "$0")"}
+mkdir -p "${RAPIDS_TESTS_DIR}/test-results"
+
+repo_root=$(git rev-parse --show-toplevel)
+# NUM_PROCESSES feeds pytest's --numprocesses; default it (as ci/test.sh does) when unset.
+TEST_DIR="${repo_root}/tests/" RAPIDS_TESTS_DIR="${RAPIDS_TESTS_DIR}" NUM_PROCESSES="${NUM_PROCESSES:-8}" "${repo_root}/ci/ci_run_library_tests.sh" "$@"
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cugraph.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cugraph.py
new file mode 100644
index 00000000000..7acc8672063
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cugraph.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import cugraph
+import cupy as cp
+import networkx as nx
+import numpy as np
+import pandas as pd
+import pytest
+
+cugraph_algos = [
+    "betweenness_centrality",
+    "degree_centrality",
+    "katz_centrality",
+    "sorensen_coefficient",
+    "jaccard_coefficient",
+]
+
+nx_algos = [
+    "betweenness_centrality",
+    "degree_centrality",
+    "katz_centrality",
+]
+
+
+def assert_cugraph_equal(expect, got):
+    if isinstance(expect, cp.ndarray):
+        expect = expect.get()
+    if isinstance(got, cp.ndarray):
+        got = got.get()
+    elif isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
+        assert np.array_equal(expect, got)
+    else:
+        assert expect == got
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_cugraph_equal)
+
+
+@pytest.fixture(scope="session")
+def df():
+    return pd.DataFrame({"source": [0, 1, 2], "destination": [1, 2, 3]})
+
+
+@pytest.fixture(scope="session")
+def adjacency_matrix():
+    data = {
+        "A": [0, 1, 1, 0],
+        "B": [1, 0, 0, 1],
+        "C": [1, 0, 0, 1],
+        "D": [0, 1, 1, 0],
+    }
+    df = pd.DataFrame(data, index=["A", "B", "C", "D"])
+    return df
+
+
+@pytest.mark.parametrize("algo", cugraph_algos)
+def test_cugraph_from_pandas_edgelist(df, algo):
+    G = cugraph.Graph()
+    G.from_pandas_edgelist(df)
+    return getattr(cugraph, algo)(G).to_pandas().values
+
+
+@pytest.mark.parametrize("algo", cugraph_algos)
+def test_cugraph_from_pandas_adjacency(adjacency_matrix, algo):
+    G = cugraph.Graph()
+    G.from_pandas_adjacency(adjacency_matrix)
+    res = getattr(cugraph, algo)(G).to_pandas()
+    return res.sort_values(list(res.columns)).values
+
+
+@pytest.mark.parametrize("algo", cugraph_algos)
+def test_cugraph_from_numpy_array(df, algo):
+    G = cugraph.Graph()
+    G.from_numpy_array(df.values)
+    return getattr(cugraph, algo)(G).to_pandas().values
+
+
+@pytest.mark.parametrize("algo", nx_algos)
+def test_networkx_from_pandas_edgelist(df, algo):
+    G = nx.from_pandas_edgelist(
+        df, "source", "destination", ["source", "destination"]
+    )
+    return getattr(nx, algo)(G)
+
+
+@pytest.mark.parametrize("algo", nx_algos)
+def test_networkx_from_pandas_adjacency(adjacency_matrix, algo):
+    G = nx.from_pandas_adjacency(adjacency_matrix)
+    return getattr(nx, algo)(G)
+
+
+@pytest.mark.parametrize("algo", nx_algos)
+def test_networkx_from_numpy_array(adjacency_matrix, algo):
+    G = nx.from_numpy_array(adjacency_matrix.values)
+    return getattr(nx, algo)(G)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cuml.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cuml.py
new file mode 100644
index 00000000000..892d0886596
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cuml.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import cupy as cp
+import numpy as np
+import pandas as pd
+import pytest
+from cuml.cluster import KMeans
+from cuml.decomposition import PCA
+from cuml.ensemble import RandomForestClassifier
+from cuml.linear_model import LinearRegression, LogisticRegression
+from cuml.metrics import accuracy_score
+from cuml.model_selection import train_test_split
+from cuml.pipeline import Pipeline
+from cuml.preprocessing import StandardScaler
+
+
+def assert_cuml_equal(expect, got):
+    # Coerce GPU arrays to CPU
+    if isinstance(expect, cp.ndarray):
+        expect = expect.get()
+    if isinstance(got, cp.ndarray):
+        got = got.get()
+
+    # Handle equality
+    if isinstance(expect, KMeans) and isinstance(got, KMeans):
+        # same clusters
+        np.testing.assert_allclose(
+            expect.cluster_centers_, got.cluster_centers_
+        )
+    elif isinstance(expect, np.ndarray) and isinstance(got, np.ndarray):
+        np.testing.assert_allclose(expect, got)
+    elif isinstance(expect, tuple) and isinstance(got, tuple):
+        assert len(expect) == len(got)
+        for e, g in zip(expect, got):
+            assert_cuml_equal(e, g)
+    elif isinstance(expect, pd.DataFrame):
+        assert pd.testing.assert_frame_equal(expect, got)
+    elif isinstance(expect, pd.Series):
+        assert pd.testing.assert_series_equal(expect, got)
+    else:
+        assert expect == got
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_cuml_equal)
+
+
+@pytest.fixture
+def binary_classification_data():
+    data = {
+        "feature1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0],
+        "feature2": [2.0, 4.0, 1.0, 3.0, 5.0, 7.0, 6.0, 8.0, 10.0, 9.0],
+        "target": [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+    }
+    df = pd.DataFrame(data)
+    return df
+
+
+def test_linear_regression():
+    lr = LinearRegression(fit_intercept=True, normalize=False, algorithm="eig")
+    X = pd.DataFrame()
+    X["col1"] = np.array([1, 1, 2, 2], dtype=np.float32)
+    X["col2"] = np.array([1, 2, 2, 3], dtype=np.float32)
+    y = pd.Series(np.array([6.0, 8.0, 9.0, 11.0], dtype=np.float32))
+    lr.fit(X, y)
+
+    X_new = pd.DataFrame()
+    X_new["col1"] = np.array([3, 2], dtype=np.float32)
+    X_new["col2"] = np.array([5, 5], dtype=np.float32)
+    preds = lr.predict(X_new)
+    return preds.values
+
+
+def test_logistic_regression(binary_classification_data):
+    X = binary_classification_data[["feature1", "feature2"]]
+    y = binary_classification_data["target"]
+
+    (X_train, X_test, y_train, y_test) = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    model = LogisticRegression()
+    model.fit(X_train, y_train)
+
+    y_pred = model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+
+    return accuracy
+
+
+def test_random_forest(binary_classification_data):
+    X = binary_classification_data[["feature1", "feature2"]]
+    y = binary_classification_data["target"]
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+    model = RandomForestClassifier(n_estimators=100)  # NOTE(review): unseeded
+    model.fit(X_train, y_train)
+    preds = model.predict(X_test)
+    return preds.values  # returned so that conftest can pickle and compare it
+
+
+def test_clustering():
+    rng = np.random.default_rng(42)
+    nsamps = 300
+    X = rng.random((nsamps, 2))
+    data = pd.DataFrame(X, columns=["x", "y"])
+
+    kmeans = KMeans(n_clusters=3, random_state=42)
+    kmeans.fit(data)
+    return kmeans
+
+
+def test_data_scaling():
+    data = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
+    scaler = StandardScaler()
+
+    scaled_data = scaler.fit_transform(data.values.reshape(-1, 1))
+    return scaled_data
+
+
+def test_pipeline(binary_classification_data):
+    X = binary_classification_data[["feature1", "feature2"]]
+    y = binary_classification_data["target"]
+
+    pipe = Pipeline(
+        [
+            ("scaler", StandardScaler()),
+            ("pca", PCA()),
+            ("random_forest", LogisticRegression()),  # NOTE(review): misnamed
+        ]
+    )
+
+    pipe.fit(X, y)
+    results = pipe.predict(X)
+    return results.values  # predictions on the training frame itself
+
+
+@pytest.mark.parametrize(
+    "X, y",
+    [
+        (pd.DataFrame({"a": range(10), "b": range(10)}), pd.Series(range(10))),
+        (
+            pd.DataFrame({"a": range(10), "b": range(10)}).values,
+            pd.Series(range(10)).values,
+        ),  # cudf.pandas wrapped numpy arrays
+    ],
+)
+def test_train_test_split(X, y):
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+    # Compare only the size of the data splits
+    return len(X_train), len(X_test), len(y_train), len(y_test)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_dask.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_dask.py
new file mode 100644
index 00000000000..c34778dfded
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_dask.py
@@ -0,0 +1,10 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import pandas as pd
+
+import dask.dataframe as dd
+
+
+def test_sum():
+    data = {"x": range(1, 11)}
+    ddf = dd.from_pandas(pd.DataFrame(data), npartitions=2)
+    return ddf["x"].sum().compute()
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_featureengine.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_featureengine.py
new file mode 100644
index 00000000000..3e247291fad
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_featureengine.py
@@ -0,0 +1,47 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import numpy as np
+import pandas as pd
+from feature_engine.imputation import DropMissingData
+from feature_engine.preprocessing import MatchVariables
+
+
+def test_drop_missing_data():
+    data = {
+        "x": [np.nan, 1, 1, 0, np.nan],
+        "y": ["a", np.nan, "b", np.nan, "a"],
+    }
+    df = pd.DataFrame(data)
+
+    dmd = DropMissingData()
+    dmd.fit(df)
+    dmd.transform(df)  # NOTE(review): the transformed frame is discarded
+
+    return dmd  # the fitted transformer itself is what gets pickled
+
+
+def test_match_variables():
+    train = pd.DataFrame(
+        {
+            "Name": ["tom", "nick", "krish", "jack"],
+            "City": ["London", "Manchester", "Liverpool", "Bristol"],
+            "Age": [20, 21, 19, 18],
+            "Marks": [0.9, 0.8, 0.7, 0.6],
+        }
+    )
+
+    test = pd.DataFrame(
+        {
+            "Name": ["tom", "sam", "nick"],
+            "Age": [20, 22, 23],
+            "Marks": [0.9, 0.7, 0.6],
+            "Hobbies": ["tennis", "rugby", "football"],
+        }
+    )
+
+    match_columns = MatchVariables()
+
+    match_columns.fit(train)
+
+    df_transformed = match_columns.transform(test)
+
+    return df_transformed
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_holoviews.py
new file mode 100644
index 00000000000..bef02c86355
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_holoviews.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import holoviews as hv
+import numpy as np
+import pandas as pd
+import pytest
+
+nsamps = 1000
+hv.extension("bokeh")  # load holoviews extension
+
+
+def assert_holoviews_equal(expect, got):
+    expect_data, expect_ndims, expect_kdims, expect_vdims, expect_shape = (
+        expect
+    )
+    got_data, got_ndims, got_kdims, got_vdims, got_shape = got
+
+    if isinstance(expect_data, dict):
+        np.testing.assert_allclose(expect_data["x"], got_data["x"])
+        np.testing.assert_allclose(
+            expect_data["Frequency"], got_data["Frequency"]
+        )
+    else:
+        pd._testing.assert_frame_equal(expect_data, got_data)
+    assert expect_ndims == got_ndims
+    assert expect_kdims == got_kdims
+    assert expect_vdims == got_vdims
+    assert expect_shape == got_shape
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_holoviews_equal)
+
+
+@pytest.fixture(scope="module")
+def df():
+    rng = np.random.default_rng(42)
+    return pd.DataFrame(
+        {
+            "x": rng.random(nsamps),
+            "y": rng.random(nsamps),
+            "category": rng.integers(0, 10, nsamps),
+            "category2": rng.integers(0, 10, nsamps),
+        }
+    )
+
+
+def get_plot_info(plot):
+    return (
+        plot.data,
+        plot.ndims,
+        plot.kdims,
+        plot.vdims,
+        plot.shape,
+    )
+
+
+def test_holoviews_barplot(df):
+    return get_plot_info(hv.Bars(df, kdims="category", vdims="y"))
+
+
+def test_holoviews_scatterplot(df):
+    return get_plot_info(hv.Scatter(df, kdims="x", vdims="y"))
+
+
+def test_holoviews_curve(df):
+    return get_plot_info(hv.Curve(df, kdims="category", vdims="y"))
+
+
+def test_holoviews_heatmap(df):
+    return get_plot_info(
+        hv.HeatMap(df, kdims=["category", "category2"], vdims="y")
+    )
+
+
+def test_holoviews_histogram(df):
+    return get_plot_info(hv.Histogram(df.values))
+
+
+def test_holoviews_hexbin(df):
+    return get_plot_info(hv.HexTiles(df, kdims=["x", "y"], vdims="y"))
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_hvplot.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_hvplot.py
new file mode 100644
index 00000000000..0f0d2f8bcbd
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_hvplot.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import hvplot.pandas  # noqa: F401, needs to monkey patch pandas with this.
+import numpy as np
+import pandas as pd
+import pytest
+
+nsamps = 1000
+
+
+def assert_hvplot_equal(expect, got):
+    """Compare two ``get_plot_info`` tuples: data first, then metadata."""
+    # Each side is (data, ndims, kdims, vdims, shape); the unpacking fails
+    # if either tuple does not have exactly five entries.
+    e_data, e_ndims, e_kdims, e_vdims, e_shape = expect
+    g_data, g_ndims, g_kdims, g_vdims, g_shape = got
+
+    if isinstance(e_data, dict):
+        # Dict payloads are compared on their "x" and "Frequency" entries.
+        for key in ("x", "Frequency"):
+            np.testing.assert_allclose(e_data[key], g_data[key])
+    else:
+        pd._testing.assert_frame_equal(e_data, g_data)
+    assert e_ndims == g_ndims
+    assert e_kdims == g_kdims
+    assert e_vdims == g_vdims
+    assert e_shape == g_shape
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_hvplot_equal)
+
+
+@pytest.fixture(scope="module")
+def df():
+    """Seeded frame: two float columns plus two integer category columns."""
+    gen = np.random.default_rng(42)
+    columns = {
+        "x": gen.random(nsamps),
+        "y": gen.random(nsamps),
+        "category": gen.integers(0, 10, nsamps),
+        "category2": gen.integers(0, 10, nsamps),
+    }
+    return pd.DataFrame(columns)
+
+
+def get_plot_info(plot):
+    """Collect the plot attributes that the equality helper compares."""
+    data = plot.data
+    ndims = plot.ndims
+    kdims = plot.kdims
+    vdims = plot.vdims
+    shape = plot.shape
+    return data, ndims, kdims, vdims, shape
+
+
+def test_hvplot_barplot(df):  # bar chart through the .hvplot accessor
+    return get_plot_info(df.hvplot.bar(x="category", y="y"))
+
+
+def test_hvplot_scatterplot(df):  # scatter of "y" against "x"
+    return get_plot_info(df.hvplot.scatter(x="x", y="y"))
+
+
+def test_hvplot_lineplot(df):  # line of "y" against "x"
+    return get_plot_info(df.hvplot.line(x="x", y="y"))
+
+
+def test_hvplot_heatmap(df):  # heatmap on ("x", "y") with C="y"
+    return get_plot_info(df.hvplot.heatmap(x="x", y="y", C="y"))
+
+
+def test_hvplot_hexbin(df):  # hexbin on ("x", "y") with C="y"
+    return get_plot_info(df.hvplot.hexbin(x="x", y="y", C="y"))
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_ibis.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_ibis.py
new file mode 100644
index 00000000000..2a8cf7c6ac2
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_ibis.py
@@ -0,0 +1,169 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+import ibis
+import numpy as np
+import pandas as pd
+import pytest
+
+ibis.set_backend("pandas")
+ibis.options.interactive = False
+
+
+def ibis_assert_equal(expect, got, rtol: float = 1e-7, atol: float = 0.0):
+    pd._testing.assert_almost_equal(expect, got, rtol=rtol, atol=atol)
+
+
+pytestmark = pytest.mark.assert_eq(fn=ibis_assert_equal)
+
+
+COLUMN_REDUCTIONS = ["sum", "min", "max", "mean", "var", "std"]
+ELEMENTWISE_UFUNCS = [
+    "sin",
+    "cos",
+    "atan",
+    "exp",
+    "log",
+    "abs",
+]
+STRING_UNARY_FUNCS = [
+    "lower",
+    "upper",
+    "capitalize",
+    "reverse",
+]
+
+
+@pytest.fixture
+def ibis_table_num_str():
+    """Memtable "t": integer columns col0..col4, a key and a string column."""
+    n_rows, n_cols = 1000, 5
+    gen = np.random.default_rng(42)
+
+    names = [f"col{x}" for x in np.arange(n_cols)]
+    df = pd.DataFrame(gen.integers(0, 100, (n_rows, n_cols)), columns=names)
+    df["key"] = gen.choice(np.arange(10), n_rows)
+    words = ["Hello", "World", "It's", "Me", "Again"]
+    df["str_col"] = gen.choice(words, n_rows)
+
+    return ibis.memtable(df, name="t")
+
+
+@pytest.fixture
+def ibis_table_num():
+    """Memtable "t": 100 rows of integer columns val0, val1 and a key."""
+    n_rows, n_cols = 100, 2
+    gen = np.random.default_rng(42)
+
+    names = [f"val{x}" for x in np.arange(n_cols)]
+    df = pd.DataFrame(gen.integers(0, 100, (n_rows, n_cols)), columns=names)
+    df["key"] = gen.choice(np.arange(10), n_rows)
+
+    # The join tests combine this table with the wider fixture on "key".
+    return ibis.memtable(df, name="t")
+
+
+@pytest.mark.parametrize("op", COLUMN_REDUCTIONS)
+def test_column_reductions(ibis_table_num_str, op):
+    """Whole-column reduction ``op`` applied to ``col1``."""
+    return getattr(ibis_table_num_str.col1, op)().to_pandas()
+
+
+@pytest.mark.parametrize("op", ["mean", "sum", "min", "max"])
+def test_groupby_reductions(ibis_table_num_str, op):
+    """Reduction ``op`` on ``col1`` reached through ``group_by("key")``."""
+    return getattr(ibis_table_num_str.group_by("key").col1, op)().to_pandas()
+
+
+@pytest.mark.parametrize("op", ELEMENTWISE_UFUNCS)
+def test_mutate_ufunc(ibis_table_num_str, op):
+    """Add ``op(col1)`` as an extra column (always named ``col1_sin``)."""
+    new_column = getattr(ibis_table_num_str.col1, op)()
+    return ibis_table_num_str.mutate(col1_sin=new_column).to_pandas()
+
+
+@pytest.mark.parametrize("op", STRING_UNARY_FUNCS)
+def test_string_unary(ibis_table_num_str, op):
+    """String method ``op`` applied to ``str_col``."""
+    return getattr(ibis_table_num_str.str_col, op)().to_pandas()
+
+
+def test_nunique(ibis_table_num_str):
+    """``nunique()`` of ``col1``."""
+    return ibis_table_num_str.col1.nunique().to_pandas()
+
+
+def test_count(ibis_table_num_str):
+    """``count()`` of ``col1``."""
+    return ibis_table_num_str.col1.count().to_pandas()
+
+
+def test_select(ibis_table_num_str):
+    """Projection onto ``col0`` and ``col1``."""
+    return ibis_table_num_str.select("col0", "col1").to_pandas()
+
+
+def test_between(ibis_table_num_str):
+    """``key.between(4, 8)`` materialised through pandas."""
+    return ibis_table_num_str.key.between(4, 8).to_pandas()
+
+
+def test_notin(ibis_table_num_str):
+    """``key.notin([0, 1, 8, 3])`` materialised through pandas."""
+    return ibis_table_num_str.key.notin([0, 1, 8, 3]).to_pandas()
+
+
+def test_window(ibis_table_num_str):
+    """Grouped mutate adding ``demeaned`` = col1 minus its mean."""
+    t = ibis_table_num_str
+    centered = t.col1 - t.col1.mean()
+    return t.group_by("key").mutate(demeaned=centered).to_pandas()
+
+
+def test_limit(ibis_table_num_str):
+    """Result of ``limit(5)`` on the table."""
+    return ibis_table_num_str.limit(5).to_pandas()
+
+
+def test_filter(ibis_table_num_str):  # two predicates passed as one list
+    t = ibis_table_num_str
+    return t.filter([t.key == 4, t.col0 > 15]).to_pandas()
+
+
+@pytest.mark.skip(reason="Join ordering not currently guaranteed, i.e., flaky")
+@pytest.mark.parametrize("join_type", ["inner", "left", "right"])
+def test_join_exact_ordering(ibis_table_num_str, ibis_table_num, join_type):
+    """Join both fixtures on ``key`` and return the rows as produced."""
+    left, right = ibis_table_num_str, ibis_table_num
+    joined = left.join(right, "key", how=join_type)
+    return joined.to_pandas()
+
+
+@pytest.mark.parametrize("join_type", ["inner", "left", "right"])
+def test_join_sort_correctness(ibis_table_num_str, ibis_table_num, join_type):
+    """
+    Row order of a join result is not guaranteed at the moment, so the
+    joined frame is sorted on every column before it is returned.
+    """
+    left, right = ibis_table_num_str, ibis_table_num
+    joined = left.join(right, "key", how=join_type).to_pandas()
+
+    # Sorting by all columns and renumbering the index makes the result
+    # independent of the order in which the rows were produced.
+    sort_keys = joined.columns.tolist()
+    ordered = joined.sort_values(by=sort_keys)
+    return ordered.reset_index(drop=True)
+
+
+def test_order_by(ibis_table_num_str):
+    """Table ordered with ``ibis.desc("col1")``."""
+    return ibis_table_num_str.order_by(ibis.desc("col1")).to_pandas()
+
+
+def test_aggregate_having(ibis_table_num_str):
+    """Sum/mean of ``col0`` by ``key`` with a ``having`` on mean(col1)."""
+    t = ibis_table_num_str
+    metrics = {"sum_c0": t.col0.sum(), "avg_c0": t.col0.mean()}
+    # Condition handed to ``having``: mean of col1 above 50.
+    keep = t.col1.mean() > 50
+    grouped = t.aggregate(by=["key"], having=keep, **metrics)
+    return grouped.to_pandas()
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_matplotlib.py
new file mode 100644
index 00000000000..665b9d6fb08
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_matplotlib.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import pytest
+from matplotlib.axes import Axes
+from matplotlib.collections import PathCollection
+from matplotlib.lines import Line2D
+from matplotlib.patches import Rectangle
+from pandas._testing import assert_equal
+
+
+def assert_plots_equal(expect, got):
+    """Compare two plot results by the data carried in their artists."""
+    if isinstance(expect, Axes) and isinstance(got, Axes):
+        expect_chs, got_chs = expect.get_children(), got.get_children()
+        # zip() stops at the shorter list, so compare the lengths first.
+        assert len(expect_chs) == len(got_chs)
+        for expect_ch, got_ch in zip(expect_chs, got_chs):
+            assert type(expect_ch) is type(got_ch)
+            if isinstance(expect_ch, Line2D):
+                assert_equal(expect_ch.get_xdata(), got_ch.get_xdata())
+                assert_equal(expect_ch.get_ydata(), got_ch.get_ydata())
+            elif isinstance(expect_ch, Rectangle):
+                assert expect_ch.get_height() == got_ch.get_height()
+    elif all(isinstance(p, PathCollection) for p in (expect, got)):
+        assert_equal(expect.get_offsets()[:, 0], got.get_offsets()[:, 0])
+        assert_equal(expect.get_offsets()[:, 1], got.get_offsets()[:, 1])
+    else:
+        assert_equal(expect, got)
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal)
+
+
+def test_line():
+    """Plot one marked line from two columns; return the current Axes."""
+    frame = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]})
+    (_line,) = plt.plot(frame["x"], frame["y"], marker="o", linestyle="-")
+    return plt.gca()
+
+
+def test_bar():
+    """Bar chart drawn through ``Series.plot``; returns what it returns."""
+    values = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
+    return values.plot(kind="bar")
+
+
+def test_scatter():
+    """Scatter two columns on a new 8x6 figure; return the current Axes."""
+    points = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [5, 4, 3, 2, 1]})
+
+    _fig, axes = plt.subplots(figsize=(8, 6))
+    axes.scatter(points["x"], points["y"])
+    return plt.gca()
+
+
+def test_dataframe_plot():
+    """Default ``DataFrame.plot`` of a seeded 10x5 random frame."""
+    gen = np.random.default_rng(42)
+    values = gen.random((10, 5))
+    frame = pd.DataFrame(values, columns=["a", "b", "c", "d", "e"])
+    return frame.plot()
+
+
+def test_series_plot():
+    """Default ``Series.plot`` of five integers."""
+    series = pd.Series([1, 2, 3, 4, 5])
+
+    return series.plot()
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_numpy.py
new file mode 100644
index 00000000000..472f1889354
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_numpy.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+import pytest
+
+nsamps = 1000
+reductions = ["sum", "min", "max", "mean", "var", "std"]
+
+
+pytestmark = pytest.mark.assert_eq(fn=np.testing.assert_allclose)
+
+
+@pytest.fixture(scope="module")
+def sr():
+    """Seeded Series of ``nsamps`` random floats."""
+    return pd.Series(np.random.default_rng(42).random(nsamps))
+
+
+@pytest.mark.parametrize("op", reductions)
+def test_numpy_series_reductions(sr, op):  # np.<op> applied to a Series
+    return getattr(np, op)(sr)
+
+
+@pytest.fixture(scope="module")
+def df():  # seeded frame with two float columns, "A" and "B"
+    rng = np.random.default_rng(42)
+    return pd.DataFrame({"A": rng.random(nsamps), "B": rng.random(nsamps)})
+
+
+@pytest.mark.parametrize("op", reductions)
+def test_numpy_dataframe_reductions(df, op):  # np.<op> on a DataFrame
+    return getattr(np, op)(df)
+
+
+def test_numpy_dot(df):  # product of the frame with its own transpose
+    return np.dot(df, df.T)
+
+
+def test_numpy_fft(sr):
+    """Result of ``np.fft.fft`` called directly on the Series."""
+    return np.fft.fft(sr)
+
+
+def test_numpy_sort(df):  # np.sort with its default arguments
+    return np.sort(df)
+
+
+@pytest.mark.parametrize("percentile", [0, 25, 50, 75, 100])
+def test_numpy_percentile(df, percentile):  # np.percentile, default axis
+    return np.percentile(df, percentile)
+
+
+def test_numpy_unique(df):  # np.unique called on the whole frame
+    return np.unique(df)
+
+
+def test_numpy_transpose(df):  # np.transpose called on the frame
+    return np.transpose(df)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_plotly.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_plotly.py
new file mode 100644
index 00000000000..27d9df83476
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_plotly.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import numpy as np
+import pandas as pd
+import plotly.express as px
+import pytest
+
+nsamps = 100
+
+
+def assert_plotly_equal(expect, got):  # recursive structural comparison
+    assert type(expect) == type(got)
+    if isinstance(expect, np.ndarray):
+        np.testing.assert_allclose(expect, got)
+    elif isinstance(expect, dict):
+        assert expect.keys() == got.keys()
+        for key, expect_value in expect.items():
+            assert_plotly_equal(expect_value, got[key])
+    elif isinstance(got, list):
+        assert len(expect) == len(got)
+        for expect_item, got_item in zip(expect, got):
+            assert_plotly_equal(expect_item, got_item)
+    else:
+        assert expect == got
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_plotly_equal)
+
+
+@pytest.fixture(scope="module")
+def df():
+    """Seeded frame: two float columns plus two integer category columns."""
+    gen = np.random.default_rng(42)
+    columns = {
+        "x": gen.random(nsamps),
+        "y": gen.random(nsamps),
+        "category": gen.integers(0, 10, nsamps),
+        "category2": gen.integers(0, 10, nsamps),
+    }
+    return pd.DataFrame(columns)
+
+
+def test_plotly_scatterplot(df):  # px.scatter figure via to_plotly_json()
+    return px.scatter(df, x="x", y="y").to_plotly_json()
+
+
+def test_plotly_lineplot(df):  # px.line figure via to_plotly_json()
+    return px.line(df, x="category", y="y").to_plotly_json()
+
+
+def test_plotly_barplot(df):  # px.bar figure via to_plotly_json()
+    return px.bar(df, x="category", y="y").to_plotly_json()
+
+
+def test_plotly_histogram(df):  # px.histogram of the "category" column
+    return px.histogram(df, x="category").to_plotly_json()
+
+
+def test_plotly_pie(df):  # px.pie: "category" values named by "category2"
+    return px.pie(df, values="category", names="category2").to_plotly_json()
+
+
+def test_plotly_heatmap(df):  # density heatmap of the two category columns
+    return px.density_heatmap(df, x="category", y="category2").to_plotly_json()
+
+
+def test_plotly_boxplot(df):  # px.box of "y" per "category"
+    return px.box(df, x="category", y="y").to_plotly_json()
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_pytorch.py
new file mode 100644
index 00000000000..ad287471aa0
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_pytorch.py
@@ -0,0 +1,126 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+import pytest
+import torch
+
+pytestmark = pytest.mark.assert_eq(fn=torch.testing.assert_close)
+
+
+@pytest.fixture
+def data():  # (x1, x2, y): float32 features, int64 labels
+    rng = np.random.default_rng(0)
+    x1 = rng.random(100, dtype=np.float32)  # uniform samples in [0, 1)
+    x2 = rng.random(100, dtype=np.float32)
+    y = np.zeros(100).astype(np.int64)
+
+    y[(x1 > x2) & (x1 > 0)] = 0
+    y[(x1 < x2) & (x1 > 0)] = 1
+    y[(x1 > x2) & (x1 < 0)] = 2  # NOTE(review): x1 < 0 never holds here
+    y[(x1 < x2) & (x1 < 0)] = 3  # NOTE(review): likewise unreachable
+
+    return x1, x2, y
+
+
+class Dataset(torch.utils.data.Dataset):
+    """Map-style dataset yielding ``((x1[i], x2[i]), y[i])`` samples."""
+
+    def __init__(self, x1, x2, y):
+        self.x1, self.x2, self.y = x1, x2, y
+
+    def __getitem__(self, idx):
+        # Features travel as a pair; the label is the second element.
+        features = (self.x1[idx], self.x2[idx])
+        return features, self.y[idx]
+
+    def __len__(self):
+        # The first feature container defines the dataset length.
+        return len(self.x1)
+
+
+def test_dataloader_auto_batching(data):
+    """First batch (size 10) from a DataLoader fed with pandas Series."""
+    series = [pd.Series(column) for column in data]
+    dataset = Dataset(*series)
+
+    # No collate_fn is passed, so the DataLoader default is used.
+    loader = torch.utils.data.DataLoader(dataset, batch_size=10)
+    (batch_x1, batch_x2), batch_y = next(iter(loader))
+
+    return batch_x1, batch_x2, batch_y
+
+
+def test_dataloader_manual_batching(data):
+    """First item from a DataLoader created with ``batch_size=None``."""
+    series = [pd.Series(column) for column in data]
+    dataset = Dataset(*series)
+
+    # No collate_fn is passed, so the DataLoader default is used.
+    loader = torch.utils.data.DataLoader(dataset, batch_size=None)
+    (item_x1, item_x2), item_y = next(iter(loader))
+
+    return item_x1, item_x2, item_y
+
+
+class Model(torch.nn.Module):
+    """Two-hidden-layer MLP mapping (x1, x2) pairs to 4 class logits."""
+
+    def __init__(self):
+        super().__init__()
+        self.fc1 = torch.nn.Linear(2, 10)
+        self.relu1 = torch.nn.ReLU()
+        self.fc2 = torch.nn.Linear(10, 10)
+        self.relu2 = torch.nn.ReLU()
+        self.output = torch.nn.Linear(10, 4)
+
+    def forward(self, x1, x2):
+        x = torch.stack([x1, x2], dim=0).T
+        x = self.relu2(self.fc2(self.relu1(self.fc1(x))))
+        # Return raw logits: CrossEntropyLoss applies log-softmax itself.
+        return self.output(x)
+
+
+def train(model, dataloader, optimizer, criterion):
+    """Run one pass over ``dataloader``, stepping the optimizer per batch."""
+    model.train()
+    for (x1, x2), y in dataloader:
+        # Every tensor of the batch is moved to the "cuda" device first.
+        x1, x2, y = x1.to("cuda"), x2.to("cuda"), y.to("cuda")
+
+        # Reset gradients, run forward + loss, backpropagate, update.
+        optimizer.zero_grad()
+        batch_loss = criterion(model(x1, x2), y)
+        batch_loss.backward()
+        optimizer.step()
+
+
+def test_torch_train(data):
+    """Train ``Model`` for one pass, then return its output on a batch."""
+    torch.manual_seed(0)  # seed torch's RNG before the model is built
+
+    dataset = Dataset(*(pd.Series(column) for column in data))
+    # No collate_fn is passed, so the DataLoader default is used.
+    loader = torch.utils.data.DataLoader(dataset, batch_size=10)
+
+    net = Model().to("cuda")
+    sgd = torch.optim.SGD(net.parameters(), lr=0.001)
+    loss_fn = torch.nn.CrossEntropyLoss()
+
+    train(net, loader, sgd, loss_fn)
+
+    # Evaluate on the first batch of the same loader.
+    (eval_x1, eval_x2), _labels = next(iter(loader))
+    eval_x1, eval_x2 = eval_x1.to("cuda"), eval_x2.to("cuda")
+
+    return net(eval_x1, eval_x2)
+
+
+def test_torch_tensor_ctor():
+    """``torch.tensor`` built from ``Series.values``."""
+    return torch.tensor(pd.Series(range(5)).values)
+
+
+def test_torch_tensor_from_numpy():
+    """``torch.from_numpy`` applied to ``Series.values``."""
+    return torch.from_numpy(pd.Series(range(5)).values)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_scipy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_scipy.py
new file mode 100644
index 00000000000..963a8549000
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_scipy.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+import pytest
+import scipy
+
+
+@pytest.mark.parametrize("func", ["hmean", "tvar", "gstd"])
+def test_scipy_stats(func):
+    """Apply ``scipy.stats.<func>`` to a seeded 1000-element Series."""
+    sample = pd.Series(np.random.default_rng(42).random(1000))
+    return getattr(scipy.stats, func)(sample)
+
+
+@pytest.mark.parametrize("func", ["norm"])
+def test_scipy_linalg(func):
+    """Apply ``scipy.linalg.<func>`` to a seeded 1000-element Series."""
+    sample = pd.Series(np.random.default_rng(42).random(1000))
+    return getattr(scipy.linalg, func)(sample)
+
+
+pytestmark = pytest.mark.assert_eq(fn=pd._testing.assert_almost_equal)
+
+
+def test_compute_pi():
+    """Approximate pi as 4x the trapezoid area under a quarter circle."""
+    grid = np.linspace(0, 1, 100)
+    heights = (1 - grid**2) ** 0.5  # y = sqrt(1 - x^2) on [0, 1]
+
+    x = pd.Series(grid)
+    y = pd.Series(heights)
+
+    return scipy.integrate.trapezoid(y, x) * 4
+
+
+def test_matrix_solve():
+    """Solve a 2x2 linear system given as a DataFrame and a Series."""
+    coeffs = pd.DataFrame([[2, 3], [1, 2]])
+    rhs = pd.Series([1, 2])
+    return scipy.linalg.solve(coeffs, rhs)
+
+
+def test_correlation():
+    """``pearsonr`` of an increasing column against a decreasing one."""
+    frame = pd.DataFrame({"A": [1, 2, 3, 4, 5], "B": [5, 4, 3, 2, 1]})
+    return scipy.stats.pearsonr(frame["A"], frame["B"])
+
+
+def test_optimization():
+    """Minimise the Rosenbrock function with ``fmin`` from a Series start."""
+    start = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
+
+    def rosen(x):  # banana function from scipy tutorial
+        head, tail = x[:-1], x[1:]
+        terms = 100.0 * (tail - head**2.0) ** 2.0 + (1 - head) ** 2.0
+        return sum(terms)
+
+    return scipy.optimize.fmin(rosen, start)
+
+
+def test_regression():  # slope and intercept of "y" regressed on "x"
+    data = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 5, 4, 5]})
+    result = scipy.stats.linregress(data["x"], data["y"])
+    return result.slope, result.intercept
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_seaborn.py
new file mode 100644
index 00000000000..4b272900acd
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_seaborn.py
@@ -0,0 +1,60 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import pandas as pd
+import pytest
+import seaborn as sns
+from matplotlib.axes import Axes
+from matplotlib.collections import PathCollection
+from matplotlib.lines import Line2D
+from matplotlib.patches import Rectangle
+from pandas._testing import assert_equal
+
+
+def assert_plots_equal(expect, got):
+    """Compare two plot results by the data carried in their artists."""
+    if isinstance(expect, Axes) and isinstance(got, Axes):
+        expect_chs, got_chs = expect.get_children(), got.get_children()
+        # zip() stops at the shorter list, so compare the lengths first.
+        assert len(expect_chs) == len(got_chs)
+        for expect_ch, got_ch in zip(expect_chs, got_chs):
+            assert type(expect_ch) is type(got_ch)
+            if isinstance(expect_ch, Line2D):
+                assert_equal(expect_ch.get_xdata(), got_ch.get_xdata())
+                assert_equal(expect_ch.get_ydata(), got_ch.get_ydata())
+            elif isinstance(expect_ch, Rectangle):
+                assert expect_ch.get_height() == got_ch.get_height()
+    elif all(isinstance(p, PathCollection) for p in (expect, got)):
+        assert_equal(expect.get_offsets()[:, 0], got.get_offsets()[:, 0])
+        assert_equal(expect.get_offsets()[:, 1], got.get_offsets()[:, 1])
+    else:
+        assert_equal(expect, got)
+
+
+pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal)
+
+
+@pytest.fixture(scope="module")
+def df():
+    """Five-row frame: numeric ``x``/``y`` plus a string ``hue`` column."""
+    columns = {
+        "x": [2, 3, 4, 5, 11],
+        "y": [4, 3, 2, 1, 15],
+        "hue": ["c", "a", "b", "b", "a"],
+    }
+
+    return pd.DataFrame(columns)
+
+
+def test_bar(df):
+    """``sns.barplot`` of ``y`` against ``x``."""
+    return sns.barplot(data=df, x="x", y="y")
+
+
+def test_scatter(df):
+    """``sns.scatterplot`` of ``y`` against ``x``, coloured by ``hue``."""
+    return sns.scatterplot(data=df, x="x", y="y", hue="hue")
+
+
+def test_lineplot_with_sns_data():
+    """Line plot of passengers per month from the "flights" dataset."""
+    flights = sns.load_dataset("flights")
+    return sns.lineplot(data=flights, x="month", y="passengers")
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_sklearn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_sklearn.py
new file mode 100644
index 00000000000..1635fd3dcda
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_sklearn.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import numpy as np
+import pandas as pd
+import pytest
+from sklearn.cluster import KMeans
+from sklearn.feature_selection import SelectKBest, f_classif
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+
+def test_regression():
+    """Fit a logistic regression on a tiny frame; return test accuracy."""
+    frame = pd.DataFrame(
+        {
+            "feature1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+            "feature2": [2, 4, 1, 3, 5, 7, 6, 8, 10, 9],
+            "target": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
+        }
+    )
+    features = frame[["feature1", "feature2"]]
+    labels = frame["target"]
+
+    # Hold out 20% of the rows; random_state pins which rows those are.
+    split = train_test_split(features, labels, test_size=0.2, random_state=42)
+    train_X, test_X, train_y, test_y = split
+
+    # Logistic regression with the estimator's default settings.
+    classifier = LogisticRegression()
+    classifier.fit(train_X, train_y)
+
+    # Prediction phase: score the model on the held-out rows.
+    predicted = classifier.predict(test_X)
+    accuracy = accuracy_score(test_y, predicted)
+
+    return accuracy
+
+
+@pytest.mark.assert_eq(fn=np.testing.assert_allclose)
+def test_clustering():
+    """Cluster 300 seeded 2-D points into 3 groups; return the centres."""
+    gen = np.random.default_rng(42)
+    points = pd.DataFrame(gen.random((300, 2)), columns=["x", "y"])
+
+    # fit() returns the estimator itself, so construction and fitting chain.
+    fitted = KMeans(n_clusters=3, random_state=42).fit(points)
+
+    # The fitted centres are what the two runs are compared on.
+    return fitted.cluster_centers_
+
+
+def test_feature_selection():
+    """Sorted names of the 5 features kept by ``SelectKBest`` out of 10."""
+    n_samples, n_features = 100, 10
+    gen = np.random.default_rng(42)
+
+    X = gen.random((n_samples, n_features))
+    y = gen.integers(0, 2, size=n_samples)
+
+    names = [f"feature{i}" for i in range(1, n_features + 1)]
+    data = pd.DataFrame(X, columns=names)
+    data["target"] = y
+
+    # Select the top k features with f_classif as the score function.
+    # The selector is fitted on the raw arrays; the frame is only used
+    # afterwards to translate the selected positions into column names.
+    selector = SelectKBest(score_func=f_classif, k=5)
+    selector.fit_transform(X, y)
+    picked = selector.get_support(indices=True)
+
+    chosen_columns = data.iloc[:, picked].columns
+    return sorted(chosen_columns.tolist())
+
+
+@pytest.mark.assert_eq(fn=np.testing.assert_allclose)
+def test_data_scaling():
+    """Standardise a 5-element Series passed as a single-column array."""
+    series = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0])
+
+    column = series.values.reshape(-1, 1)  # (5,) -> (5, 1)
+    return StandardScaler().fit_transform(column)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy.py
new file mode 100644
index 00000000000..69248002a58
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+import pytest
+import stumpy
+from numba import cuda
+from pandas._testing import assert_equal
+
+
+def stumpy_assert_equal(expected, got):
+    # Cast every array leaf (recursing into tuples/lists) to float64 first.
+    def as_float64(value):
+        if isinstance(value, (tuple, list)):
+            return [as_float64(item) for item in value]
+        return value.astype(np.float64)
+
+    assert_equal(as_float64(expected), as_float64(got))
+
+
+pytestmark = pytest.mark.assert_eq(fn=stumpy_assert_equal)
+
+
+def test_1d_time_series():
+    """``stumpy.stump`` (m=3) on a seeded 10-point pandas Series."""
+    gen = np.random.default_rng(42)
+    series = pd.Series(gen.random(10))
+
+    return stumpy.stump(series, 3)
+
+
+def test_1d_gpu():
+    """``stumpy.gpu_stump`` on 10000 seeded points using every listed GPU."""
+    gen = np.random.default_rng(42)
+    # NOTE: the series stays a plain NumPy array; no pandas object is built.
+    series = gen.random(10000)
+
+    # Approximately how many data points might be found in a pattern.
+    window = 50
+
+    # Ids of all GPU devices reported by numba.
+    device_ids = [gpu.id for gpu in cuda.list_devices()]
+
+    return stumpy.gpu_stump(series, m=window, device_id=device_ids)
+
+
+def test_multidimensional_timeseries():
+    """``stumpy.mstump`` on a seeded array of shape (3, 1000)."""
+    gen = np.random.default_rng(42)
+    # One row per dimension (3 rows of 1000 values).
+    series = gen.random((3, 1000))
+
+    # Approximately how many data points might be found in a pattern.
+    window = 50
+    return stumpy.mstump(series, m=window)
+
+
+def test_anchored_time_series_chains():
+    """``atsc`` and ``allc`` results derived from one ``stump`` profile."""
+    gen = np.random.default_rng(42)
+    series = gen.random(10000)
+
+    # Approximately how many data points might be found in a pattern.
+    window = 50
+    profile = stumpy.stump(series, m=window)
+
+    # Columns 2 and 3 of the profile are passed on as the left and right
+    # matrix profile indices.
+    left_idx = profile[:, 2]
+    right_idx = profile[:, 3]
+
+    # Subsequence index for which the anchored chain is retrieved.
+    anchor = 10
+    anchored_chain = stumpy.atsc(left_idx, right_idx, anchor)
+
+    chains = stumpy.allc(left_idx, right_idx)
+    all_chain_set, longest_unanchored_chain = chains
+
+    return anchored_chain, all_chain_set, longest_unanchored_chain
+
+
+def test_semantic_segmentation():
+    """``stumpy.fluss`` with two regimes, fed from a ``stump`` profile."""
+    gen = np.random.default_rng(42)
+    series = gen.random(10000)
+
+    # Approximately how many data points might be found in a pattern.
+    window = 50
+    profile = stumpy.stump(series, m=window)
+
+    # Column 1 of the profile is handed to fluss as its first argument.
+    indices = profile[:, 1]
+    subseq_len = 50
+    return stumpy.fluss(indices, L=subseq_len, n_regimes=2, excl_factor=1)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy_distributed.py
new file mode 100644
index 00000000000..37e3cc34856
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy_distributed.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+import pytest
+import stumpy
+from pandas._testing import assert_equal
+
+from dask.distributed import Client, LocalCluster
+
+
+def stumpy_assert_equal(expected, got):
+    # Cast every array leaf (recursing into tuples/lists) to float64 first.
+    def as_float64(value):
+        if isinstance(value, (tuple, list)):
+            return [as_float64(item) for item in value]
+        return value.astype(np.float64)
+
+    assert_equal(as_float64(expected), as_float64(got))
+
+
+pytestmark = pytest.mark.assert_eq(fn=stumpy_assert_equal)
+
+
+@pytest.fixture(scope="module")
+def dask_client():
+    """Shared dask client for all tests in this module (4 local workers)."""
+    cluster = LocalCluster(n_workers=4, threads_per_worker=1)
+    with cluster, Client(cluster) as client:
+        yield client
+
+
+def test_1d_distributed(dask_client):
+    """``stumpy.stumped`` (m=10) on a seeded 100-point pandas Series."""
+    np.random.seed(42)
+    series = pd.Series(np.random.rand(100))
+    return stumpy.stumped(dask_client, series, 10)
+
+
+def test_multidimensional_distributed_timeseries(dask_client):
+    """``stumpy.mstumped`` on a seeded array of shape (3, 1000)."""
+    np.random.seed(42)
+    # One row per dimension (3 rows of 1000 values).
+    series = np.random.rand(3, 1000)
+
+    # Approximately how many data points might be found in a pattern.
+    window = 50
+    return stumpy.mstumped(dask_client, series, m=window)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_tensorflow.py
new file mode 100644
index 00000000000..ba1f518cbfd
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_tensorflow.py
@@ -0,0 +1,367 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+import numpy as np
+import pandas as pd
+import pytest
+import tensorflow as tf
+
+SHUFFLE_BUFFER = 500
+BATCH_SIZE = 2
+
+pytestmark = pytest.mark.assert_eq(fn=pd._testing.assert_equal)
+
+
+@pytest.fixture(scope="module")
+def df():
+    rng = np.random.RandomState(42)
+
+    nrows = 303
+    columns = {
+        "age": rng.randint(29, 78, size=(nrows,), dtype="int64"),
+        "sex": rng.randint(0, 2, size=(nrows,), dtype="int64"),
+        "cp": rng.randint(0, 5, size=(nrows,), dtype="int64"),
+        "trestbps": rng.randint(94, 201, size=(nrows,), dtype="int64"),
+        "chol": rng.randint(126, 565, size=(nrows,), dtype="int64"),
+        "fbs": rng.randint(0, 2, size=(nrows,), dtype="int64"),
+        "restecg": rng.randint(0, 3, size=(nrows,), dtype="int64"),
+        "thalach": rng.randint(71, 203, size=(nrows,), dtype="int64"),
+        "exang": rng.randint(0, 2, size=(nrows,), dtype="int64"),
+        "oldpeak": rng.uniform(0.0, 6.2, size=(nrows,)),
+        "slope": rng.randint(1, 4, size=(nrows,), dtype="int64"),
+        "ca": rng.randint(0, 4, size=(nrows,), dtype="int64"),
+        "thal": rng.choice(
+            ["fixed", "normal", "reversible", "1", "2"], size=(nrows,)
+        ),
+        "target": rng.randint(0, 2, size=(nrows,), dtype="int64"),
+    }
+
+    return pd.DataFrame(columns)
+
+
+@pytest.fixture(scope="module")
+def target(df):
+    return df.pop("target")
+
+
+@pytest.fixture
+def model_gen():
+    def make_model(numeric_features):
+        normalizer = tf.keras.layers.Normalization(axis=-1)
+        normalizer.adapt(numeric_features)
+        model = tf.keras.Sequential(
+            [
+                normalizer,
+                tf.keras.layers.Dense(10, activation="relu"),
+                tf.keras.layers.Dense(1),
+            ]
+        )
+
+        model.compile(
+            optimizer="adam",
+            loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+            metrics=["accuracy"],
+        )
+        return model
+
+    return make_model
+
+
+def test_dataframe_as_array(model_gen, df, target):
+    tf.keras.utils.set_random_seed(42)
+
+    numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"]
+    numeric_features = df[numeric_feature_names]
+
+    numeric_features = tf.convert_to_tensor(
+        numeric_features.values, dtype=tf.float32
+    )
+
+    model = model_gen(numeric_features)
+    model.fit(numeric_features, target, epochs=1, batch_size=BATCH_SIZE)
+
+    test_data = numeric_features[:BATCH_SIZE]
+    return model.predict(test_data)
+
+
+def test_dataframe_as_dataset(model_gen, df, target):
+    tf.keras.utils.set_random_seed(42)
+
+    numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"]
+    numeric_features = df[numeric_feature_names]
+
+    numeric_features = tf.convert_to_tensor(
+        numeric_features.values, dtype=tf.float32
+    )
+
+    dataset = tf.data.Dataset.from_tensor_slices((numeric_features, target))
+    dataset = dataset.shuffle(SHUFFLE_BUFFER).batch(BATCH_SIZE)
+
+    model = model_gen(numeric_features)
+    model.fit(dataset, epochs=1)
+
+    test_data = dataset.take(1)
+    return model.predict(test_data)
+
+
+def stack_dict(inputs, func=tf.stack):
+    values = []
+    for key in sorted(inputs.keys()):
+        values.append(CastLayer()(inputs[key]))
+
+    class MyLayer(tf.keras.layers.Layer):
+        def call(self, val):
+            return func(val, axis=-1)
+
+    return MyLayer()(values)
+
+
+def test_dataframe_as_dictionary_with_keras_input_layer(df, target):
+    # ensure deterministic results
+    tf.keras.utils.set_random_seed(42)
+
+    numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"]
+    numeric_features = df[numeric_feature_names]
+
+    inputs = {}
+    for name in numeric_features:
+        inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=tf.float32)
+
+    x = stack_dict(inputs, func=tf.concat)
+
+    normalizer = tf.keras.layers.Normalization(axis=-1)
+    normalizer.adapt(stack_dict(dict(numeric_features)))
+
+    x = normalizer(x)
+    x = tf.keras.layers.Dense(10, activation="relu")(x)
+    x = tf.keras.layers.Dense(1)(x)
+
+    model = tf.keras.Model(inputs, x)
+
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+        metrics=["accuracy"],
+        run_eagerly=True,
+    )
+
+    # Train with dictionary of columns as input:
+    model.fit(dict(numeric_features), target, epochs=1, batch_size=BATCH_SIZE)
+
+    # Train with a dataset of dictionary-elements
+    numeric_dict_ds = tf.data.Dataset.from_tensor_slices(
+        (dict(numeric_features), target)
+    )
+    numeric_dict_batches = numeric_dict_ds.shuffle(SHUFFLE_BUFFER).batch(
+        BATCH_SIZE
+    )
+    model.fit(numeric_dict_batches, epochs=1)
+
+    # Predict
+    return model.predict(numeric_dict_batches.take(1))
+
+
+def test_full_example_train_with_ds(df, target):
+    # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example
+    # Inputs are converted to tf.dataset and then batched
+
+    # ensure deterministic results
+    tf.keras.utils.set_random_seed(42)
+
+    numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"]
+    binary_feature_names = ["sex", "fbs", "exang"]
+    categorical_feature_names = ["cp", "restecg", "slope", "thal", "ca"]
+
+    numeric_features = df[numeric_feature_names]
+
+    inputs = {}
+    for name, column in df.items():
+        if isinstance(column[0], str):
+            dtype = tf.string
+        elif name in categorical_feature_names or name in binary_feature_names:
+            dtype = tf.int64
+        else:
+            dtype = tf.float32
+
+        inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)
+
+    preprocessed = []
+
+    # Process binary features
+    for name in binary_feature_names:
+        inp = inputs[name]
+        inp = inp[:, tf.newaxis]
+        float_value = CastLayer()(inp)
+        preprocessed.append(float_value)
+
+    normalizer = tf.keras.layers.Normalization(axis=-1)
+    normalizer.adapt(stack_dict(dict(numeric_features)))
+
+    # Process numeric features
+    numeric_inputs = {}
+    for name in numeric_feature_names:
+        numeric_inputs[name] = inputs[name]
+
+    numeric_inputs = stack_dict(numeric_inputs)
+    numeric_normalized = normalizer(numeric_inputs)
+
+    preprocessed.append(numeric_normalized)
+
+    # Process categorical features
+    for name in categorical_feature_names:
+        vocab = sorted(set(df[name]))
+        print(f"name: {name}")
+        print(f"vocab: {vocab}\n")
+
+        if isinstance(vocab[0], str):
+            lookup = tf.keras.layers.StringLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+        else:
+            lookup = tf.keras.layers.IntegerLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+
+        x = inputs[name][:, tf.newaxis]
+        x = lookup(x)
+        preprocessed.append(x)
+
+    # Concatenate all tensors
+    preprocesssed_result = MyConcatLayer()(preprocessed)
+
+    preprocessor = tf.keras.Model(inputs, preprocesssed_result)
+
+    # Create the model
+    body = tf.keras.Sequential(
+        [
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(1),
+        ]
+    )
+
+    x = preprocessor(inputs)
+    result = body(x)
+
+    model = tf.keras.Model(inputs, result)
+
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+        metrics=["accuracy"],
+    )
+
+    ds = tf.data.Dataset.from_tensor_slices((dict(df), target))
+    ds = ds.batch(BATCH_SIZE)
+    model.fit(ds, epochs=1)
+
+    return model.predict(ds.take(1))
+
+
+class CastLayer(tf.keras.layers.Layer):
+    def __init__(self, **kwargs):
+        super(CastLayer, self).__init__(**kwargs)
+
+    def call(self, inp):
+        return tf.cast(inp, tf.float32)
+
+
+class MyConcatLayer(tf.keras.layers.Layer):
+    def call(self, values):
+        values = [tf.cast(v, tf.float32) for v in values]
+        return tf.concat(values, axis=-1)
+
+
+def test_full_example_train_with_df(df, target):
+    # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example
+    # Inputs are directly passed as dictionary of series
+
+    # ensure deterministic results
+    tf.keras.utils.set_random_seed(42)
+
+    numeric_feature_names = ["age", "thalach", "trestbps", "chol", "oldpeak"]
+    binary_feature_names = ["sex", "fbs", "exang"]
+    categorical_feature_names = ["cp", "restecg", "slope", "thal", "ca"]
+
+    numeric_features = df[numeric_feature_names]
+
+    inputs = {}
+
+    for name, column in df.items():
+        if isinstance(column[0], str):
+            dtype = tf.string
+        elif name in categorical_feature_names or name in binary_feature_names:
+            dtype = tf.int64
+        else:
+            dtype = tf.float32
+
+        inputs[name] = tf.keras.Input(shape=(), name=name, dtype=dtype)
+
+    preprocessed = []
+
+    # Process binary features
+    for name in binary_feature_names:
+        inp = inputs[name]
+        inp = inp[:, tf.newaxis]
+        float_value = CastLayer()(inp)
+        preprocessed.append(float_value)
+
+    normalizer = tf.keras.layers.Normalization(axis=-1)
+    normalizer.adapt(stack_dict(dict(numeric_features)))
+
+    # Process numeric features
+    numeric_inputs = {}
+    for name in numeric_feature_names:
+        numeric_inputs[name] = inputs[name]
+
+    numeric_inputs = stack_dict(numeric_inputs)
+    numeric_normalized = normalizer(numeric_inputs)
+
+    preprocessed.append(numeric_normalized)
+
+    # Process categorical features
+    for name in categorical_feature_names:
+        vocab = sorted(set(df[name]))
+        print(f"name: {name}")
+        print(f"vocab: {vocab}\n")
+
+        if isinstance(vocab[0], str):
+            lookup = tf.keras.layers.StringLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+        else:
+            lookup = tf.keras.layers.IntegerLookup(
+                vocabulary=vocab, output_mode="one_hot"
+            )
+
+        x = inputs[name][:, tf.newaxis]
+        x = lookup(x)
+        preprocessed.append(x)
+
+    # Concatenate all tensors
+    preprocesssed_result = MyConcatLayer()(preprocessed)
+
+    preprocessor = tf.keras.Model(inputs, preprocesssed_result)
+
+    # Create the model
+    body = tf.keras.Sequential(
+        [
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(10, activation="relu"),
+            tf.keras.layers.Dense(1),
+        ]
+    )
+
+    x = preprocessor(inputs)
+    result = body(x)
+
+    model = tf.keras.Model(inputs, result)
+
+    model.compile(
+        optimizer="adam",
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+        metrics=["accuracy"],
+    )
+
+    model.fit(dict(df), target, epochs=1, batch_size=BATCH_SIZE)
+
+    return model.predict(dict(df[:BATCH_SIZE]))
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_xgboost.py
new file mode 100644
index 00000000000..70f1e6a4250
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_xgboost.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+import pytest
+import scipy.sparse
+import xgboost as xgb
+from sklearn.datasets import make_regression
+from xgboost.testing import IteratorForTest, make_categorical
+
+n_samples = 128
+n_features = 16
+
+
+def xgboost_assert_equal(expect, got, rtol: float = 1e-7, atol: float = 0.0):
+    if isinstance(expect, (tuple, list)):
+        assert len(expect) == len(got)
+        for e, g in zip(expect, got):
+            xgboost_assert_equal(e, g, rtol, atol)
+    elif isinstance(expect, scipy.sparse.csr_matrix):
+        np.testing.assert_allclose(expect.data, got.data, rtol=rtol, atol=atol)
+        np.testing.assert_equal(expect.indices, got.indices)
+        np.testing.assert_equal(expect.indptr, got.indptr)
+    else:
+        pd._testing.assert_almost_equal(expect, got, rtol=rtol, atol=atol)
+
+
+pytestmark = pytest.mark.assert_eq(fn=xgboost_assert_equal)
+
+
+@pytest.fixture
+def reg_data() -> tuple[np.ndarray, np.ndarray]:
+    X, y = make_regression(n_samples, n_features, random_state=11)
+    return X, y
+
+
+@pytest.fixture
+def reg_batches_data() -> tuple[list[pd.DataFrame], list[pd.Series]]:
+    cov = []
+    res = []
+    for i in range(3):
+        X, y = make_regression(n_samples, n_features, random_state=i + 1)
+        cov.append(pd.DataFrame(X))
+        res.append(pd.Series(y))
+    return cov, res
+
+
+def test_with_dmatrix(
+    reg_data: tuple[np.ndarray, np.ndarray],
+) -> tuple[scipy.sparse.csr_matrix, scipy.sparse.csr_matrix]:
+    """DMatrix is the primary interface for XGBoost."""
+    X, y = reg_data
+    X_df = pd.DataFrame(X)
+    y_ser = pd.Series(y)
+    Xy = xgb.DMatrix(X_df, y_ser)
+    assert Xy.feature_names == list(map(str, X_df.columns))
+    csr_0 = Xy.get_data()
+
+    Xc, yc = make_categorical(
+        n_samples, n_features, n_categories=13, onehot=False
+    )
+    Xy = xgb.DMatrix(Xc, yc, enable_categorical=True)
+    csr_1 = Xy.get_data()
+    return csr_0, csr_1
+
+
+def test_with_quantile_dmatrix(
+    reg_data: tuple[np.ndarray, np.ndarray],
+) -> tuple[scipy.sparse.csr_matrix, scipy.sparse.csr_matrix]:
+    """QuantileDMatrix is an optimization for the `hist` tree method for XGBoost."""
+    from xgboost.testing.data import memory
+
+    memory.clear(warn=False)
+
+    X, y = reg_data
+    X_df = pd.DataFrame(X)
+    y_ser = pd.Series(y)
+    Xy = xgb.QuantileDMatrix(X_df, y_ser)
+    assert Xy.feature_names == list(map(str, X_df.columns))
+    csr_0 = Xy.get_data()
+
+    Xc, yc = make_categorical(
+        n_samples, n_features, n_categories=13, onehot=False
+    )
+    Xy = xgb.QuantileDMatrix(Xc, yc, enable_categorical=True)
+    csr_1 = Xy.get_data()
+    return csr_0, csr_1
+
+
+def test_with_iter_quantile_dmatrix(
+    reg_batches_data: tuple[list[pd.DataFrame], list[pd.Series]],
+) -> scipy.sparse.csr_matrix:
+    """Build a QuantileDMatrix from a batch iterator; return its CSR data."""
+    cov, res = reg_batches_data
+    it = IteratorForTest(cov, res, w=None, cache=None)
+    Xy = xgb.QuantileDMatrix(it)
+    csr = Xy.get_data()
+    return csr
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_with_external_memory(
+    device: str,
+    reg_batches_data: tuple[list[pd.DataFrame], list[pd.Series]],
+) -> np.ndarray:
+    """Train on an iterator-backed, disk-cached DMatrix and predict on it."""
+    cov, res = reg_batches_data
+    it = IteratorForTest(cov, res, w=None, cache="cache")
+    Xy = xgb.DMatrix(it)
+    predt = xgb.train({"device": device}, Xy, num_boost_round=1).predict(Xy)
+    return predt
+
+
+@pytest.mark.parametrize("device", ["cpu", "cuda"])
+def test_predict(device: str) -> np.ndarray:
+    reg = xgb.XGBRegressor(n_estimators=2, device=device)
+    X, y = make_regression(n_samples, n_features, random_state=11)
+    X_df = pd.DataFrame(X)
+    reg.fit(X_df, y)
+    booster = reg.get_booster()
+
+    predt0 = reg.predict(X_df)
+
+    predt1 = booster.inplace_predict(X_df)
+    np.testing.assert_allclose(predt0, predt1)
+
+    predt2 = booster.predict(xgb.DMatrix(X_df))
+    np.testing.assert_allclose(predt0, predt2)
+
+    predt3 = booster.inplace_predict(X)
+    np.testing.assert_allclose(predt0, predt3)
+
+    return predt0

From c2c88a9b6e15fd20bd41edd82bf21b9fb2884471 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Fri, 23 Aug 2024 08:36:50 -0700
Subject: [PATCH 02/24] Add missing ci

---
 .github/workflows/nightly.yaml                |  58 ++++
 .../dependencies.yaml                         | 268 ++++++++++++++++++
 .../{ => tests}/conftest.py                   |   0
 .../{ => tests}/pytest.ini                    |   0
 .../{ => tests}/run_library_tests.sh          |   0
 .../{ => tests}/test_cugraph.py               |   0
 .../{ => tests}/test_cuml.py                  |   0
 .../{ => tests}/test_dask.py                  |   0
 .../{ => tests}/test_featureengine.py         |   0
 .../{ => tests}/test_holoviews.py             |   0
 .../{ => tests}/test_hvplot.py                |   0
 .../{ => tests}/test_ibis.py                  |   0
 .../{ => tests}/test_matplotlib.py            |   0
 .../{ => tests}/test_numpy.py                 |   0
 .../{ => tests}/test_plotly.py                |   0
 .../{ => tests}/test_pytorch.py               |   0
 .../{ => tests}/test_scipy.py                 |   0
 .../{ => tests}/test_seaborn.py               |   0
 .../{ => tests}/test_sklearn.py               |   0
 .../{ => tests}/test_stumpy.py                |   0
 .../{ => tests}/test_stumpy_distributed.py    |   0
 .../{ => tests}/test_tensorflow.py            |   0
 .../{ => tests}/test_xgboost.py               |   0
 23 files changed, 326 insertions(+)
 create mode 100644 .github/workflows/nightly.yaml
 create mode 100644 python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/conftest.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/pytest.ini (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/run_library_tests.sh (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_cugraph.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_cuml.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_dask.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_featureengine.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_holoviews.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_hvplot.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_ibis.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_matplotlib.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_numpy.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_plotly.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_pytorch.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_scipy.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_seaborn.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_sklearn.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_stumpy.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_stumpy_distributed.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_tensorflow.py (100%)
 rename python/cudf/cudf_pandas_tests/third_party_integration_tests/{ => tests}/test_xgboost.py (100%)

diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml
new file mode 100644
index 00000000000..e0660c627a0
--- /dev/null
+++ b/.github/workflows/nightly.yaml
@@ -0,0 +1,58 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+name: cudf-pandas-integration test on default branch (nightly / manually)
+
+on:
+  workflow_dispatch:
+    # The inputs below exist for alignment with the rest of the RAPIDS nightly pipeline. They are currently unused.
+    inputs:
+      branch:
+        required: true
+        type: string
+      date:
+        required: true
+        type: string
+      sha:
+        required: true
+        type: string
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  checkout:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+  extract_libs:
+    name: Extract libraries from dependencies.yaml
+    runs-on: ubuntu-latest
+    outputs:
+      LIBS: ${{ steps.extractlib.outputs.LIBS }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Extract libraries
+        id: extractlib
+        run: |
+          python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
+
+  integration-tests:
+    secrets: inherit
+    needs: extract_libs
+    strategy:
+      fail-fast: false # Continue testing other libraries even if one fails
+      matrix:
+          lib: ${{ fromJSON(needs.extract_libs.outputs.LIBS) }}
+          include:
+            - CUDA_MAJOR: "12" # By default, test libraries with the latest CUDA 12
+            - lib: "tensorflow"
+              CUDA_MAJOR: "11" # Tensorflow does not have cuda 12 build, use one CUDA 11 instance
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10
+    with:
+      build_type: nightly
+      script: "python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh ${{ matrix.lib }}"
+      run_codecov: false
+      # Select a single configuration using amd64 and the desired CUDA major version, with the latest (Python, CUDA) versions.
+      matrix_filter: 'map(select(.ARCH == "amd64" and (.CUDA_VER|startswith("${{ matrix.CUDA_MAJOR }}")))) | max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]) | [.]'
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
new file mode 100644
index 00000000000..c894e0cfb2d
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
@@ -0,0 +1,268 @@
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Dependency list for https://github.com/rapidsai/dependency-file-generator
+files:
+  checks:
+    output: none
+    includes:
+      - develop
+      - py_version
+  test_dask:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_dask
+  test_matplotlib:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_matplotlib
+  test_numpy:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_numpy
+  test_pytorch:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_pytorch
+  test_seaborn:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_seaborn
+  test_scipy:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_scipy
+  test_sklearn:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_sklearn
+  test_stumpy:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_stumpy
+  test_tensorflow:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_tensorflow
+  test_xgboost:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_xgboost
+  test_cuml:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_cuml
+  test_cugraph:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_cugraph
+  test_ibis:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_ibis
+  test_hvplot:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_hvplot
+  test_holoviews:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_holoviews
+  test_plotly:
+    output: none
+    includes:
+      - cuda_version
+      - py_version
+      - test_base
+      - test_plotly
+
+channels:
+  - rapidsai-nightly
+  - rapidsai
+  - conda-forge
+  - nvidia
+
+dependencies:
+  develop:
+    common:
+      - output_types: conda
+        packages:
+          - pre-commit
+  cuda_version:
+    specific:
+      - output_types: conda
+        matrices:
+          - matrix:
+              cuda: "11.8"
+            packages:
+              - cuda-version=11.8
+          - matrix:
+              cuda: "12.0"
+            packages:
+              - cuda-version=12.0
+          - matrix:
+              cuda: "12.2"
+            packages:
+              - cuda-version=12.2
+          - matrix:
+              cuda: "12.5"
+            packages:
+              - cuda-version=12.5
+  py_version:
+    specific:
+      - output_types: conda
+        matrices:
+          - matrix:
+              py: "3.10"
+            packages:
+              - python=3.10
+          - matrix:
+              py: "3.11"
+            packages:
+              - python=3.11
+          - matrix:
+            packages:
+              - python>=3.10,<3.12
+  test_base:
+    common:
+      - output_types: conda
+        packages:
+          - cudf==24.10.*,>=0.0.0a0
+          - pandas
+          - pytest
+          - pytest-xdist
+  test_dask:
+    common:
+      - output_types: conda
+        packages:
+          - dask
+  test_matplotlib:
+    common:
+      - output_types: conda
+        packages:
+          - matplotlib-base
+  test_numpy:
+    common:
+      - output_types: conda
+        packages:
+          - numpy
+  test_pytorch:
+    common:
+      - output_types: conda
+        packages:
+          - numpy
+          - pytorch>=2.1.0
+  test_seaborn:
+    common:
+      - output_types: conda
+        packages:
+          - seaborn
+  test_scipy:
+    common:
+      - output_types: conda
+        packages:
+          - scipy
+  test_sklearn:
+    common:
+      - output_types: conda
+        packages:
+          - scikit-learn
+  test_stumpy:
+    common:
+      - output_types: conda
+        packages:
+          - dask
+          - stumpy
+  test_tensorflow:
+    common:
+      - output_types: conda
+        packages:
+          - tensorflow
+  test_xgboost:
+    common:
+      - output_types: conda
+        packages:
+          - hypothesis
+          - numpy
+          - scipy
+          - scikit-learn
+          - pip
+          - pip:
+            - xgboost>=2.0.1
+  test_cuml:
+    common:
+      - output_types: conda
+        packages:
+          - cuml==24.10.*,>=0.0.0a0
+          - scikit-learn
+  test_cugraph:
+    common:
+      - output_types: conda
+        packages:
+          - cugraph==24.10.*,>=0.0.0a0
+          - networkx
+  test_ibis:
+    common:
+      - output_types: conda
+        packages:
+          - pip
+          - pip:
+              - ibis-framework[pandas]
+  test_hvplot:
+    common:
+      - output_types: conda
+        packages:
+          - hvplot
+  test_holoviews:
+    common:
+      - output_types: conda
+        packages:
+          - holoviews
+  test_plotly:
+    common:
+      - output_types: conda
+        packages:
+          - plotly
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/conftest.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/conftest.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/pytest.ini b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/pytest.ini
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/run_library_tests.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/run_library_tests.sh
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/run_library_tests.sh
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/run_library_tests.sh
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cugraph.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cugraph.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cuml.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_cuml.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_dask.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_dask.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_featureengine.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_featureengine.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_holoviews.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_hvplot.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_hvplot.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_ibis.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_ibis.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_matplotlib.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_numpy.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_plotly.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_plotly.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_pytorch.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_scipy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_scipy.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_seaborn.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_sklearn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_sklearn.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_stumpy_distributed.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_tensorflow.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/test_xgboost.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py

From 88789eb024e4861b93933e4d6cb6bc857d244589 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Fri, 23 Aug 2024 09:49:32 -0700
Subject: [PATCH 03/24] Combine jobs

---
 .github/workflows/nightly.yaml                | 42 +++++++------------
 .../ci/check_style.sh                         | 18 --------
 .../ci/release/update-version.sh              | 41 ------------------
 .../tests/run_library_tests.sh                | 11 -----
 4 files changed, 14 insertions(+), 98 deletions(-)
 delete mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh
 delete mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh
 delete mode 100755 python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/run_library_tests.sh

diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml
index e0660c627a0..0c844efdc4e 100644
--- a/.github/workflows/nightly.yaml
+++ b/.github/workflows/nightly.yaml
@@ -20,39 +20,25 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  checkout:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repo
-        uses: actions/checkout@v4
-  extract_libs:
-    name: Extract libraries from dependencies.yaml
+  integration-tests:
     runs-on: ubuntu-latest
-    outputs:
-      LIBS: ${{ steps.extractlib.outputs.LIBS }}
     steps:
       - name: Checkout repo
         uses: actions/checkout@v4
-      - name: Extract libraries
+
+      - name: Extract libraries from dependencies.yaml
         id: extractlib
         run: |
-          python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
+          LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
+          echo "LIBS=${LIBS}" >> $GITHUB_ENV
 
-  integration-tests:
+      - name: Run integration tests
+        run: |
+          for lib in ${{ env.LIBS }}; do
+            echo "Running tests for $lib"
+            CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
+            python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
+          done
+        env:
+          LIBS: ${{ env.LIBS }}
     secrets: inherit
-    needs: extract_libs
-    strategy:
-      fail-fast: false # Continue testing other libraries even if one fails
-      matrix:
-          lib: ${{ fromJSON(needs.extract_libs.outputs.LIBS) }}
-          include:
-            - CUDA_MAJOR: "12" # By default, test libraries with the latest CUDA 12
-            - lib: "tensorflow"
-              CUDA_MAJOR: "11" # Tensorflow does not have cuda 12 build, use one CUDA 11 instance
-    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10
-    with:
-      build_type: nightly
-      script: "python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh ${{ matrix.lib }}"
-      run_codecov: false
-      # Select a single configuration using amd64 and the desired CUDA major version, with the latest (Python, CUDA) versions.
-      matrix_filter: 'map(select(.ARCH == "amd64" and (.CUDA_VER|startswith("${{ matrix.CUDA_MAJOR }}")))) | max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]) | [.]'
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh
deleted file mode 100755
index b81b36ddb45..00000000000
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/check_style.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-set -euo pipefail
-
-rapids-logger "Create checks conda environment"
-. /opt/conda/etc/profile.d/conda.sh
-
-rapids-dependency-file-generator \
-  --output conda \
-  --file-key checks \
-  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
-
-rapids-mamba-retry env create --yes -f env.yaml -n checks
-conda activate checks
-
-# Run pre-commit checks
-pre-commit run --hook-stage manual --all-files --show-diff-on-failure
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh
deleted file mode 100755
index 5b6f8f5ce1c..00000000000
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/release/update-version.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
-###########################################
-# cudf.pandas integration Version Updater #
-###########################################
-
-## Usage
-# bash update-version.sh <new_version>
-
-
-# Format is YY.MM.PP - no leading 'v' or trailing 'a'
-NEXT_FULL_TAG=$1
-
-#Get <major>.<minor> for next version
-NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
-NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
-NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
-
-# Need to distutils-normalize the versions for some use cases
-NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
-
-# Inplace sed replace; workaround for Linux and Mac
-function sed_runner() {
-    sed -i.bak ''"$1"'' $2 && rm -f ${2}.bak
-}
-
-# CI files
-for FILE in .github/workflows/*.yaml; do
-  sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
-done
-
-DEPENDENCIES=(
-  cugraph
-  cudf
-  cuml
-)
-for DEP in "${DEPENDENCIES[@]}"; do
-  for FILE in dependencies.yaml; do
-    sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*/g" "${FILE}"
-  done
-done
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/run_library_tests.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/run_library_tests.sh
deleted file mode 100755
index dafd2e77761..00000000000
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/run_library_tests.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
-# All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"$(dirname "$0")"}
-mkdir -p "${RAPIDS_TESTS_DIR}/test-results"
-
-repo_root=$(git rev-parse --show-toplevel)
-
-TEST_DIR="${repo_root}/tests/" RAPIDS_TESTS_DIR="${RAPIDS_TESTS_DIR}" ${repo_root}/ci/ci_run_library_tests.sh "$@"

From f3ccceabf08530b7a9a893d83fa2b6bc38d5e370 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Sat, 24 Aug 2024 06:05:48 -0700
Subject: [PATCH 04/24] Address review: move nightly.yaml to pr.yaml and test.yaml

---
 .github/workflows/nightly.yaml | 44 ----------------------------------
 .github/workflows/pr.yaml      | 16 +++++++++++++
 .github/workflows/test.yaml    | 16 +++++++++++++
 3 files changed, 32 insertions(+), 44 deletions(-)
 delete mode 100644 .github/workflows/nightly.yaml

diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml
deleted file mode 100644
index 0c844efdc4e..00000000000
--- a/.github/workflows/nightly.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-name: cudf-pandas-integration test on default branch (nightly / manually)
-
-on:
-  workflow_dispatch:
-    # The below exists in alignment with rest of RAPIDS nightly pipeline. They are currently unused.
-    inputs:
-      branch:
-        required: true
-        type: string
-      date:
-        required: true
-        type: string
-      sha:
-        required: true
-        type: string
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  integration-tests:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repo
-        uses: actions/checkout@v4
-
-      - name: Extract libraries from dependencies.yaml
-        id: extractlib
-        run: |
-          LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
-          echo "LIBS=${LIBS}" >> $GITHUB_ENV
-
-      - name: Run integration tests
-        run: |
-          for lib in ${{ env.LIBS }}; do
-            echo "Running tests for $lib"
-            CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-            python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
-          done
-        env:
-          LIBS: ${{ env.LIBS }}
-    secrets: inherit
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 2e2a8b6b9bc..1c4f7131699 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -228,3 +228,19 @@ jobs:
         node_type: cpu4
         build_type: pull-request
         run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
+  integration-tests:
+    needs: wheel-build-cudf
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
+    with:
+      build_type: pull-request
+      node_type: "gpu-v100-latest-1"
+      arch: "amd64"
+      container_image: "rapidsai/ci-conda:latest"
+      run_script: |
+        LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
+        for lib in $LIBS; do
+          echo "Running tests for $lib"
+          CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
+          python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
+        done
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 9feea050b19..ce08d372d89 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -124,3 +124,19 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/cudf_pandas_scripts/run_tests.sh
+  integration-tests:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      container_image: "rapidsai/ci-conda:latest"
+      run_script: |
+        LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
+        for lib in $LIBS; do
+          echo "Running tests for $lib"
+          CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
+          python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
+        done

From 8bd1378d068f247bc15ad8866d153d5fdca8c8ee Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Sat, 24 Aug 2024 06:11:34 -0700
Subject: [PATCH 05/24] add job to pr-builder

---
 .github/workflows/pr.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 1c4f7131699..d173b719f46 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -35,6 +35,7 @@ jobs:
       - unit-tests-cudf-pandas
       - pandas-tests
       - pandas-tests-diff
+      - integration-tests
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
   checks:

From d2a6fc8ef707b4a5bbca7a66626dc30cfb8a8f46 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Mon, 26 Aug 2024 05:45:24 -0700
Subject: [PATCH 06/24] preprocess test names

---
 .github/workflows/pr.yaml                                    | 5 ++++-
 .../third_party_integration_tests/ci/extract_lib.sh          | 1 -
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index d173b719f46..0f64321dbfb 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -240,7 +240,10 @@ jobs:
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
         LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
-        for lib in $LIBS; do
+        LIBS=${LIBS#[}
+        LIBS=${LIBS%]}
+        for lib in ${LIBS//,/ }; do
+          lib=$(echo "$lib" | tr -d '""')
           echo "Running tests for $lib"
           CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
           python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh
index 4511363146e..67ec5c773bc 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh
@@ -16,7 +16,6 @@ extract_lib_from_dependencies_yaml() {
     # rest
     local extracted_libs="$(yq -o json $file | jq -rc '.files | with_entries( select(.key | contains("test_")) ) | keys | map(sub("^test_"; ""))')"
     echo $extracted_libs
-    write_output "LIBS" $extracted_libs
 }
 
 

From de531e2bd1357677a396911cd99927922fad0311 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Mon, 26 Aug 2024 07:43:35 -0700
Subject: [PATCH 07/24] Add --config to rdfg

---
 .github/workflows/test.yaml                                  | 5 ++++-
 .../third_party_integration_tests/ci/test.sh                 | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index ce08d372d89..26754b1a1b0 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -135,7 +135,10 @@ jobs:
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
         LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
-        for lib in $LIBS; do
+        LIBS=${LIBS#[}
+        LIBS=${LIBS%]}
+        for lib in ${LIBS//,/ }; do
+          lib=$(echo "$lib" | tr -d '""')
           echo "Running tests for $lib"
           CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
           python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
index a012513b93a..0d66e27e21a 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
@@ -11,6 +11,7 @@ set -euo pipefail
 
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
+  --config "../dependencies.yaml"
   --output conda \
   --file-key test_${LIB} \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

From aba7509c13996b4cee75889bea58df39674d2ddf Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Mon, 26 Aug 2024 09:52:14 -0700
Subject: [PATCH 08/24] Change --config arg in rdfg

---
 .../cudf_pandas_tests/third_party_integration_tests/ci/test.sh  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
index 0d66e27e21a..1c32d1d8d3d 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
@@ -11,7 +11,7 @@ set -euo pipefail
 
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
-  --config "../dependencies.yaml"
+  --config "python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml"
   --output conda \
   --file-key test_${LIB} \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

From 555ffd67266678854d1211e5592a061ebe9b98a7 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Mon, 26 Aug 2024 11:42:38 -0700
Subject: [PATCH 09/24] continue --output on next line

---
 .../cudf_pandas_tests/third_party_integration_tests/ci/test.sh  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
index 1c32d1d8d3d..5c3ebd0af98 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
@@ -11,7 +11,7 @@ set -euo pipefail
 
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
-  --config "python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml"
+  --config "python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml" \
   --output conda \
   --file-key test_${LIB} \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

From 72c806a0e9124180a71706ad3a16fd1cd68d952b Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Mon, 26 Aug 2024 13:29:01 -0700
Subject: [PATCH 10/24] Point to ci script

---
 .../cudf_pandas_tests/third_party_integration_tests/ci/test.sh  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
index 5c3ebd0af98..4734e2382fb 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
@@ -50,7 +50,7 @@ for serial_library in "${serial_libraries[@]}"; do
     fi
 done
 
-RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/ci_run_library_tests.sh ${LIB}
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh ${LIB}
 
 rapids-logger "Test script exiting with value: ${EXITCODE}"
 exit ${EXITCODE}

From e11bff682fd8be9ae135527470bb888537b5feb4 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Mon, 26 Aug 2024 18:31:01 -0700
Subject: [PATCH 11/24] prepend PYTHONPATH to pytest

---
 .../ci/ci_run_library_tests.sh                              | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
index d1627d6436b..0ec46c70028 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
@@ -13,7 +13,7 @@ runtest_gold() {
     local lib=$1
     local test_keys=${@:2}
 
-    pytest \
+    PYTHONPATH=python/cudf/cudf_pandas_tests/third_party_integration_tests/tests pytest \
     -v \
     --continue-on-collection-errors \
     --cache-clear \
@@ -28,7 +28,7 @@ runtest_cudf_pandas() {
     local lib=$1
     local test_keys=${@:2}
 
-    pytest \
+    PYTHONPATH=python/cudf/cudf_pandas_tests/third_party_integration_tests/tests pytest \
     -p cudf.pandas \
     -v \
     --continue-on-collection-errors \
@@ -49,7 +49,7 @@ main() {
     runtest_cudf_pandas ${lib} ${test_keys}
 
     # assertion phase
-    pytest \
+    PYTHONPATH=python/cudf/cudf_pandas_tests/third_party_integration_tests/tests pytest \
     --compare \
     -p cudf.pandas \
     -v \

From 8443f55ffeda65e412a0c8dc62c8e8a8a1a0c89a Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Tue, 27 Aug 2024 04:28:57 -0700
Subject: [PATCH 12/24] set the test_dir

---
 .../ci/ci_run_library_tests.sh                              | 6 +++---
 .../third_party_integration_tests/ci/test.sh                | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
index 0ec46c70028..d1627d6436b 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
@@ -13,7 +13,7 @@ runtest_gold() {
     local lib=$1
     local test_keys=${@:2}
 
-    PYTHONPATH=python/cudf/cudf_pandas_tests/third_party_integration_tests/tests pytest \
+    pytest \
     -v \
     --continue-on-collection-errors \
     --cache-clear \
@@ -28,7 +28,7 @@ runtest_cudf_pandas() {
     local lib=$1
     local test_keys=${@:2}
 
-    PYTHONPATH=python/cudf/cudf_pandas_tests/third_party_integration_tests/tests pytest \
+    pytest \
     -p cudf.pandas \
     -v \
     --continue-on-collection-errors \
@@ -49,7 +49,7 @@ main() {
     runtest_cudf_pandas ${lib} ${test_keys}
 
     # assertion phase
-    PYTHONPATH=python/cudf/cudf_pandas_tests/third_party_integration_tests/tests pytest \
+    pytest \
     --compare \
     -p cudf.pandas \
     -v \
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
index 4734e2382fb..09caea65004 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
@@ -24,10 +24,11 @@ conda activate test
 set -u
 
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
+echo "The working directory is ${PWD}"
 mkdir -p "${RAPIDS_TESTS_DIR}"
 
 repo_root=$(git rev-parse --show-toplevel)
-TEST_DIR=${repo_root}/tests
+TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
 
 rapids-print-env
 

From 8ffbc2f651340464e90d75931512174bd4638e18 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Tue, 27 Aug 2024 06:42:50 -0700
Subject: [PATCH 13/24] xfail pytorch test and move integration tests out of
 cudf_pandas_tests

---
 .github/workflows/pr.yaml                                   | 4 ++--
 .github/workflows/test.yaml                                 | 4 ++--
 .../ci/ci_run_library_tests.sh                              | 0
 .../ci/extract_lib.sh                                       | 0
 .../ci/test.sh                                              | 6 +++---
 .../dependencies.yaml                                       | 0
 .../tests/conftest.py                                       | 0
 .../tests/pytest.ini                                        | 0
 .../tests/test_cugraph.py                                   | 0
 .../tests/test_cuml.py                                      | 0
 .../tests/test_dask.py                                      | 0
 .../tests/test_featureengine.py                             | 0
 .../tests/test_holoviews.py                                 | 0
 .../tests/test_hvplot.py                                    | 0
 .../tests/test_ibis.py                                      | 0
 .../tests/test_matplotlib.py                                | 0
 .../tests/test_numpy.py                                     | 0
 .../tests/test_plotly.py                                    | 0
 .../tests/test_pytorch.py                                   | 2 ++
 .../tests/test_scipy.py                                     | 0
 .../tests/test_seaborn.py                                   | 0
 .../tests/test_sklearn.py                                   | 0
 .../tests/test_stumpy.py                                    | 0
 .../tests/test_stumpy_distributed.py                        | 0
 .../tests/test_tensorflow.py                                | 0
 .../tests/test_xgboost.py                                   | 0
 26 files changed, 9 insertions(+), 7 deletions(-)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/ci/ci_run_library_tests.sh (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/ci/extract_lib.sh (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/ci/test.sh (79%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/dependencies.yaml (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/conftest.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/pytest.ini (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_cugraph.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_cuml.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_dask.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_featureengine.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_holoviews.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_hvplot.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_ibis.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_matplotlib.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_numpy.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_plotly.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_pytorch.py (96%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_scipy.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_seaborn.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_sklearn.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_stumpy.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_stumpy_distributed.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_tensorflow.py (100%)
 rename python/cudf/{cudf_pandas_tests/third_party_integration_tests => cudf_pandas_third_party_integration_tests}/tests/test_xgboost.py (100%)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 0f64321dbfb..459c0fc5189 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -239,12 +239,12 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
+        LIBS=$(python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml)
         LIBS=${LIBS#[}
         LIBS=${LIBS%]}
         for lib in ${LIBS//,/ }; do
           lib=$(echo "$lib" | tr -d '""')
           echo "Running tests for $lib"
           CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-          python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
+          python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
         done
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 26754b1a1b0..e7558511ab2 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -134,12 +134,12 @@ jobs:
       sha: ${{ inputs.sha }}
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        LIBS=$(python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
+        LIBS=$(python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml)
         LIBS=${LIBS#[}
         LIBS=${LIBS%]}
         for lib in ${LIBS//,/ }; do
           lib=$(echo "$lib" | tr -d '""')
           echo "Running tests for $lib"
           CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-          python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
+          python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
         done
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh b/python/cudf/cudf_pandas_third_party_integration_tests/ci/ci_run_library_tests.sh
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh
rename to python/cudf/cudf_pandas_third_party_integration_tests/ci/ci_run_library_tests.sh
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh b/python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/extract_lib.sh
rename to python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh b/python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh
similarity index 79%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
rename to python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh
index 09caea65004..53c295b7e7a 100755
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/test.sh
+++ b/python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh
@@ -11,7 +11,7 @@ set -euo pipefail
 
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
-  --config "python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml" \
+  --config "python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml" \
   --output conda \
   --file-key test_${LIB} \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
@@ -28,7 +28,7 @@ echo "The working directory is ${PWD}"
 mkdir -p "${RAPIDS_TESTS_DIR}"
 
 repo_root=$(git rev-parse --show-toplevel)
-TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
+TEST_DIR=${repo_root}/python/cudf/cudf_pandas_third_party_integration_tests/tests
 
 rapids-print-env
 
@@ -51,7 +51,7 @@ for serial_library in "${serial_libraries[@]}"; do
     fi
 done
 
-RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} python/cudf/cudf_pandas_tests/third_party_integration_tests/ci/ci_run_library_tests.sh ${LIB}
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} python/cudf/cudf_pandas_third_party_integration_tests/ci/ci_run_library_tests.sh ${LIB}
 
 rapids-logger "Test script exiting with value: ${EXITCODE}"
 exit ${EXITCODE}
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
rename to python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/conftest.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/conftest.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini b/python/cudf/cudf_pandas_third_party_integration_tests/tests/pytest.ini
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/pytest.ini
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cugraph.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cugraph.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cuml.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cuml.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_dask.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_dask.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_featureengine.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_featureengine.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_holoviews.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_holoviews.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_hvplot.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_hvplot.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_ibis.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_ibis.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_matplotlib.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_matplotlib.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_numpy.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_numpy.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_plotly.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_plotly.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_pytorch.py
similarity index 96%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_pytorch.py
index ad287471aa0..ae9db3836a6 100644
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py
+++ b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_pytorch.py
@@ -121,6 +121,8 @@ def test_torch_tensor_ctor():
     return torch.tensor(s.values)
 
 
+@pytest.mark.xfail_cudf_pandas(reason="Known failure, see xdf/#210")
+@pytest.mark.xfail_compare
 def test_torch_tensor_from_numpy():
     s = pd.Series(range(5))
     return torch.from_numpy(s.values)
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_scipy.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_scipy.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_seaborn.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_seaborn.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_sklearn.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_sklearn.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy_distributed.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy_distributed.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_tensorflow.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_tensorflow.py
diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_xgboost.py
similarity index 100%
rename from python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py
rename to python/cudf/cudf_pandas_third_party_integration_tests/tests/test_xgboost.py

From ef773c5bb712c12ebdbefc34f979edbdacb76d4e Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Tue, 27 Aug 2024 12:15:02 -0700
Subject: [PATCH 14/24] refactor

---
 .github/workflows/pr.yaml                                 | 6 +++---
 .github/workflows/test.yaml                               | 4 ++--
 ci/cudf_pandas_scripts/run_tests.sh                       | 2 ++
 .../third-party-integration}/ci_run_library_tests.sh      | 2 +-
 .../third-party-integration}/extract_lib.sh               | 0
 .../cudf_pandas_scripts/third-party-integration}/test.sh  | 8 ++++----
 .../third_party_integration_tests}/dependencies.yaml      | 0
 .../third_party_integration_tests}/tests/conftest.py      | 0
 .../third_party_integration_tests}/tests/pytest.ini       | 0
 .../third_party_integration_tests}/tests/test_cugraph.py  | 0
 .../third_party_integration_tests}/tests/test_cuml.py     | 0
 .../third_party_integration_tests}/tests/test_dask.py     | 0
 .../tests/test_featureengine.py                           | 0
 .../tests/test_holoviews.py                               | 0
 .../third_party_integration_tests}/tests/test_hvplot.py   | 0
 .../third_party_integration_tests}/tests/test_ibis.py     | 0
 .../tests/test_matplotlib.py                              | 0
 .../third_party_integration_tests}/tests/test_numpy.py    | 0
 .../third_party_integration_tests}/tests/test_plotly.py   | 0
 .../third_party_integration_tests}/tests/test_pytorch.py  | 0
 .../third_party_integration_tests}/tests/test_scipy.py    | 0
 .../third_party_integration_tests}/tests/test_seaborn.py  | 0
 .../third_party_integration_tests}/tests/test_sklearn.py  | 0
 .../third_party_integration_tests}/tests/test_stumpy.py   | 0
 .../tests/test_stumpy_distributed.py                      | 0
 .../tests/test_tensorflow.py                              | 0
 .../third_party_integration_tests}/tests/test_xgboost.py  | 0
 27 files changed, 12 insertions(+), 10 deletions(-)
 rename {python/cudf/cudf_pandas_third_party_integration_tests/ci => ci/cudf_pandas_scripts/third-party-integration}/ci_run_library_tests.sh (97%)
 rename {python/cudf/cudf_pandas_third_party_integration_tests/ci => ci/cudf_pandas_scripts/third-party-integration}/extract_lib.sh (100%)
 rename {python/cudf/cudf_pandas_third_party_integration_tests/ci => ci/cudf_pandas_scripts/third-party-integration}/test.sh (78%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/dependencies.yaml (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/conftest.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/pytest.ini (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_cugraph.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_cuml.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_dask.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_featureengine.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_holoviews.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_hvplot.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_ibis.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_matplotlib.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_numpy.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_plotly.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_pytorch.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_scipy.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_seaborn.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_sklearn.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_stumpy.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_stumpy_distributed.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_tensorflow.py (100%)
 rename python/cudf/{cudf_pandas_third_party_integration_tests => cudf_pandas_tests/third_party_integration_tests}/tests/test_xgboost.py (100%)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 459c0fc5189..7c7fef53d9a 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -229,7 +229,7 @@ jobs:
         node_type: cpu4
         build_type: pull-request
         run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
-  integration-tests:
+  third-party-integration-tests-cudf-pandas:
     needs: wheel-build-cudf
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
@@ -239,12 +239,12 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        LIBS=$(python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml)
+        LIBS=$(ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
         LIBS=${LIBS#[}
         LIBS=${LIBS%]}
         for lib in ${LIBS//,/ }; do
           lib=$(echo "$lib" | tr -d '""')
           echo "Running tests for $lib"
           CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-          python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
+          ci/cudf_pandas_scripts/third-party-integration/test.sh $lib $CUDA_MAJOR
         done
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index e7558511ab2..10e0ed2bd9f 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -134,12 +134,12 @@ jobs:
       sha: ${{ inputs.sha }}
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        LIBS=$(python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml)
+        LIBS=$(ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
         LIBS=${LIBS#[}
         LIBS=${LIBS%]}
         for lib in ${LIBS//,/ }; do
           lib=$(echo "$lib" | tr -d '""')
           echo "Running tests for $lib"
           CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-          python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh $lib $CUDA_MAJOR
+          ci/cudf_pandas_scripts/third-party-integration/test.sh $lib $CUDA_MAJOR
         done
diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh
index 39056d58d56..38cc785af00 100755
--- a/ci/cudf_pandas_scripts/run_tests.sh
+++ b/ci/cudf_pandas_scripts/run_tests.sh
@@ -62,6 +62,7 @@ else
 fi
 
 python -m pytest -p cudf.pandas \
+    --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \
     --cov-config=./python/cudf/.coveragerc \
     --cov=cudf \
     --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
@@ -77,6 +78,7 @@ for version in "${versions[@]}"; do
     echo "Installing pandas version: ${version}"
     python -m pip install "numpy>=1.23,<2.0a0" "pandas==${version}"
     python -m pytest -p cudf.pandas \
+    --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \
     --cov-config=./python/cudf/.coveragerc \
     --cov=cudf \
     --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/ci/ci_run_library_tests.sh b/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
similarity index 97%
rename from python/cudf/cudf_pandas_third_party_integration_tests/ci/ci_run_library_tests.sh
rename to ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
index d1627d6436b..678ee36b3d9 100755
--- a/python/cudf/cudf_pandas_third_party_integration_tests/ci/ci_run_library_tests.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 cleanup() {
-    rm tests/results-*.pickle
+    rm ${TEST_DIR}/results-*.pickle
 }
 
 trap cleanup EXIT
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh b/ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/ci/extract_lib.sh
rename to ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh
similarity index 78%
rename from python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh
rename to ci/cudf_pandas_scripts/third-party-integration/test.sh
index 53c295b7e7a..6c1c7efe489 100755
--- a/python/cudf/cudf_pandas_third_party_integration_tests/ci/test.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh
@@ -11,7 +11,7 @@ set -euo pipefail
 
 rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
-  --config "python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml" \
+  --config "python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml" \
   --output conda \
   --file-key test_${LIB} \
   --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
@@ -24,11 +24,11 @@ conda activate test
 set -u
 
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
-echo "The working directory is ${PWD}"
+
 mkdir -p "${RAPIDS_TESTS_DIR}"
 
 repo_root=$(git rev-parse --show-toplevel)
-TEST_DIR=${repo_root}/python/cudf/cudf_pandas_third_party_integration_tests/tests
+TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
 
 rapids-print-env
 
@@ -51,7 +51,7 @@ for serial_library in "${serial_libraries[@]}"; do
     fi
 done
 
-RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} python/cudf/cudf_pandas_third_party_integration_tests/ci/ci_run_library_tests.sh ${LIB}
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${LIB}
 
 rapids-logger "Test script exiting with value: ${EXITCODE}"
 exit ${EXITCODE}
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/dependencies.yaml
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/conftest.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/conftest.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/pytest.ini b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/pytest.ini
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/pytest.ini
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cugraph.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cugraph.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cuml.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_cuml.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_dask.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_dask.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_dask.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_featureengine.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_featureengine.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_featureengine.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_holoviews.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_hvplot.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_hvplot.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_hvplot.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_ibis.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_ibis.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_ibis.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_matplotlib.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_numpy.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_plotly.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_plotly.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_plotly.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_pytorch.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_scipy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_scipy.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_scipy.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_seaborn.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_sklearn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_sklearn.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_sklearn.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_stumpy_distributed.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_tensorflow.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py
diff --git a/python/cudf/cudf_pandas_third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py
similarity index 100%
rename from python/cudf/cudf_pandas_third_party_integration_tests/tests/test_xgboost.py
rename to python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py

From a92ef3f09576b7f5306154c176aa63e4eae85b51 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Tue, 27 Aug 2024 13:23:58 -0700
Subject: [PATCH 15/24] change job name to match pr.yaml

---
 .github/workflows/test.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 10e0ed2bd9f..dcd9ade932c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -124,7 +124,7 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/cudf_pandas_scripts/run_tests.sh
-  integration-tests:
+  third-party-integration-tests-cudf-pandas:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
     with:

From 7245f768f8779876cbc0c25f07b5ae9ab48e4fad Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Tue, 27 Aug 2024 16:36:49 -0400
Subject: [PATCH 16/24] Update pr.yaml

---
 .github/workflows/pr.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 222e276bd11..19fea53e22b 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -36,7 +36,7 @@ jobs:
       - unit-tests-cudf-pandas
       - pandas-tests
       - pandas-tests-diff
-      - integration-tests
+      - third-party-integration-tests-cudf-pandas
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
     if: always()

From 9855a0ceb4cf7d5c77a7699c144c0b473fdfad24 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Tue, 27 Aug 2024 18:40:43 -0700
Subject: [PATCH 17/24] merge extract_lib.sh and test.sh

---
 .github/workflows/pr.yaml                     | 10 +--
 .github/workflows/test.yaml                   | 10 +--
 .../third-party-integration/test_new.sh       | 87 +++++++++++++++++++
 3 files changed, 89 insertions(+), 18 deletions(-)
 create mode 100644 ci/cudf_pandas_scripts/third-party-integration/test_new.sh

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 222e276bd11..8578a71ce7c 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -307,12 +307,4 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        LIBS=$(ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
-        LIBS=${LIBS#[}
-        LIBS=${LIBS%]}
-        for lib in ${LIBS//,/ }; do
-          lib=$(echo "$lib" | tr -d '""')
-          echo "Running tests for $lib"
-          CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-          ci/cudf_pandas_scripts/third-party-integration/test.sh $lib $CUDA_MAJOR
-        done
+        ci/cudf_pandas_scripts/third-party-integration/test_new.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index dcd9ade932c..f499e55a713 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -134,12 +134,4 @@ jobs:
       sha: ${{ inputs.sha }}
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        LIBS=$(ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml)
-        LIBS=${LIBS#[}
-        LIBS=${LIBS%]}
-        for lib in ${LIBS//,/ }; do
-          lib=$(echo "$lib" | tr -d '""')
-          echo "Running tests for $lib"
-          CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-          ci/cudf_pandas_scripts/third-party-integration/test.sh $lib $CUDA_MAJOR
-        done
+        ci/cudf_pandas_scripts/third-party-integration/test_new.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
diff --git a/ci/cudf_pandas_scripts/third-party-integration/test_new.sh b/ci/cudf_pandas_scripts/third-party-integration/test_new.sh
new file mode 100644
index 00000000000..c0937ceeb7c
--- /dev/null
+++ b/ci/cudf_pandas_scripts/third-party-integration/test_new.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+
+# Common setup steps shared by Python test jobs
+
+set -euo pipefail
+
+write_output() {
+  local key="$1"
+  local value="$2"
+  echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
+}
+
+extract_lib_from_dependencies_yaml() {
+    local file=$1
+    # Parse all keys in dependencies.yaml under the "files" section,
+    # extract all the keys that start with "test_", and extract the rest
+    local extracted_libs="$(yq -o json $file | jq -rc '.files | with_entries(select(.key | contains("test_"))) | keys | map(sub("^test_"; ""))')"
+    echo $extracted_libs
+}
+
+main() {
+    local dependencies_yaml="$1"
+
+    LIBS=$(extract_lib_from_dependencies_yaml "$dependencies_yaml")
+    LIBS=${LIBS#[}
+    LIBS=${LIBS%]}
+
+    for lib in ${LIBS//,/ }; do
+        lib=$(echo "$lib" | tr -d '""')
+        echo "Running tests for $lib"
+
+        CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
+
+        . /opt/conda/etc/profile.d/conda.sh
+
+        rapids-logger "Generate Python testing dependencies"
+        rapids-dependency-file-generator \
+          --config "$dependencies_yaml" \
+          --output conda \
+          --file-key test_${lib} \
+          --matrix "cuda=${CUDA_MAJOR};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
+
+        rapids-mamba-retry env create --yes -f env.yaml -n test
+
+        # Temporarily allow unbound variables for conda activation.
+        set +u
+        conda activate test
+        set -u
+
+        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
+
+        mkdir -p "${RAPIDS_TESTS_DIR}"
+
+        repo_root=$(git rev-parse --show-toplevel)
+        TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
+
+        rapids-print-env
+
+        rapids-logger "Check GPU usage"
+        nvidia-smi
+
+        EXITCODE=0
+        trap "EXITCODE=1" ERR
+        set +e
+
+        rapids-logger "pytest ${lib}"
+
+        NUM_PROCESSES=8
+        serial_libraries=(
+            "tensorflow"
+        )
+        for serial_library in "${serial_libraries[@]}"; do
+            if [ "${lib}" = "${serial_library}" ]; then
+                NUM_PROCESSES=1
+            fi
+        done
+
+        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${lib}
+
+        rapids-logger "Test script exiting with value: ${EXITCODE}"
+    done
+
+    exit ${EXITCODE}
+}
+
+main "$@"

From 560eb827ff5e918d300378514a98535463f9a810 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 28 Aug 2024 05:15:36 -0700
Subject: [PATCH 18/24] chmod test.sh

---
 ci/cudf_pandas_scripts/third-party-integration/test_new.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/cudf_pandas_scripts/third-party-integration/test_new.sh b/ci/cudf_pandas_scripts/third-party-integration/test_new.sh
index c0937ceeb7c..61370a1dfdb 100644
--- a/ci/cudf_pandas_scripts/third-party-integration/test_new.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/test_new.sh
@@ -28,7 +28,7 @@ main() {
 
     for lib in ${LIBS//,/ }; do
         lib=$(echo "$lib" | tr -d '""')
-        echo "Running tests for $lib"
+        echo "Running tests for library $lib"
 
         CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
 

From 609313c0117a5cd76589696efc4839f0d15100e2 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 28 Aug 2024 06:46:03 -0700
Subject: [PATCH 19/24] remove extract_lib.sh

---
 .github/workflows/pr.yaml                     |   2 +-
 .github/workflows/test.yaml                   |   2 +-
 .../third-party-integration/extract_lib.sh    |  27 -----
 .../third-party-integration/test.sh           | 104 +++++++++++-------
 .../third-party-integration/test_new.sh       |  87 ---------------
 5 files changed, 69 insertions(+), 153 deletions(-)
 delete mode 100755 ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh
 delete mode 100644 ci/cudf_pandas_scripts/third-party-integration/test_new.sh

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 41c71d30a0c..d55207dd0dd 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -307,4 +307,4 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        ci/cudf_pandas_scripts/third-party-integration/test_new.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
+        ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index f499e55a713..2c68f2861bb 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -134,4 +134,4 @@ jobs:
       sha: ${{ inputs.sha }}
       container_image: "rapidsai/ci-conda:latest"
       run_script: |
-        ci/cudf_pandas_scripts/third-party-integration/test_new.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
+        ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
diff --git a/ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh b/ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh
deleted file mode 100755
index 67ec5c773bc..00000000000
--- a/ci/cudf_pandas_scripts/third-party-integration/extract_lib.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-
-set -euo pipefail
-
-write_output() {
-  local key="$1"
-  local value="$2"
-  echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
-}
-
-extract_lib_from_dependencies_yaml() {
-    local file=$1
-    # Parse all keys in dependencies.yaml under the "files" section,
-    # extract all the keys that starts with "test_", and extract the
-    # rest
-    local extracted_libs="$(yq -o json $file | jq -rc '.files | with_entries( select(.key | contains("test_")) ) | keys | map(sub("^test_"; ""))')"
-    echo $extracted_libs
-}
-
-
-main() {
-    local dependencies_yaml="$1"
-    extract_lib_from_dependencies_yaml "$dependencies_yaml"
-}
-
-main "$@"
diff --git a/ci/cudf_pandas_scripts/third-party-integration/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh
index 6c1c7efe489..61370a1dfdb 100755
--- a/ci/cudf_pandas_scripts/third-party-integration/test.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh
@@ -3,55 +3,85 @@
 
 # Common setup steps shared by Python test jobs
 
-LIB=$1
-
 set -euo pipefail
 
-. /opt/conda/etc/profile.d/conda.sh
+write_output() {
+  local key="$1"
+  local value="$2"
+  echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
+}
+
+extract_lib_from_dependencies_yaml() {
+    local file=$1
+    # Parse all keys in dependencies.yaml under the "files" section,
+    # extract all the keys that start with "test_", and extract the rest
+    local extracted_libs="$(yq -o json $file | jq -rc '.files | with_entries(select(.key | contains("test_"))) | keys | map(sub("^test_"; ""))')"
+    echo $extracted_libs
+}
+
+main() {
+    local dependencies_yaml="$1"
+
+    LIBS=$(extract_lib_from_dependencies_yaml "$dependencies_yaml")
+    LIBS=${LIBS#[}
+    LIBS=${LIBS%]}
+
+    for lib in ${LIBS//,/ }; do
+        lib=$(echo "$lib" | tr -d '""')
+        echo "Running tests for library $lib"
+
+        CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
+
+        . /opt/conda/etc/profile.d/conda.sh
+
+        rapids-logger "Generate Python testing dependencies"
+        rapids-dependency-file-generator \
+          --config "$dependencies_yaml" \
+          --output conda \
+          --file-key test_${lib} \
+          --matrix "cuda=${CUDA_MAJOR};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
+
+        rapids-mamba-retry env create --yes -f env.yaml -n test
 
-rapids-logger "Generate Python testing dependencies"
-rapids-dependency-file-generator \
-  --config "python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml" \
-  --output conda \
-  --file-key test_${LIB} \
-  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
+        # Temporarily allow unbound variables for conda activation.
+        set +u
+        conda activate test
+        set -u
 
-rapids-mamba-retry env create --yes -f env.yaml -n test
+        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 
-# Temporarily allow unbound variables for conda activation.
-set +u
-conda activate test
-set -u
+        mkdir -p "${RAPIDS_TESTS_DIR}"
 
-RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
+        repo_root=$(git rev-parse --show-toplevel)
+        TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
 
-mkdir -p "${RAPIDS_TESTS_DIR}"
+        rapids-print-env
 
-repo_root=$(git rev-parse --show-toplevel)
-TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
+        rapids-logger "Check GPU usage"
+        nvidia-smi
 
-rapids-print-env
+        EXITCODE=0
+        trap "EXITCODE=1" ERR
+        set +e
 
-rapids-logger "Check GPU usage"
-nvidia-smi
+        rapids-logger "pytest ${lib}"
 
-EXITCODE=0
-trap "EXITCODE=1" ERR
-set +e
+        NUM_PROCESSES=8
+        serial_libraries=(
+            "tensorflow"
+        )
+        for serial_library in "${serial_libraries[@]}"; do
+            if [ "${lib}" = "${serial_library}" ]; then
+                NUM_PROCESSES=1
+            fi
+        done
 
-rapids-logger "pytest ${LIB}"
+        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${lib}
 
-NUM_PROCESSES=8
-serial_libraries=(
-    "tensorflow"
-)
-for serial_library in "${serial_libraries[@]}"; do
-    if [ "${LIB}" = "${serial_library}" ]; then
-        NUM_PROCESSES=1
-    fi
-done
+        rapids-logger "Test script exiting with value: ${EXITCODE}"
+    done
 
-RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${LIB}
+    exit ${EXITCODE}
+}
 
-rapids-logger "Test script exiting with value: ${EXITCODE}"
-exit ${EXITCODE}
+main "$@"
diff --git a/ci/cudf_pandas_scripts/third-party-integration/test_new.sh b/ci/cudf_pandas_scripts/third-party-integration/test_new.sh
deleted file mode 100644
index 61370a1dfdb..00000000000
--- a/ci/cudf_pandas_scripts/third-party-integration/test_new.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-
-# Common setup steps shared by Python test jobs
-
-set -euo pipefail
-
-write_output() {
-  local key="$1"
-  local value="$2"
-  echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}"
-}
-
-extract_lib_from_dependencies_yaml() {
-    local file=$1
-    # Parse all keys in dependencies.yaml under the "files" section,
-    # extract all the keys that start with "test_", and extract the rest
-    local extracted_libs="$(yq -o json $file | jq -rc '.files | with_entries(select(.key | contains("test_"))) | keys | map(sub("^test_"; ""))')"
-    echo $extracted_libs
-}
-
-main() {
-    local dependencies_yaml="$1"
-
-    LIBS=$(extract_lib_from_dependencies_yaml "$dependencies_yaml")
-    LIBS=${LIBS#[}
-    LIBS=${LIBS%]}
-
-    for lib in ${LIBS//,/ }; do
-        lib=$(echo "$lib" | tr -d '""')
-        echo "Running tests for library $lib"
-
-        CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi)
-
-        . /opt/conda/etc/profile.d/conda.sh
-
-        rapids-logger "Generate Python testing dependencies"
-        rapids-dependency-file-generator \
-          --config "$dependencies_yaml" \
-          --output conda \
-          --file-key test_${lib} \
-          --matrix "cuda=${CUDA_MAJOR};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
-
-        rapids-mamba-retry env create --yes -f env.yaml -n test
-
-        # Temporarily allow unbound variables for conda activation.
-        set +u
-        conda activate test
-        set -u
-
-        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
-
-        mkdir -p "${RAPIDS_TESTS_DIR}"
-
-        repo_root=$(git rev-parse --show-toplevel)
-        TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
-
-        rapids-print-env
-
-        rapids-logger "Check GPU usage"
-        nvidia-smi
-
-        EXITCODE=0
-        trap "EXITCODE=1" ERR
-        set +e
-
-        rapids-logger "pytest ${lib}"
-
-        NUM_PROCESSES=8
-        serial_libraries=(
-            "tensorflow"
-        )
-        for serial_library in "${serial_libraries[@]}"; do
-            if [ "${lib}" = "${serial_library}" ]; then
-                NUM_PROCESSES=1
-            fi
-        done
-
-        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${lib}
-
-        rapids-logger "Test script exiting with value: ${EXITCODE}"
-    done
-
-    exit ${EXITCODE}
-}
-
-main "$@"

From 0922bfe4ea0a26318b97c0a481c02f84f454fa94 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 28 Aug 2024 08:53:15 -0700
Subject: [PATCH 20/24] default to 11.8 and 12.5

---
 .../third_party_integration_tests/dependencies.yaml       | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
index c894e0cfb2d..05e1d8178d5 100644
--- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
+++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
@@ -135,6 +135,10 @@ dependencies:
     specific:
       - output_types: conda
         matrices:
+          - matrix:
+              cuda: "11"
+            packages:
+              - cuda-version=11.8
           - matrix:
               cuda: "11.8"
             packages:
@@ -151,6 +155,10 @@ dependencies:
               cuda: "12.5"
             packages:
               - cuda-version=12.5
+          - matrix:
+              cuda: "12"
+            packages:
+              - cuda-version=12.5
   py_version:
     specific:
       - output_types: conda

From 9561ea2d2fe87151eb7bae80393572b6ecb00478 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 28 Aug 2024 15:29:46 -0700
Subject: [PATCH 21/24] remove some pytest flags

---
 .../third-party-integration/ci_run_library_tests.sh         | 3 ---
 ci/cudf_pandas_scripts/third-party-integration/test.sh      | 6 +-----
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh b/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
index 678ee36b3d9..f082c4dfa58 100755
--- a/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
@@ -17,7 +17,6 @@ runtest_gold() {
     -v \
     --continue-on-collection-errors \
     --cache-clear \
-    --junitxml="${RAPIDS_TESTS_DIR}/junit-${lib}-gold.xml" \
     --numprocesses=${NUM_PROCESSES} \
     --dist=worksteal \
     ${TEST_DIR}/test_${lib}*.py \
@@ -33,7 +32,6 @@ runtest_cudf_pandas() {
     -v \
     --continue-on-collection-errors \
     --cache-clear \
-    --junitxml="${RAPIDS_TESTS_DIR}/junit-${lib}-cudf-pandas.xml" \
     --numprocesses=${NUM_PROCESSES} \
     --dist=worksteal \
     ${TEST_DIR}/test_${lib}*.py \
@@ -55,7 +53,6 @@ main() {
     -v \
     --continue-on-collection-errors \
     --cache-clear \
-    --junitxml="${RAPIDS_TESTS_DIR}/junit-${lib}-assertion.xml" \
     --numprocesses=${NUM_PROCESSES} \
     --dist=worksteal \
     ${TEST_DIR}/test_${lib}*.py \
diff --git a/ci/cudf_pandas_scripts/third-party-integration/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh
index 61370a1dfdb..89b28c30e39 100755
--- a/ci/cudf_pandas_scripts/third-party-integration/test.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh
@@ -48,10 +48,6 @@ main() {
         conda activate test
         set -u
 
-        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
-
-        mkdir -p "${RAPIDS_TESTS_DIR}"
-
         repo_root=$(git rev-parse --show-toplevel)
         TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests
 
@@ -76,7 +72,7 @@ main() {
             fi
         done
 
-        RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR} TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${lib}
+        TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh ${lib}
 
         rapids-logger "Test script exiting with value: ${EXITCODE}"
     done

From ef3dc723148f92d683b40d3cc80e03a56b4326b4 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 28 Aug 2024 17:58:23 -0700
Subject: [PATCH 22/24] remove test keys

---
 .../ci_run_library_tests.sh                      | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh b/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
index f082c4dfa58..54a56508cdc 100755
--- a/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/ci_run_library_tests.sh
@@ -11,7 +11,6 @@ trap cleanup EXIT
 
 runtest_gold() {
     local lib=$1
-    local test_keys=${@:2}
 
     pytest \
     -v \
@@ -19,13 +18,11 @@ runtest_gold() {
     --cache-clear \
     --numprocesses=${NUM_PROCESSES} \
     --dist=worksteal \
-    ${TEST_DIR}/test_${lib}*.py \
-    ${test_keys}
+    ${TEST_DIR}/test_${lib}*.py
 }
 
 runtest_cudf_pandas() {
     local lib=$1
-    local test_keys=${@:2}
 
     pytest \
     -p cudf.pandas \
@@ -34,17 +31,15 @@ runtest_cudf_pandas() {
     --cache-clear \
     --numprocesses=${NUM_PROCESSES} \
     --dist=worksteal \
-    ${TEST_DIR}/test_${lib}*.py \
-    ${test_keys}
+    ${TEST_DIR}/test_${lib}*.py
 }
 
 main() {
     local lib=$1
-    local test_keys=${@:2}
 
     # generation phase
-    runtest_gold ${lib} ${test_keys}
-    runtest_cudf_pandas ${lib} ${test_keys}
+    runtest_gold ${lib}
+    runtest_cudf_pandas ${lib}
 
     # assertion phase
     pytest \
@@ -55,8 +50,7 @@ main() {
     --cache-clear \
     --numprocesses=${NUM_PROCESSES} \
     --dist=worksteal \
-    ${TEST_DIR}/test_${lib}*.py \
-    ${test_keys}
+    ${TEST_DIR}/test_${lib}*.py
 }
 
 main $@

From 9c10daee1e01e93f769496bf87773e03c44add00 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Thu, 29 Aug 2024 15:33:02 -0700
Subject: [PATCH 23/24] address review

---
 .github/workflows/pr.yaml           | 11 -----------
 ci/cudf_pandas_scripts/run_tests.sh |  1 +
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index d55207dd0dd..c43c523a78e 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -297,14 +297,3 @@ jobs:
         node_type: cpu4
         build_type: pull-request
         run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
-  third-party-integration-tests-cudf-pandas:
-    needs: wheel-build-cudf
-    secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10
-    with:
-      build_type: pull-request
-      node_type: "gpu-v100-latest-1"
-      arch: "amd64"
-      container_image: "rapidsai/ci-conda:latest"
-      run_script: |
-        ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh
index bf618a48001..8b85695c861 100755
--- a/ci/cudf_pandas_scripts/run_tests.sh
+++ b/ci/cudf_pandas_scripts/run_tests.sh
@@ -64,6 +64,7 @@ fi
 python -m pip install ipykernel
 python -m ipykernel install --user --name python3
 
+# The third-party integration tests are ignored because they are run nightly in a separate CI job
 python -m pytest -p cudf.pandas \
     --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \
     --cov-config=./python/cudf/.coveragerc \

From 1afea54128126a9498fe865df92f7bb4e75970a2 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Thu, 29 Aug 2024 15:34:22 -0700
Subject: [PATCH 24/24] remove job completely from pr.yaml

---
 .github/workflows/pr.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index c43c523a78e..35c7e3d95b6 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -36,7 +36,6 @@ jobs:
       - unit-tests-cudf-pandas
       - pandas-tests
       - pandas-tests-diff
-      - third-party-integration-tests-cudf-pandas
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
     if: always()