Skip to content

Commit

Permalink
Enable pandas pytests for cudf.pandas (#15147)
Browse files Browse the repository at this point in the history
This PR enables the pandas pytest suite for `cudf.pandas`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #15147
  • Loading branch information
galipremsagar authored Mar 12, 2024
1 parent 241825a commit d48b904
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 32 deletions.
25 changes: 12 additions & 13 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
- wheel-tests-dask-cudf
- devcontainer
- unit-tests-cudf-pandas
# - pandas-tests
- pandas-tests
#- pandas-tests-diff
#- pandas-tests-diff-comment
secrets: inherit
Expand Down Expand Up @@ -156,21 +156,20 @@ jobs:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
# This selects "ARCH=amd64 + the latest supported Python + CUDA".
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/run_tests.sh
# pandas-tests:
# # run the Pandas unit tests using PR branch
# needs: wheel-build-cudf
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
# with:
# matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
# build_type: pull-request
# script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
# test_summary_show: "none"
pandas-tests:
# run the Pandas unit tests using PR branch
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: pull-request
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
# Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
test_summary_show: "none"
#pandas-tests-diff:
# # diff the results of running the Pandas unit tests and publish a job summary
# needs: [pandas-tests-main, pandas-tests-pr]
Expand Down
24 changes: 12 additions & 12 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,15 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
script: ci/cudf_pandas_scripts/run_tests.sh
# pandas-tests:
# # run the Pandas unit tests
# secrets: inherit
# uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
# with:
# matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
# build_type: nightly
# branch: ${{ inputs.branch }}
# date: ${{ inputs.date }}
# sha: ${{ inputs.sha }}
# # pr mode uses the HEAD of the branch, which is also correct for nightlies
# script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
pandas-tests:
# run the Pandas unit tests
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
with:
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
# pr mode uses the HEAD of the branch, which is also correct for nightlies
script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
3 changes: 2 additions & 1 deletion ci/cudf_pandas_scripts/pandas-tests/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,11 @@ mkdir -p "${RAPIDS_TESTS_DIR}"

bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
-n 10 \
--tb=line \
--tb=no \
-m "not slow" \
--max-worker-restart=3 \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
--dist worksteal \
--report-log=${PANDAS_TESTS_BRANCH}.json 2>&1

# summarize the results and save them to artifacts:
Expand Down
7 changes: 5 additions & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -717,8 +717,11 @@ dependencies:
packages:
# dependencies to run pandas tests
# https://github.com/pandas-dev/pandas/blob/main/environment.yml
# pandas[all] includes all of the required dependencies
- pandas[all]
# pandas[...] includes all of the required dependencies.
# Intentionally excluding `postgresql` because of
# installation issues with `psycopg2`.
- pandas[test, pyarrow, performance, computation, fss, excel, parquet, feather, hdf5, spss, html, xml, plot, output-formatting, clipboard, compression]
- pytest-reportlog
test_python_cudf_pandas:
common:
- output_types: pyproject
Expand Down
14 changes: 12 additions & 2 deletions python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,18 @@ set -euo pipefail
# of Pandas installed.
PANDAS_VERSION=$(python -c "import pandas; print(pandas.__version__)")

PYTEST_IGNORES="--ignore=tests/io/test_user_agent.py --ignore=tests/interchange/test_impl.py --ignore=tests/window/test_dtypes.py --ignore=tests/strings/test_api.py --ignore=tests/window/test_numba.py"
PYTEST_IGNORES="--ignore=tests/io/test_user_agent.py \
--ignore=tests/interchange/test_impl.py \
--ignore=tests/window/test_dtypes.py \
--ignore=tests/strings/test_api.py \
--ignore=tests/window/test_numba.py \
--ignore=tests/window \
--ignore=tests/io/pytables \
--ignore=tests/plotting \
--ignore=tests/scalar \
--ignore=tests/series/test_arithmetic.py \
--ignore=tests/tslibs/test_parsing.py \
--ignore=tests/io/parser/common/test_read_errors.py"

mkdir -p pandas-testing
cd pandas-testing
Expand Down Expand Up @@ -185,7 +196,6 @@ and not test_numpy_ufuncs_basic[nullable_float-rad2deg]"
PANDAS_CI="1" python -m pytest -p cudf.pandas \
-v -m "not single_cpu and not db" \
-k "not test_overwrite_warns and not test_complex_series_frame_alignment and not test_to_parquet_gcs_new_file and not test_qcut_nat and not test_add and not test_ismethods and $TEST_NUMPY_UFUNCS_BASIC_FLAKY" \
--durations=50 \
--import-mode=importlib \
-o xfail_strict=True \
${PYTEST_IGNORES} \
Expand Down
5 changes: 3 additions & 2 deletions python/cudf/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

[build-system]
build-backend = "scikit_build_core.build"
Expand Down Expand Up @@ -69,7 +69,8 @@ test = [
"tzdata",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
pandas-tests = [
"pandas[all]",
"pandas[test, pyarrow, performance, computation, fss, excel, parquet, feather, hdf5, spss, html, xml, plot, output-formatting, clipboard, compression]",
"pytest-reportlog",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
cudf-pandas-tests = [
"ipython",
Expand Down

0 comments on commit d48b904

Please sign in to comment.