From cc23474129433d7700058c311be5154340a6bce3 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 8 Oct 2024 17:12:10 +0100 Subject: [PATCH] Turn on `xfail_strict = true` for all python packages (#16977) The cudf tests already treat tests that are expected to fail but pass as errors, but at the time we introduced that change, we didn't do the same for the other packages. Do that now, it turns out there are only a few xpassing tests. While here, it turns out that having multiple different pytest configuration files does not work. `pytest.ini` takes precedence over other options, and it's "first file wins". Consequently, the merge of #16851 turned off `xfail_strict = true` (and other options) for many of the subpackages. To fix this, migrate all pytest configuration into the appropriate section of the `pyproject.toml` files, so that all tool configuration lives in the same place. We also add a section in the developer guide to document this choice. - Closes #12391 - Closes #16974 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - James Lamb (https://github.com/jameslamb) - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/16977 --- docs/cudf/source/developer_guide/testing.md | 17 ++++++++ python/cudf/cudf/tests/pytest.ini | 19 --------- python/cudf/cudf_pandas_tests/pytest.ini | 9 ++++ .../cudf_pandas_tests/test_cudf_pandas.py | 41 +++++++++++-------- python/cudf/pyproject.toml | 21 ++++++++++ python/cudf_kafka/cudf_kafka/tests/pytest.ini | 4 -- python/cudf_kafka/pyproject.toml | 3 ++ python/cudf_polars/pyproject.toml | 5 +++ python/cudf_polars/tests/pytest.ini | 4 -- python/custreamz/custreamz/tests/pytest.ini | 4 -- .../custreamz/tests/test_dataframes.py | 18 +++----- python/custreamz/pyproject.toml | 6 +++ python/dask_cudf/dask_cudf/tests/pytest.ini | 4 -- python/dask_cudf/pyproject.toml | 3 ++ python/pylibcudf/pylibcudf/tests/pytest.ini | 9 ---- python/pylibcudf/pyproject.toml | 10 +++++ 16 files changed, 103 insertions(+), 74 deletions(-) delete mode 100644 python/cudf/cudf/tests/pytest.ini create mode 100644 python/cudf/cudf_pandas_tests/pytest.ini delete mode 100644 python/cudf_kafka/cudf_kafka/tests/pytest.ini delete mode 100644 python/cudf_polars/tests/pytest.ini delete mode 100644 python/custreamz/custreamz/tests/pytest.ini delete mode 100644 python/dask_cudf/dask_cudf/tests/pytest.ini delete mode 100644 python/pylibcudf/pylibcudf/tests/pytest.ini diff --git a/docs/cudf/source/developer_guide/testing.md b/docs/cudf/source/developer_guide/testing.md index f12f809d5db..22cc1b5b8de 100644 --- a/docs/cudf/source/developer_guide/testing.md +++ b/docs/cudf/source/developer_guide/testing.md @@ -7,6 +7,23 @@ specifically the [`pytest-cov`](https://github.com/pytest-dev/pytest-cov) plugin Code coverage reports are uploaded to [Codecov](https://app.codecov.io/gh/rapidsai/cudf). Each PR also indicates whether it increases or decreases test coverage. +### Configuring pytest + +Pytest will accept configuration in [multiple different +files](https://docs.pytest.org/en/stable/reference/customize.html), +with a specified discovery and precedence order. Note in particular +that there is no automatic "include" mechanism, as soon as a matching +configuration file is found, discovery stops. + +For preference, so that all tool configuration lives in the same +place, we use `pyproject.toml`-based configuration. Test configuration +for a given package should live in that package's `pyproject.toml` +file. + +Where tests do not naturally belong to a project, for example the +`cudf.pandas` integration tests and the cuDF benchmarks, use a +`pytest.ini` file as close to the tests as possible. + ## Test organization How tests are organized depends on which of the following two groups they fall into: diff --git a/python/cudf/cudf/tests/pytest.ini b/python/cudf/cudf/tests/pytest.ini deleted file mode 100644 index 496a322ff80..00000000000 --- a/python/cudf/cudf/tests/pytest.ini +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. - -[pytest] -markers = - spilling: mark benchmark a good candidate to run with `CUDF_SPILL=ON` -xfail_strict = true -filterwarnings = - error - ignore:::.*xdist.* - ignore:::.*pytest.* - # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow() - ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore - # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+ - ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning - # PerformanceWarning from cupy warming up the JIT cache - ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning - # Ignore numba PEP 456 warning specific to arm machines - ignore:FNV hashing is not implemented in Numba.*:UserWarning -addopts = --tb=native diff --git a/python/cudf/cudf_pandas_tests/pytest.ini b/python/cudf/cudf_pandas_tests/pytest.ini new file mode 100644 index 00000000000..46e2448ea24 --- /dev/null +++ b/python/cudf/cudf_pandas_tests/pytest.ini @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Note, this config file overrides the default "cudf" test config in +# ../pyproject.toml We do so deliberately because we have different +# treatment of markers and warnings +[pytest] +addopts = --tb=native --strict-config --strict-markers +empty_parameter_set_mark = fail_at_collect +xfail_strict = true diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 2bbed40e34e..a74b7148c00 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import collections +import contextlib import copy import datetime import operator @@ -21,10 +22,15 @@ import pyarrow as pa import pytest from nbconvert.preprocessors import ExecutePreprocessor -from numba import NumbaDeprecationWarning, vectorize +from numba import ( + NumbaDeprecationWarning, + __version__ as numba_version, + vectorize, +) +from packaging import version from pytz import utc -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220, PANDAS_VERSION from cudf.pandas import LOADED, Profiler from cudf.pandas.fast_slow_proxy import ( ProxyFallbackError, @@ -52,8 +58,6 @@ get_calendar, ) -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION - # Accelerated pandas has the real pandas and cudf modules as attributes pd = xpd._fsproxy_slow cudf = xpd._fsproxy_fast @@ -622,10 +626,6 @@ def test_array_function_series_fallback(series): tm.assert_equal(expect, got) -@pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) def test_timedeltaproperties(series): psr, sr = series psr, sr = psr.astype("timedelta64[ns]"), sr.astype("timedelta64[ns]") @@ -685,10 +685,6 @@ def test_maintain_container_subclasses(multiindex): assert isinstance(got, xpd.core.indexes.frozen.FrozenList) -@pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas due to unsupported boxcar window type", -) def test_rolling_win_type(): pdf = pd.DataFrame(range(5)) df = xpd.DataFrame(range(5)) @@ -697,8 +693,14 @@ def test_rolling_win_type(): tm.assert_equal(result, expected) -@pytest.mark.skip( - reason="Requires Numba 0.59 to fix segfaults on ARM. See https://github.com/numba/llvmlite/pull/1009" +@pytest.mark.skipif( + version.parse(numba_version) < version.parse("0.59"), + reason="Requires Numba 0.59 to fix segfaults on ARM. See https://github.com/numba/llvmlite/pull/1009", +) +@pytest.mark.xfail( + version.parse(numba_version) >= version.parse("0.59") + and PANDAS_VERSION < version.parse("2.1"), + reason="numba.generated_jit removed in 0.59, requires pandas >= 2.1", ) def test_rolling_apply_numba_engine(): def weighted_mean(x): @@ -709,7 +711,12 @@ def weighted_mean(x): pdf = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) df = xpd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) - with pytest.warns(NumbaDeprecationWarning): + ctx = ( + contextlib.nullcontext() + if PANDAS_GE_210 + else pytest.warns(NumbaDeprecationWarning) + ) + with ctx: expect = pdf.rolling(2, method="table", min_periods=0).apply( weighted_mean, raw=True, engine="numba" ) @@ -1305,7 +1312,7 @@ def max_times_two(self): @pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION < version.parse("2.1"), reason="DatetimeArray.__floordiv__ missing in pandas-2.0.0", ) def test_floordiv_array_vs_df(): @@ -1580,7 +1587,7 @@ def test_numpy_cupy_flatiter(series): @pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION < version.parse("2.1"), reason="pyarrow_numpy storage type was not supported in pandas-2.0.0", ) def test_arrow_string_arrays(): diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 1b730ffd13c..c0776fd0de6 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -124,6 +124,27 @@ skip = [ "__init__.py", ] +[tool.pytest.ini_options] +addopts = "--tb=native --strict-config --strict-markers" +empty_parameter_set_mark = "fail_at_collect" +filterwarnings = [ + "error", + "ignore:::.*xdist.*", + "ignore:::.*pytest.*", + # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow() + "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore", + # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+ + "ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning", + # PerformanceWarning from cupy warming up the JIT cache + "ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning", + # Ignore numba PEP 456 warning specific to arm machines + "ignore:FNV hashing is not implemented in Numba.*:UserWarning" +] +markers = [ + "spilling: mark benchmark a good candidate to run with `CUDF_SPILL=ON`" +] +xfail_strict = true + [tool.rapids-build-backend] build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" diff --git a/python/cudf_kafka/cudf_kafka/tests/pytest.ini b/python/cudf_kafka/cudf_kafka/tests/pytest.ini deleted file mode 100644 index 7b0a9f29fb1..00000000000 --- a/python/cudf_kafka/cudf_kafka/tests/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -[pytest] -addopts = --tb=native diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index a1a3ec37842..87e19a2bccf 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -79,9 +79,12 @@ skip = [ ] [tool.pytest.ini_options] +addopts = "--tb=native --strict-config --strict-markers" +empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error" ] +xfail_strict = true [tool.scikit-build] build-dir = "build/{wheel_tag}" diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index f55031e0826..5345fad41a2 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -50,6 +50,11 @@ license-files = ["LICENSE"] version = {file = "cudf_polars/VERSION"} [tool.pytest.ini_options] +addopts = "--tb=native --strict-config --strict-markers" +empty_parameter_set_mark = "fail_at_collect" +filterwarnings = [ + "error" +] xfail_strict = true [tool.coverage.report] diff --git a/python/cudf_polars/tests/pytest.ini b/python/cudf_polars/tests/pytest.ini deleted file mode 100644 index 7b0a9f29fb1..00000000000 --- a/python/cudf_polars/tests/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -[pytest] -addopts = --tb=native diff --git a/python/custreamz/custreamz/tests/pytest.ini b/python/custreamz/custreamz/tests/pytest.ini deleted file mode 100644 index 7b0a9f29fb1..00000000000 --- a/python/custreamz/custreamz/tests/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -[pytest] -addopts = --tb=native diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py index bae4b051cae..8c0130d2818 100644 --- a/python/custreamz/custreamz/tests/test_dataframes.py +++ b/python/custreamz/custreamz/tests/test_dataframes.py @@ -377,24 +377,16 @@ def test_setitem_overwrites(stream): [ ({}, "sum"), ({}, "mean"), - pytest.param({}, "min"), + ({}, "min"), pytest.param( {}, "median", marks=pytest.mark.xfail(reason="Unavailable for rolling objects"), ), - pytest.param({}, "max"), - pytest.param( - {}, - "var", - marks=pytest.mark.xfail(reason="Unavailable for rolling objects"), - ), - pytest.param({}, "count"), - pytest.param( - {"ddof": 0}, - "std", - marks=pytest.mark.xfail(reason="Unavailable for rolling objects"), - ), + ({}, "max"), + ({}, "var"), + ({}, "count"), + ({"ddof": 0}, "std"), pytest.param( {"quantile": 0.5}, "quantile", diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml index 85ab0024bb5..af45f49d9b4 100644 --- a/python/custreamz/pyproject.toml +++ b/python/custreamz/pyproject.toml @@ -111,6 +111,8 @@ skip = [ ] [tool.pytest.ini_options] +addopts = "--tb=native --strict-config --strict-markers" +empty_parameter_set_mark = "fail_at_collect" filterwarnings = [ "error", "ignore:unclosed