Commit 8b29502

Merge branch 'branch-24.10' into python-3.12

jameslamb authored Sep 4, 2024
2 parents d4ba292 + ad1369d

Showing 34 changed files with 638 additions and 70 deletions.
13 changes: 12 additions & 1 deletion ci/cudf_pandas_scripts/run_tests.sh
@@ -54,8 +54,19 @@ else
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

# echo to expand wildcard before adding `[extra]` requires for pip
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
# `test_python` constraints are for `[test]` not `[cudf-pandas-tests]`
rapids-dependency-file-generator \
--output requirements \
--file-key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,cudf-pandas-tests]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
3 changes: 2 additions & 1 deletion ci/test_python_common.sh
@@ -14,7 +14,8 @@ ENV_YAML_DIR="$(mktemp -d)"
rapids-dependency-file-generator \
--output conda \
--file-key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test

14 changes: 14 additions & 0 deletions ci/test_wheel_cudf.sh
@@ -10,8 +10,22 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Install cudf, pylibcudf, and test requirements"

# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]"
11 changes: 11 additions & 0 deletions ci/test_wheel_cudf_polars.sh
@@ -25,9 +25,20 @@ RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Installing cudf_polars and its dependencies"
# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_cudf_polars \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
13 changes: 13 additions & 0 deletions ci/test_wheel_dask_cudf.sh
@@ -11,8 +11,21 @@ RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from
RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist

rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements"
# Constrain to minimum dependency versions if job is set up as "oldest"
echo "" > ./constraints.txt
if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then
rapids-dependency-file-generator \
--output requirements \
--file-key py_test_dask_cudf \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
| tee ./constraints.txt
fi

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install \
-v \
--constraint ./constraints.txt \
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
"$(echo ./dist/dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
"$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
22 changes: 22 additions & 0 deletions dependencies.yaml
@@ -700,6 +700,28 @@ dependencies:
- pytest<8
- pytest-cov
- pytest-xdist
specific:
# Define additional constraints for testing with oldest dependencies.
- output_types: [conda, requirements]
matrices:
- matrix: {dependencies: "oldest"}
packages:
- numba==0.57.*
- numpy==1.23.*
- pandas==2.0.*
- pyarrow==14.0.0
- cupy==12.0.0 # ignored as pip constraint
- matrix:
packages:
- output_types: requirements
# Using --constraints for pip install, so we list cupy multiple times
matrices:
- matrix: {dependencies: "oldest"}
packages:
- cupy-cuda11x==12.0.0
- cupy-cuda12x==12.0.0
- matrix:
packages:
test_python_pylibcudf:
common:
- output_types: [conda, requirements, pyproject]
4 changes: 4 additions & 0 deletions python/cudf/cudf/tests/indexes/test_interval.py
@@ -149,6 +149,10 @@ def test_interval_range_periods_basic_dtype(start_t, end_t, periods_t):
assert_eq(pindex, gindex)


@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="Does not warn on older versions of pandas",
)
def test_interval_range_periods_warnings():
start_val, end_val, periods_val = 0, 4, 1.0

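Note: the `skipif` markers added throughout these test files key off version flags imported from `cudf.core._compat`. As a rough sketch of what those flags amount to (the pinned versions and implementation below are assumptions, not taken from this diff):

```python
# Hedged sketch of the pandas version flags referenced by the new skip markers.
# The real definitions live in cudf/core/_compat.py; pinned versions are assumed.
import pandas as pd
from packaging.version import Version

PANDAS_VERSION = Version(pd.__version__)
# Newest pandas this cudf branch fully supports; the exact value is an assumption.
PANDAS_CURRENT_SUPPORTED_VERSION = Version("2.2.2")
# Gate for pandas 2.2+ behavior changes (new deprecations, DateOffset fixes, etc.).
PANDAS_GE_220 = PANDAS_VERSION >= Version("2.2.0")
```

Tests marked with `PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION` are skipped when the installed pandas is older than the version cudf tracks, which is exactly the situation the new "oldest" dependency CI jobs create (they pin pandas==2.0.*).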
@@ -23,6 +23,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.testing import assert_eq
from cudf.testing.dataset_generator import rand_dataframe

@@ -302,6 +303,10 @@ def get_days_from_epoch(date: datetime.date | None) -> int | None:
@pytest.mark.parametrize("namespace", [None, "root_ns"])
@pytest.mark.parametrize("nullable", [True, False])
@pytest.mark.parametrize("prepend_null", [True, False])
@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="Fails in older versions of pandas (datetime(9999, ...) too large)",
)
def test_can_parse_avro_date_logical_type(namespace, nullable, prepend_null):
avro_type = {"logicalType": "date", "type": "int"}
if nullable:
41 changes: 39 additions & 2 deletions python/cudf/cudf/tests/test_binops.py
@@ -13,7 +13,11 @@

import cudf
from cudf import Index, Series
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.core._compat import (
PANDAS_CURRENT_SUPPORTED_VERSION,
PANDAS_GE_220,
PANDAS_VERSION,
)
from cudf.core.buffer.spill_manager import get_global_manager
from cudf.testing import _utils as utils, assert_eq
from cudf.utils.dtypes import (
@@ -1781,6 +1785,20 @@ def test_datetime_dateoffset_binaryop(
reason="https://github.com/pandas-dev/pandas/issues/57448",
)
)
if (
not PANDAS_GE_220
and dtype in {"datetime64[ms]", "datetime64[s]"}
and frequency in ("microseconds", "nanoseconds")
and n_periods != 0
):
pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")
if (
not PANDAS_GE_220
and dtype == "datetime64[us]"
and frequency == "nanoseconds"
and n_periods != 0
):
pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")

date_col = [
f"2000-01-01 00:00:{components}",
@@ -1834,7 +1852,11 @@ def test_datetime_dateoffset_binaryop(
"ignore:Discarding nonzero nanoseconds:UserWarning"
)
@pytest.mark.parametrize("op", [operator.add, operator.sub])
def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="Fails in older versions of pandas",
)
def test_datetime_dateoffset_binaryop_multiple(request, date_col, kwargs, op):
gsr = cudf.Series(date_col, dtype="datetime64[ns]")
psr = gsr.to_pandas()

@@ -1873,6 +1895,21 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
def test_datetime_dateoffset_binaryop_reflected(
n_periods, frequency, dtype, components
):
if (
not PANDAS_GE_220
and dtype in {"datetime64[ms]", "datetime64[s]"}
and frequency in ("microseconds", "nanoseconds")
and n_periods != 0
):
pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")
if (
not PANDAS_GE_220
and dtype == "datetime64[us]"
and frequency == "nanoseconds"
and n_periods != 0
):
pytest.skip(reason="https://github.com/pandas-dev/pandas/pull/55595")

date_col = [
f"2000-01-01 00:00:{components}",
f"2000-01-31 00:00:{components}",
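The `PANDAS_GE_220` skips above all point at pandas PR #55595, which fixed `DateOffset` arithmetic for non-nanosecond datetime64 data. A minimal illustration of the kind of case being skipped on older pandas (this snippet is not from the test suite, and the exact pre-2.2 behavior is assumed from the linked PR):

```python
# Illustration only: a sub-unit DateOffset applied to a coarser-resolution
# datetime64 Series, the combination skipped on pandas < 2.2 above.
import pandas as pd

ser = pd.Series(pd.to_datetime(["2000-01-01", "2000-01-31"])).astype("datetime64[s]")
offset = pd.DateOffset(microseconds=5)

# pandas < 2.2 mishandled sub-unit offsets on non-nanosecond data (pandas PR #55595);
# newer pandas resolves the unit mismatch, which is what cudf's result is compared to.
print(ser + offset)
```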
5 changes: 5 additions & 0 deletions python/cudf/cudf/tests/test_categorical.py
@@ -11,6 +11,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.testing import assert_eq
from cudf.testing._utils import NUMERIC_TYPES, assert_exceptions_equal

@@ -858,6 +859,10 @@ def test_cat_from_scalar(scalar):
assert_eq(ps, gs)


@pytest.mark.skipif(
PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
reason="Does not warn on older versions of pandas",
)
def test_cat_groupby_fillna():
ps = pd.Series(["a", "b", "c"], dtype="category")
gs = cudf.from_pandas(ps)
99 changes: 65 additions & 34 deletions python/cudf/cudf/tests/test_concat.py
@@ -9,6 +9,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_220
from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
from cudf.testing import assert_eq
from cudf.testing._utils import assert_exceptions_equal, expect_warning_if
@@ -451,45 +452,75 @@ def test_concat_mixed_input():
[pd.Series([1, 2, 3]), pd.DataFrame({"a": []})],
[pd.Series([], dtype="float64"), pd.DataFrame({"a": []})],
[pd.Series([], dtype="float64"), pd.DataFrame({"a": [1, 2]})],
pytest.param(
    [
        pd.Series([1, 2, 3.0, 1.2], name="abc"),
        pd.DataFrame({"a": [1, 2]}),
    ],
    marks=pytest.mark.skipif(
        not PANDAS_GE_220,
        reason="https://github.com/pandas-dev/pandas/pull/56365",
    ),
),
pytest.param(
    [
        pd.Series(
            [1, 2, 3.0, 1.2], name="abc", index=[100, 110, 120, 130]
        ),
        pd.DataFrame({"a": [1, 2]}),
    ],
    marks=pytest.mark.skipif(
        not PANDAS_GE_220,
        reason="https://github.com/pandas-dev/pandas/pull/56365",
    ),
),
pytest.param(
    [
        pd.Series(
            [1, 2, 3.0, 1.2], name="abc", index=["a", "b", "c", "d"]
        ),
        pd.DataFrame({"a": [1, 2]}, index=["a", "b"]),
    ],
    marks=pytest.mark.skipif(
        not PANDAS_GE_220,
        reason="https://github.com/pandas-dev/pandas/pull/56365",
    ),
),
pytest.param(
    [
        pd.Series(
            [1, 2, 3.0, 1.2, 8, 100],
            name="New name",
            index=["a", "b", "c", "d", "e", "f"],
        ),
        pd.DataFrame(
            {"a": [1, 2, 4, 10, 11, 12]},
            index=["a", "b", "c", "d", "e", "f"],
        ),
    ],
    marks=pytest.mark.skipif(
        not PANDAS_GE_220,
        reason="https://github.com/pandas-dev/pandas/pull/56365",
    ),
),
pytest.param(
    [
        pd.Series(
            [1, 2, 3.0, 1.2, 8, 100],
            name="New name",
            index=["a", "b", "c", "d", "e", "f"],
        ),
        pd.DataFrame(
            {"a": [1, 2, 4, 10, 11, 12]},
            index=["a", "b", "c", "d", "e", "f"],
        ),
    ]
    * 7,
    marks=pytest.mark.skipif(
        not PANDAS_GE_220,
        reason="https://github.com/pandas-dev/pandas/pull/56365",
    ),
),
],
)
def test_concat_series_dataframe_input(objs):
12 changes: 8 additions & 4 deletions python/cudf/cudf/tests/test_csv.py
@@ -16,9 +16,13 @@

import cudf
from cudf import read_csv
from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
from cudf.core._compat import (
PANDAS_CURRENT_SUPPORTED_VERSION,
PANDAS_GE_220,
PANDAS_VERSION,
)
from cudf.testing import assert_eq
from cudf.testing._utils import assert_exceptions_equal
from cudf.testing._utils import assert_exceptions_equal, expect_warning_if


def make_numeric_dataframe(nrows, dtype):
@@ -1270,14 +1274,14 @@ def test_csv_reader_delim_whitespace():
# with header row
with pytest.warns(FutureWarning):
cu_df = read_csv(StringIO(buffer), delim_whitespace=True)
with pytest.warns(FutureWarning):
with expect_warning_if(PANDAS_GE_220):
pd_df = pd.read_csv(StringIO(buffer), delim_whitespace=True)
assert_eq(pd_df, cu_df)

# without header row
with pytest.warns(FutureWarning):
cu_df = read_csv(StringIO(buffer), delim_whitespace=True, header=None)
with pytest.warns(FutureWarning):
with expect_warning_if(PANDAS_GE_220):
pd_df = pd.read_csv(
StringIO(buffer), delim_whitespace=True, header=None
)
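Switching the pandas-side check from `pytest.warns(FutureWarning)` to `expect_warning_if(PANDAS_GE_220)` makes the warning assertion conditional: only pandas 2.2+ deprecates `delim_whitespace`, so older pandas is not required to warn. A hedged sketch of the helper's semantics (the real implementation lives in `cudf.testing._utils` and may differ in signature and details):

```python
# Hedged sketch of expect_warning_if: assert the warning only when `condition`
# holds, otherwise run the block with no warning requirement.
import contextlib

import pytest


def expect_warning_if(condition, category=FutureWarning):
    if condition:
        return pytest.warns(category)
    return contextlib.nullcontext()
```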
(Diffs for the remaining changed files are not shown here.)