diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index dc4a547e61b..965e73cfce6 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1948,19 +1948,25 @@ def as_column( raise TypeError( f"Cannot convert a {inferred_dtype} of object type" ) + elif inferred_dtype == "boolean": + if cudf.get_option("mode.pandas_compatible"): + if ( + dtype is not None + and dtype == np.dtype("bool") + and not pd.isna(arbitrary).any() + ): + pass + else: + raise MixedTypeError( + f"Cannot have mixed values with {inferred_dtype}" + ) + elif nan_as_null is False and _has_any_nan(arbitrary): + raise MixedTypeError( + f"Cannot have mixed values with {inferred_dtype}" + ) elif ( - cudf.get_option("mode.pandas_compatible") - and inferred_dtype == "boolean" - ): - raise MixedTypeError( - f"Cannot have mixed values with {inferred_dtype}" - ) - elif nan_as_null is False and ( - any( - (isinstance(x, (np.floating, float)) and np.isnan(x)) - or (inferred_dtype == "boolean" and pd.isna(arbitrary)) - for x in np.asarray(arbitrary) - ) + nan_as_null is False + and _has_any_nan(arbitrary) and inferred_dtype not in ("decimal", "empty") ): # Decimal can hold float("nan") @@ -2338,3 +2344,10 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: # Filter out inputs that have 0 length, then concatenate. return libcudf.concat.concat_columns([o for o in objs if len(o)]) + + +def _has_any_nan(arbitrary): + return any( + (isinstance(x, (np.floating, float)) and np.isnan(x)) + for x in np.asarray(arbitrary) + ) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index f491f882761..9aeae566730 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -2358,6 +2358,7 @@ def test_multi_dim_series_error(): def test_bool_series_mixed_dtype_error(): ps = pd.Series([True, False, None]) + all_bool_ps = pd.Series([True, False, True], dtype="object") # ps now has `object` dtype, which # isn't supported by `cudf`. with cudf.option_context("mode.pandas_compatible", True): @@ -2365,6 +2366,15 @@ def test_bool_series_mixed_dtype_error(): cudf.Series(ps) with pytest.raises(TypeError): cudf.from_pandas(ps) + with pytest.raises(TypeError): + cudf.Series(ps, dtype=bool) + expected = cudf.Series(all_bool_ps, dtype=bool) + assert_eq(expected, all_bool_ps.astype(bool)) + nan_bools_mix = pd.Series([True, False, True, np.nan], dtype="object") + gs = cudf.Series(nan_bools_mix, nan_as_null=True) + assert_eq(gs.to_pandas(nullable=True), nan_bools_mix.astype("boolean")) + with pytest.raises(TypeError): + cudf.Series(nan_bools_mix, nan_as_null=False) @pytest.mark.parametrize(