Skip to content

Commit

Permalink
separate booleans
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed May 14, 2024
1 parent e881370 commit 6570322
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 12 deletions.
37 changes: 25 additions & 12 deletions python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -1948,19 +1948,25 @@ def as_column(
raise TypeError(
f"Cannot convert a {inferred_dtype} of object type"
)
elif inferred_dtype == "boolean":
if cudf.get_option("mode.pandas_compatible"):
if (
dtype is not None
and dtype == np.dtype("bool")
and not pd.isna(arbitrary).any()
):
pass
else:
raise MixedTypeError(
f"Cannot have mixed values with {inferred_dtype}"
)
elif nan_as_null is False and _has_any_nan(arbitrary):
raise MixedTypeError(
f"Cannot have mixed values with {inferred_dtype}"
)
elif (
cudf.get_option("mode.pandas_compatible")
and inferred_dtype == "boolean"
):
raise MixedTypeError(
f"Cannot have mixed values with {inferred_dtype}"
)
elif nan_as_null is False and (
any(
(isinstance(x, (np.floating, float)) and np.isnan(x))
or (inferred_dtype == "boolean" and pd.isna(arbitrary))
for x in np.asarray(arbitrary)
)
nan_as_null is False
and _has_any_nan(arbitrary)
and inferred_dtype not in ("decimal", "empty")
):
# Decimal can hold float("nan")
Expand Down Expand Up @@ -2338,3 +2344,10 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase:

# Filter out inputs that have 0 length, then concatenate.
return libcudf.concat.concat_columns([o for o in objs if len(o)])


def _has_any_nan(arbitrary):
    """Return whether ``arbitrary`` contains at least one floating-point NaN.

    Only elements that are Python ``float`` or ``np.floating`` instances are
    tested with ``np.isnan``; every other element (``None``, booleans,
    strings, ...) is ignored, so ``None`` alone never counts as NaN here.

    Parameters
    ----------
    arbitrary : array-like
        Any one-dimensional object that ``np.asarray`` can convert.

    Returns
    -------
    bool
        True if a float NaN is present, False otherwise.
    """
    values = np.asarray(arbitrary)
    if values.dtype.kind in ("U", "S"):
        # A plain sequence mixing text and floats (e.g. ["a", nan]) is
        # coerced by NumPy to a string dtype, which turns the NaN into the
        # text "nan" and hides it from the isinstance check below.
        # Re-reading the input as objects keeps the original elements.
        values = np.asarray(arbitrary, dtype=object)
    return any(
        isinstance(value, (np.floating, float)) and np.isnan(value)
        for value in values
    )
10 changes: 10 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2358,13 +2358,23 @@ def test_multi_dim_series_error():

def test_bool_series_mixed_dtype_error():
"""Check how cudf.Series handles object-dtype pandas Series of booleans.

Covers three inputs: booleans mixed with None, all-boolean values stored
as object dtype, and booleans mixed with NaN (with nan_as_null on and off).
"""
# NOTE(review): indentation was lost in this view, so it is not visible
# which of the statements below sit inside the option_context block;
# confirm against the real file before relying on the mode they run under.
ps = pd.Series([True, False, None])
# Same kind of values but without a missing entry, explicitly kept as object.
all_bool_ps = pd.Series([True, False, True], dtype="object")
# ps now has `object` dtype, which
# isn't supported by `cudf`.
with cudf.option_context("mode.pandas_compatible", True):
# Each construction path is expected to raise TypeError for the
# bool/None mix.
with pytest.raises(TypeError):
cudf.Series(ps)
with pytest.raises(TypeError):
cudf.from_pandas(ps)
# An explicit bool dtype is still expected to raise for this input.
with pytest.raises(TypeError):
cudf.Series(ps, dtype=bool)
# All-boolean object data with dtype=bool is expected to convert cleanly.
# Despite its name, `expected` holds the cudf result under test; the pandas
# astype(bool) result is the reference it is compared against.
expected = cudf.Series(all_bool_ps, dtype=bool)
assert_eq(expected, all_bool_ps.astype(bool))
# Booleans plus NaN: with nan_as_null=True the result is compared against
# pandas' nullable "boolean" dtype.
nan_bools_mix = pd.Series([True, False, True, np.nan], dtype="object")
gs = cudf.Series(nan_bools_mix, nan_as_null=True)
assert_eq(gs.to_pandas(nullable=True), nan_bools_mix.astype("boolean"))
# With nan_as_null=False the same input is expected to raise TypeError.
with pytest.raises(TypeError):
cudf.Series(nan_bools_mix, nan_as_null=False)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 6570322

Please sign in to comment.