diff --git a/src/nested_pandas/series/dtype.py b/src/nested_pandas/series/dtype.py index 0d85409..f1541db 100644 --- a/src/nested_pandas/series/dtype.py +++ b/src/nested_pandas/series/dtype.py @@ -70,17 +70,17 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define Raises ------ - ValueError + TypeError If the string is not a valid nested type string or if the element types are parametric pyarrow types. """ if not string.startswith("nested<") or not string.endswith(">"): - raise ValueError("Not a valid nested type string, expected 'nested<...>'") + raise TypeError("Not a valid nested type string, expected 'nested<...>'") fields_str = string.removeprefix("nested<").removesuffix(">") field_strings = fields_str.split(", ") if len(field_strings) == 0: - raise ValueError( + raise TypeError( "Not a valid nested type string, expected at least a single field inside " "'nested'" ) @@ -90,12 +90,12 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define try: field_name, field_type = field_string.split(": ", maxsplit=1) except ValueError as e: - raise ValueError( + raise TypeError( "Not a valid nested type string, expected 'nested', got invalid field " f"string '{field_string}'" ) from e if not field_type.startswith("[") or not field_type.endswith("]"): - raise ValueError( + raise TypeError( "Not a valid nested type string, expected 'nested', got invalid field " f"type string '{field_type}'" ) @@ -105,7 +105,7 @@ def construct_from_string(cls, string: str) -> Self: # type: ignore[name-define try: pa_value_type = pa.type_for_alias(value_type) except ValueError as e: - raise ValueError( + raise TypeError( f"Parsing pyarrow specific parameters in the string is not supported yet: {value_type}. " "Please use NestedDtype() or NestedDtype.from_fields() instead." ) from e diff --git a/tests/nested_pandas/series/test_accessor.py b/tests/nested_pandas/series/test_accessor.py index d3a736f..de65416 100644 --- a/tests/nested_pandas/series/test_accessor.py +++ b/tests/nested_pandas/series/test_accessor.py @@ -4,6 +4,7 @@ import pytest from nested_pandas import NestedDtype from nested_pandas.series.ext_array import NestedExtensionArray +from nested_pandas.series.packer import pack_flat from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal, assert_series_equal @@ -503,3 +504,28 @@ def test___len__(): series = pd.Series(struct_array, dtype=NestedDtype(struct_array.type), index=[0, 1]) assert len(series.nest) == 2 + + +def test_to_flat_dropna(): + """Test that to_flat() gives a valid dataframe, based on GH22 + + https://github.com/lincc-frameworks/nested-pandas/issues/22 + """ + + flat = pd.DataFrame( + data={"c": [0.0, 2, 4, 1, np.NaN, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}, + index=[0, 0, 0, 1, 1, 1, 2, 2, 2], + ) + nested = pack_flat(flat, name="nested") + + new_flat = nested.nest.to_flat() + # .dropna() was failing in the issue report + filtered = new_flat.dropna(subset="c") + + assert_frame_equal( + filtered, + pd.DataFrame( + data={"c": [0.0, 2, 4, 1, 3, 1, 4, 1], "d": [5, 4, 7, 5, 1, 9, 3, 4]}, + index=[0, 0, 0, 1, 1, 2, 2, 2], + ), + ) diff --git a/tests/nested_pandas/series/test_dtype.py b/tests/nested_pandas/series/test_dtype.py index 54c40bf..bd11313 100644 --- a/tests/nested_pandas/series/test_dtype.py +++ b/tests/nested_pandas/series/test_dtype.py @@ -99,6 +99,29 @@ def test_name_vs_construct_from_string(fields): assert dtype == NestedDtype.construct_from_string(dtype.name) +@pytest.mark.parametrize( + "s", + [ + "float", # not a nested type + "nested(f: [int64])", # must be <> instead + "ts", # 'ts' was a previous name, now we use 'nested' + "nested", # no type specified + "nested", # no field specified + "nested", # no field name specified + "nested<[int64]>", # no field name specified + "nested", # separator must be ": " with space + "nested", # missed [] - nested list + "nested", # not an arrow type + "nested]>", # complex arrow types are not supported + ], +) +def test_construct_from_string_raises(s): + """Test that we raise an error when constructing NestedDtype from invalid string.""" + with pytest.raises(TypeError): + NestedDtype.construct_from_string(s) + + def test_construct_array_type(): """Test that NestedDtype.construct_array_type() returns NestedExtensionArray.""" assert NestedDtype.construct_array_type() is NestedExtensionArray