diff --git a/src/nested_pandas/series/dtype.py b/src/nested_pandas/series/dtype.py index 3559b7b..a798235 100644 --- a/src/nested_pandas/series/dtype.py +++ b/src/nested_pandas/series/dtype.py @@ -13,7 +13,6 @@ from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.base import ExtensionDtype -from nested_pandas.series.na import NA, NAType from nested_pandas.series.utils import is_pa_type_a_list __all__ = ["NestedDtype"] @@ -29,9 +28,9 @@ class NestedDtype(ExtensionDtype): """Attributes to use as metadata for __eq__ and __hash__""" @property - def na_value(self) -> NAType: + def na_value(self) -> Type[pd.NA]: """The missing value for this dtype""" - return NA + return pd.NA type = pd.DataFrame """The type of the array's elements, always pd.DataFrame""" diff --git a/src/nested_pandas/series/ext_array.py b/src/nested_pandas/series/ext_array.py index df0b99f..a022bdc 100644 --- a/src/nested_pandas/series/ext_array.py +++ b/src/nested_pandas/series/ext_array.py @@ -160,8 +160,9 @@ def to_numpy(self, dtype: None = None, copy: bool = False, na_value: Any = no_de # Hack with np.empty is the only way to force numpy to create 1-d array of objects result = np.empty(shape=array.shape, dtype=object) + # We do copy=False here because user's 'copy' is already handled by ArrowExtensionArray.to_numpy - result[:] = [pd.DataFrame(value, copy=False) for value in array] + result[:] = [pd.DataFrame(value, copy=False) if not pd.isna(value) else pd.NA for value in array] return result def __setitem__(self, key, value) -> None: diff --git a/src/nested_pandas/series/na.py b/src/nested_pandas/series/na.py deleted file mode 100644 index 0b77bb4..0000000 --- a/src/nested_pandas/series/na.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Missing value for NestedDtype - -It i something between pandas' NA and NaN -""" - -__all__ = ["NAType", "NA"] - - -class _NAType: - pass - - -class NAType: - """Singleton class representing missing value for NestedDtype. 
- - It doesn't implement most of the arithmetics and boolean logic operations, - because they are ambiguous for missing values. - - The implementation is inspired both by pandas' NA and float number NaN. - - `NA` is a singleton instance of this class. - """ - - _instance = None - - def __new__(cls, *args, **kwargs): - """Create a new instance of NAType.""" - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def __repr__(self) -> str: - return "" - - def __format__(self, format_spec) -> str: - try: - return self.__repr__().__format__(format_spec) - except ValueError: - return self.__repr__() - - def __bool__(self): - raise TypeError("boolean value of NA is ambiguous") - - def __eq__(self, other): - return False - - def __ne__(self, other): - return True - - def __hash__(self): - return 0 - - -NA = NAType() -"""Missed value for NestedDtype, a singleton instance of `NAType` class.""" diff --git a/src/nested_pandas/utils/utils.py b/src/nested_pandas/utils/utils.py index 828afe8..259f3fc 100644 --- a/src/nested_pandas/utils/utils.py +++ b/src/nested_pandas/utils/utils.py @@ -6,6 +6,8 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame: """Counts the number of rows of a nested dataframe. 
+ #TODO: Does not work when any nested dataframe is missing (NA) + Parameters ---------- df: NestedFrame diff --git a/tests/nested_pandas/nestedframe/test_nestedframe.py b/tests/nested_pandas/nestedframe/test_nestedframe.py index 36afdad..7f39c8e 100644 --- a/tests/nested_pandas/nestedframe/test_nestedframe.py +++ b/tests/nested_pandas/nestedframe/test_nestedframe.py @@ -78,6 +78,22 @@ def test_add_nested(): assert base.nested.nest.to_flat().equals(nested) +def test_add_nested_with_mismatched_index(): + """Test add_nested when some index values of base have no matching rows in nested""" + + base = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2]) + + nested = pd.DataFrame( + data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]}, + index=[0, 0, 0, 1, 1, 1, 1, 1, 1], # no data for index value of "2" + ) + + base = base.add_nested(nested, "nested") + + assert "nested" in base.columns + assert pd.isna(base.loc[2]["nested"]) + + def test_query(): """Test that NestedFrame.query handles nested queries correctly""" diff --git a/tests/nested_pandas/series/test_dtype.py b/tests/nested_pandas/series/test_dtype.py index 97aa32d..2b6b3b2 100644 --- a/tests/nested_pandas/series/test_dtype.py +++ b/tests/nested_pandas/series/test_dtype.py @@ -3,7 +3,6 @@ import pytest from nested_pandas.series.dtype import NestedDtype from nested_pandas.series.ext_array import NestedExtensionArray -from nested_pandas.series.na import NA @pytest.mark.parametrize( @@ -62,7 +61,7 @@ def test_from_fields(): def test_na_value(): """Test that NestedDtype.na_value is a singleton instance of NAType.""" dtype = NestedDtype(pa.struct([pa.field("a", pa.list_(pa.int64()))])) - assert dtype.na_value is NA + assert dtype.na_value is pd.NA def test_fields(): diff --git a/tests/nested_pandas/series/test_na.py b/tests/nested_pandas/series/test_na.py deleted file mode 100644 index b3a81b3..0000000 --- a/tests/nested_pandas/series/test_na.py +++ /dev/null @@ -1,50 +0,0 @@ -import 
pytest -from nested_pandas.series.na import NA - - -def test_na_is_singleton(): - """Test that NA is a singleton instance""" - assert NA is NA - - -def test_na_repr(): - """Test that NA has the correct representation.""" - assert repr(NA) == "" - - -def test_na_format(): - """Test that NA has the correct format.""" - assert f"{NA}" == "" - - -def test_na_bool(): - """Test that NA raises TypeError when converted to bool.""" - with pytest.raises(TypeError): - bool(NA) - - -def test_na_eq(): - """Test that NA is not equal to anything.""" - assert NA != 1 - assert NA != 1.0 - assert NA != "1" - assert NA != NA - - -def test_na_neq(): - """Test that NA is not equal to anything.""" - assert NA != 1 - assert NA != 1.0 - assert NA != "1" - assert [] != NA - assert {} != NA - assert NA != () - assert set() != NA - assert NA != NA - assert object() != NA - - -def test_hash(): - """Test that hash(NA) is always the same.""" - assert hash(NA) == hash(NA) - assert {NA, NA} == {NA}