Skip to content

Commit

Permalink
address review comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
dougbrn committed May 21, 2024
1 parent 8192366 commit f6a21e3
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/dask_nested/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def _check_series(series):
def fields(self) -> list[str]:
"""Names of the nested columns"""

return self._series.head(0).nest.fields # hacky
return list(self._series.dtype.fields)

def to_lists(self, fields: list[str] | None = None) -> dd.DataFrame:
"""Convert nested series into dataframe of list-array columns
Expand Down
4 changes: 3 additions & 1 deletion src/dask_nested/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
@make_meta_dispatch.register(npd.NestedFrame)
def make_meta_frame(x, index=None) -> npd.NestedFrame:
"""Create an empty NestedFrame to use as Dask's underlying object meta."""
result = x.head(0)

dtypes = x.dtypes.to_dict()
result = npd.NestedFrame({key: pd.Series(dtype=d) for key, d in dtypes.items()})
return result


Expand Down
24 changes: 21 additions & 3 deletions src/dask_nested/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ def from_nested_pandas(
the size and index of the dataframe, the output may have fewer
partitions than requested.
chunksize: `int`, optional
Size of the individual chunks of data in non-parallel objects that make up Dask frames.
The desired number of rows per index partition to use. Note that
depending on the size and index of the dataframe, actual partition
sizes may vary.
sort: `bool`, optional
Whether to sort the frame by a default index.
Expand Down Expand Up @@ -133,7 +135,7 @@ def nested_columns(self) -> list:
nest_cols.append(column)
return nest_cols

def add_nested(self, nested, name) -> NestedFrame: # type: ignore[name-defined] # noqa: F821
def add_nested(self, nested, name, how="outer") -> NestedFrame: # type: ignore[name-defined] # noqa: F821
"""Packs a dataframe into a nested column
Parameters
Expand All @@ -142,13 +144,29 @@ def add_nested(self, nested, name) -> NestedFrame: # type: ignore[name-defined]
A flat dataframe to pack into a nested column
name:
The name given to the nested column
how: {'left', 'right', 'outer', 'inner', 'cross'}, default 'outer'
How to handle the operation of the two objects.
* left: use calling frame’s index (or column if on is specified)
* right: use other’s index.
* outer: form union of calling frame’s index (or column if on is
specified) with other’s index, and sort it lexicographically.
* inner: form intersection of calling frame’s index (or column if
on is specified) with other’s index, preserving the order of the
calling frame’s index.
* cross: creates the cartesian product from both frames, preserves
the order of the left keys.
Returns
-------
`dask_nested.NestedFrame`
"""
nested = nested.map_partitions(lambda x: pack_flat(x)).rename(name)
return self.join(nested, how="outer")
return self.join(nested, how=how)

def query(self, expr) -> Self: # type: ignore # noqa: F821:
"""
Expand Down

0 comments on commit f6a21e3

Please sign in to comment.