From 29cb808dce0d40d5116cecfb287ed9acd468c044 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Fri, 27 Sep 2024 10:24:16 -0700 Subject: [PATCH] WIP: reduce meta handling --- src/nested_dask/core.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/nested_dask/core.py b/src/nested_dask/core.py index 8d27a32..c15ff46 100644 --- a/src/nested_dask/core.py +++ b/src/nested_dask/core.py @@ -442,6 +442,11 @@ def nested_columns(self) -> list: nest_cols.append(column) return nest_cols + # def map_partitions(self, *args, **kwargs) -> NestedFrame: + # """docstring""" + # res = super().map_partitions(*args, **kwargs) + # return res#.map_partitions(npd.NestedFrame, meta=npd.NestedFrame(res._meta.copy())) + def _is_known_hierarchical_column(self, colname) -> bool: """Determine whether a string is a known hierarchical column name""" if "." in colname: @@ -655,6 +660,14 @@ def reduce(self, func, *args, meta=None, **kwargs) -> NestedFrame: """ + # Handle meta shorthands to produce nestedframe output + # route standard dict meta to nestedframe + if isinstance(meta, dict): + meta = npd.NestedFrame(meta, index=[]) + # reroute series meta to nestedframe, per consistency with nested-pandas + elif isinstance(meta, tuple) and len(meta) == 2: # len 2 to only try on proper series meta + meta = npd.NestedFrame(pd.Series(name=meta[0], dtype=meta[1]).to_frame()) + # apply nested_pandas reduce via map_partitions # wrap the partition in a npd.NestedFrame call for: # https://github.com/lincc-frameworks/nested-dask/issues/21