Skip to content
This repository has been archived by the owner on Jan 14, 2025. It is now read-only.

Commit

Permalink
Repartition source frame with update_ensemble
Browse files (browse the repository at this point in the history)
  • Loading branch information
wilsonbb committed Jan 17, 2024
1 parent 257391b commit 61b7734
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/tape/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def insert_sources(
if all(prev_div):
self.update_frame(self.source.repartition(divisions=prev_div))
elif self.source.npartitions != prev_num:
- self.source = self.source.repartition(npartitions=prev_num)
+ self.update_frame(self.source.repartition(npartitions=prev_num))

return self

Expand Down
17 changes: 15 additions & 2 deletions src/tape/ensemble_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,14 @@ def compute(self, **kwargs):
self.ensemble._lazy_sync_tables_from_frame(self)
return super().compute(**kwargs)

- def repartition(self, **kwargs):
+ def repartition(
+ self,
+ divisions=None,
+ npartitions=None,
+ partition_size=None,
+ freq=None,
+ force=False,
+ ):
"""Repartition dataframe along new divisions
Doc string below derived from dask.dataframe.DataFrame
Expand Down Expand Up @@ -725,7 +732,13 @@ def repartition(self, **kwargs):
>>> df = df.repartition(divisions=[0, 5, 10, 20]) # doctest: +SKIP
>>> df = df.repartition(freq='7d') # doctest: +SKIP
"""
- result = super().repartition(**kwargs)
+ result = super().repartition(
+ divisions=divisions,
+ npartitions=npartitions,
+ partition_size=partition_size,
+ freq=freq,
+ force=force,
+ )
return self._propagate_metadata(result)


Expand Down

0 comments on commit 61b7734

Please sign in to comment.