Skip to content

Commit

Permalink
add sorted flag
Browse files Browse the repository at this point in the history
  • Loading branch information
dougbrn committed Oct 23, 2023
1 parent e990da3 commit c0d86df
Showing 1 changed file with 21 additions and 8 deletions.
29 changes: 21 additions & 8 deletions src/tape/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,7 @@ def from_dask_dataframe(
sync_tables=True,
npartitions=None,
partition_size=None,
sorted=False,
sort=False,
**kwargs,
):
Expand All @@ -1039,9 +1040,12 @@ def from_dask_dataframe(
partition_size: `int`, optional
If specified, attempts to repartition the ensemble to partitions
of size `partition_size`.
sorted: `bool`, optional
If True, the index column is treated as already sorted in increasing
order. Defaults to False.
sort: `bool`, optional
If True, sorts the DataFrame by the id column. Otherwise set the index
on the individual existing partitions. Defaults to False.
If True, sorts the DataFrame by the id column. Otherwise set the
index on the individual existing partitions. Defaults to False.
Returns
----------
Expand All @@ -1051,14 +1055,14 @@ def from_dask_dataframe(
self._load_column_mapper(column_mapper, **kwargs)

# Set the index of the source frame and save the resulting table
self._source = source_frame.set_index(self._id_col, drop=True, sort=sort)
self._source = source_frame.set_index(self._id_col, drop=True, sorted=sorted, sort=sort)

if object_frame is None: # generate an indexed object table from source
self._object = self._generate_object_table()

else:
self._object = object_frame
self._object = self._object.set_index(self._id_col, sort=sort)
self._object = self._object.set_index(self._id_col, sorted=sorted, sort=sort)

# Optionally sync the tables, recalculates nobs columns
if sync_tables:
Expand Down Expand Up @@ -1205,6 +1209,7 @@ def from_parquet(
additional_cols=True,
npartitions=None,
partition_size=None,
sorted=False,
sort=False,
**kwargs,
):
Expand Down Expand Up @@ -1239,9 +1244,12 @@ def from_parquet(
partition_size: `int`, optional
If specified, attempts to repartition the ensemble to partitions
of size `partition_size`.
sorted: `bool`, optional
If True, the index column is treated as already sorted in increasing
order. Defaults to False.
sort: `bool`, optional
If True, sorts the DataFrame by the id column. Otherwise set the index
on the individual existing partitions. Defaults to False.
If True, sorts the DataFrame by the id column. Otherwise set the
index on the individual existing partitions. Defaults to False.
Returns
----------
Expand Down Expand Up @@ -1279,6 +1287,7 @@ def from_parquet(
sync_tables=sync_tables,
npartitions=npartitions,
partition_size=partition_size,
sorted=sorted,
sort=sort,
**kwargs,
)
Expand Down Expand Up @@ -1350,9 +1359,12 @@ def from_source_dict(self, source_dict, column_mapper=None, npartitions=1, sort=
npartitions: `int`, optional
If specified, attempts to repartition the ensemble to the specified
number of partitions
sorted: `bool`, optional
If True, the index column is treated as already sorted in increasing
order. Defaults to False.
sort: `bool`, optional
If True, sorts the DataFrame by the id column. Otherwise set the index
on the individual existing partitions. Defaults to False.
If True, sorts the DataFrame by the id column. Otherwise set the
index on the individual existing partitions. Defaults to False.
Returns
----------
Expand All @@ -1369,6 +1381,7 @@ def from_source_dict(self, source_dict, column_mapper=None, npartitions=1, sort=
column_mapper=column_mapper,
sync_tables=True,
npartitions=npartitions,
sorted=sorted,
sort=sort,
**kwargs,
)
Expand Down

0 comments on commit c0d86df

Please sign in to comment.