Skip to content

Commit

Permalink
Avoid unnecessary re-init of control dataset (#14)
Browse files (browse the repository at this point in the history)
... when using multiple workers
  • Loading branch information
JackTemaki authored Dec 21, 2023
1 parent e4eedc3 commit 7fad621
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions returnn/datasets/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,10 +424,16 @@ def get_seq_len(s):

  for dataset_key, dataset in self.datasets.items():
      assert isinstance(dataset, Dataset)
-     if dataset is seq_order_dataset and not sharding_in_meta:
-         # only skip if we did not do sharding here, otherwise the sequence list
-         # of the control dataset needs to be rebuilt as well
-         continue
+     if dataset is seq_order_dataset:
+         if not sharding_in_meta:
+             # only skip if we did not do sharding here, otherwise the sequence list
+             # of the control dataset needs to be rebuilt as well
+             continue
+         else:
+             # we can do a faster init using the seq_order directly,
+             # as the seq_index directly relates to the seq_order_dataset
+             dataset.init_seq_order(epoch=epoch, seq_order=seq_index)
+             continue
      dataset.init_seq_order(epoch=epoch, seq_list=self.seq_list_ordered[dataset_key])
  return True

Expand Down

0 comments on commit 7fad621

Please sign in to comment.