diff --git a/fairseq/data/handwriting/scribblelens.py b/fairseq/data/handwriting/scribblelens.py index 7070b6d225..370ce43eb0 100644 --- a/fairseq/data/handwriting/scribblelens.py +++ b/fairseq/data/handwriting/scribblelens.py @@ -294,6 +294,7 @@ def __init__(self, self.root = root self.file = zipfile.ZipFile(root) + self.pid = os.getpid() root = 'scribblelens.corpus.v1' self.target_width = target_width @@ -657,8 +658,13 @@ def __len__(self): return len(self.data) def __getitem__(self, idx): - if not self.file: # Reopen to work with multiprocessing + # Reopen the ZipFile to work with multiprocessing + if self.pid != os.getpid() and self.file: + self.file.close() + self.file = None + if not self.file: self.file = zipfile.ZipFile(self.root) + self.pid = os.getpid() item = self.data[idx] df_item = self.data_frame.iloc[idx] diff --git a/uwr_related/configs/scribblelens_base.yaml b/uwr_related/configs/scribblelens_base.yaml index 3c860c3025..eba8835691 100644 --- a/uwr_related/configs/scribblelens_base.yaml +++ b/uwr_related/configs/scribblelens_base.yaml @@ -21,7 +21,7 @@ task: labels: True dataset: - num_workers: 0 + num_workers: 2 max_tokens: 10000 skip_invalid_size_inputs_valid_test: true valid_subset: test