Skip to content

Commit

Permalink
Allow multiprocessing file loading.
Browse files Browse the repository at this point in the history
  • Loading branch information
janchorowski committed Dec 30, 2020
1 parent 7a71cd5 commit a23fe32
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
8 changes: 7 additions & 1 deletion fairseq/data/handwriting/scribblelens.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ def __init__(self,
self.root = root

self.file = zipfile.ZipFile(root)
self.pid = os.getpid()
root = 'scribblelens.corpus.v1'

self.target_width = target_width
Expand Down Expand Up @@ -657,8 +658,13 @@ def __len__(self):
return len(self.data)

def __getitem__(self, idx):
if not self.file: # Reopen to work with multiprocessing
# Reopen the ZipFile to work with multiprocessing
if self.pid != os.getpid() and self.file:
self.file.close()
self.file = None
if not self.file:
self.file = zipfile.ZipFile(self.root)
self.pid = os.getpid()
item = self.data[idx]

df_item = self.data_frame.iloc[idx]
Expand Down
2 changes: 1 addition & 1 deletion uwr_related/configs/scribblelens_base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ task:
labels: True

dataset:
num_workers: 0
num_workers: 2
max_tokens: 10000
skip_invalid_size_inputs_valid_test: true
valid_subset: test
Expand Down

0 comments on commit a23fe32

Please sign in to comment.