You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
In the 3-logreg-nb-imdb.ipynb notebook from the Code-first Introduction to Natural Language Processing course, a call to TextList.from_folder() throws a BrokenProcessPool error. I am running Windows 10 64-bit.
Has anyone else encountered this problem and been able to solve it?
reviews_full = (TextList.from_folder(path)
#grab all the text files in path
.split_by_folder(valid='test')
#split by train and valid folder (that only keeps 'train' and 'test' so no need to filter)
.label_from_folder(classes=['neg', 'pos']))
#label them all with their folders
Below is the full error message:
BrokenProcessPool Traceback (most recent call last)
in
3 .split_by_folder(valid='test')
4 #split by train and valid folder (that only keeps 'train' and 'test' so no need to filter)
----> 5 .label_from_folder(classes=['neg', 'pos']))
6 #label them all with their folders
~\Anaconda3\envs\fastai\lib\site-packages\fastai\data_block.py in process(self)
532 "Process the inner datasets."
533 xp,yp = self.get_processors()
--> 534 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
535 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
536 for ds in self.lists:
~\Anaconda3\envs\fastai\lib\site-packages\fastai\data_block.py in process(self, processor)
82 if processor is not None: self.processor = processor
83 self.processor = listify(self.processor)
---> 84 for p in self.processor: p.process(self)
85 return self
86
~\Anaconda3\envs\fastai\lib\site-packages\fastai\text\data.py in process(self, ds)
295 tokens = []
296 for i in progress_bar(range(0,len(ds),self.chunksize), leave=False):
--> 297 tokens += self.tokenizer.process_all(ds.items[i:i+self.chunksize])
298 ds.items = tokens
299
~\Anaconda3\envs\fastai\lib\site-packages\fastai\text\transform.py in process_all(self, texts)
118 if self.n_cpus <= 1: return self._process_all_1(texts)
119 with ProcessPoolExecutor(self.n_cpus) as e:
--> 120 return sum(e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), [])
121
122 class Vocab():
~\Anaconda3\envs\fastai\lib\concurrent\futures\process.py in _chain_from_iterable_of_lists(iterable)
474 careful not to keep references to yielded objects.
475 """
--> 476 for element in iterable:
477 element.reverse()
478 while element:
~\Anaconda3\envs\fastai\lib\concurrent\futures\_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
--> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.monotonic())
~\Anaconda3\envs\fastai\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.
Further experimentation shows that the command sometimes succeeds without throwing the BrokenProcessPool error.
This is still a problem that should be addressed.
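Breaking down the command that generates reviews_full into its three separate parts shows that the third part is the origin of the BrokenProcessPool error:
reviews_full0 = TextList.from_folder(path)
reviews_full1 = reviews_full0.split_by_folder(valid='test')
reviews_full = reviews_full1.label_from_folder(classes=['neg', 'pos'])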
On Thu, Feb 27, 2020, 11:38 AM simon ***@***.***> wrote:
Is there any solution to this?
—
You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub
<#32?email_source=notifications&email_token=ABOOLPUGOT2MVPRNWSWNIALRFAJEFA5CNFSM4KCGWA52YY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGOENFVTDA#issuecomment-592140684>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/ABOOLPVEEHQWO2IVTMVBSFLRFAJEFANCNFSM4KCGWA5Q>
.
I'm still experiencing problems with label_from_folder throwing BrokenProcessPool (the brute force method >1000 didn't work for me). Do you know any steps I can take to solve it?
In the 3-logreg-nb-imdb.ipynb notebook from the Code-first Introduction to Natural Language Processing course, a call to TextList.from_folder() throws a BrokenProcessPool error. I am running Windows 10 64-bit.
Has anyone else encountered this problem and been able to solve it?
Below is the full error message:
BrokenProcessPool Traceback (most recent call last)
in
3 .split_by_folder(valid='test')
4 #split by train and valid folder (that only keeps 'train' and 'test' so no need to filter)
----> 5 .label_from_folder(classes=['neg', 'pos']))
6 #label them all with their folders
~\Anaconda3\envs\fastai\lib\site-packages\fastai\data_block.py in _inner(*args, **kwargs)
478 self.valid = fv(*args, from_item_lists=True, **kwargs)
479 self.__class__ = LabelLists
--> 480 self.process()
481 return self
482 return _inner
~\Anaconda3\envs\fastai\lib\site-packages\fastai\data_block.py in process(self)
532 "Process the inner datasets."
533 xp,yp = self.get_processors()
--> 534 for ds,n in zip(self.lists, ['train','valid','test']): ds.process(xp, yp, name=n)
535 #progress_bar clear the outputs so in some case warnings issued during processing disappear.
536 for ds in self.lists:
~\Anaconda3\envs\fastai\lib\site-packages\fastai\data_block.py in process(self, xp, yp, name, max_warn_items)
712 p.warns = []
713 self.x,self.y = self.x[~filt],self.y[~filt]
--> 714 self.x.process(xp)
715 return self
716
~\Anaconda3\envs\fastai\lib\site-packages\fastai\data_block.py in process(self, processor)
82 if processor is not None: self.processor = processor
83 self.processor = listify(self.processor)
---> 84 for p in self.processor: p.process(self)
85 return self
86
~\Anaconda3\envs\fastai\lib\site-packages\fastai\text\data.py in process(self, ds)
295 tokens = []
296 for i in progress_bar(range(0,len(ds),self.chunksize), leave=False):
--> 297 tokens += self.tokenizer.process_all(ds.items[i:i+self.chunksize])
298 ds.items = tokens
299
~\Anaconda3\envs\fastai\lib\site-packages\fastai\text\transform.py in process_all(self, texts)
118 if self.n_cpus <= 1: return self._process_all_1(texts)
119 with ProcessPoolExecutor(self.n_cpus) as e:
--> 120 return sum(e.map(self._process_all_1, partition_by_cores(texts, self.n_cpus)), [])
121
122 class Vocab():
~\Anaconda3\envs\fastai\lib\concurrent\futures\process.py in _chain_from_iterable_of_lists(iterable)
474 careful not to keep references to yielded objects.
475 """
--> 476 for element in iterable:
477 element.reverse()
478 while element:
~\Anaconda3\envs\fastai\lib\concurrent\futures\_base.py in result_iterator()
584 # Careful not to keep a reference to the popped future
585 if timeout is None:
--> 586 yield fs.pop().result()
587 else:
588 yield fs.pop().result(end_time - time.monotonic())
~\Anaconda3\envs\fastai\lib\concurrent\futures\_base.py in result(self, timeout)
430 raise CancelledError()
431 elif self._state == FINISHED:
--> 432 return self.__get_result()
433 else:
434 raise TimeoutError()
~\Anaconda3\envs\fastai\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
Further experimentation shows that the command sometimes succeeds without throwing the BrokenProcessPool error.
This is still a problem that should be addressed.
Breaking down the command that generates reviews_full into its three separate parts shows that the third part is the origin of the BrokenProcessPool error:
reviews_full0 = TextList.from_folder(path)
reviews_full1 = reviews_full0.split_by_folder(valid='test')
reviews_full = reviews_full1.label_from_folder(classes=['neg', 'pos'])
The text was updated successfully, but these errors were encountered: