make multiprocessing in hifigan dataset creation optional

DigitalPhonetics · Jan 13, 2022 · 17d3dda · 17d3dda
1 parent b2c3e62
commit 17d3dda
Showing 1 changed file with 20 additions and 13 deletions.
diff --git a/TrainingInterfaces/Spectrogram_to_Wave/HiFIGAN/HiFiGANDataset.py b/TrainingInterfaces/Spectrogram_to_Wave/HiFIGAN/HiFiGANDataset.py
@@ -32,18 +32,22 @@ def __init__(self,
         # datasets and then concat them. If we just did all
         # datasets at once, there could be multiple sampling
         # rates.
-        resource_manager = Manager()
-        self.waves = resource_manager.list()
-        # make processes
-        path_splits = list()
-        process_list = list()
-        for i in range(loading_processes):
-            path_splits.append(list_of_paths[i * len(list_of_paths) // loading_processes:(i + 1) * len(list_of_paths) // loading_processes])
-        for path_split in path_splits:
-            process_list.append(Process(target=self.cache_builder_process, args=(path_split,), daemon=True))
-            process_list[-1].start()
-        for process in process_list:
-            process.join()
+        if loading_processes == 1:
+            self.waves = list()
+            self.cache_builder_process(list_of_paths)
+        else:
+            resource_manager = Manager()
+            self.waves = resource_manager.list()
+            # make processes
+            path_splits = list()
+            process_list = list()
+            for i in range(loading_processes):
+                path_splits.append(list_of_paths[i * len(list_of_paths) // loading_processes:(i + 1) * len(list_of_paths) // loading_processes])
+            for path_split in path_splits:
+                process_list.append(Process(target=self.cache_builder_process, args=(path_split,), daemon=True))
+                process_list[-1].start()
+            for process in process_list:
+                process.join()
         numpy_waves = list(self.waves)
         self.waves = list()
         for wave in numpy_waves:
@@ -56,7 +60,10 @@ def cache_builder_process(self, path_split):
                 wave, sr = sf.read(audio_file)
             if (len(wave) / sr) > ((self.samples_per_segment + 50) / self.desired_samplingrate):  # + 50 is just to be extra sure
                 # catch files that are too short to apply meaningful signal processing
-                self.waves.append(librosa.resample(y=wave, orig_sr=self._orig_sr, target_sr=self.desired_samplingrate))
+                try:
+                    self.waves.append(librosa.resample(y=wave, orig_sr=self._orig_sr, target_sr=self.desired_samplingrate))
+                except BrokenPipeError:
+                    print(f"Something is likely wrong with this file: {path} - Perhaps the audio is too short?")
 
     def __getitem__(self, index):
         """