Make sure the channels of a stereo reference audio are ordered correctly

DigitalPhonetics · Jun 27, 2024 · 7bc24a4 · 7bc24a4
1 parent ddda2c6
commit 7bc24a4
Showing 1 changed file with 3 additions and 0 deletions.
diff --git a/InferenceInterfaces/ToucanTTSInterface.py b/InferenceInterfaces/ToucanTTSInterface.py
@@ -105,6 +105,9 @@ def set_utterance_embedding(self, path_to_reference_audio="", embedding=None):
             speaker_embs = list()
             for path in path_to_reference_audio:
                 wave, sr = soundfile.read(path)
+                if len(wave.shape) > 1:  # oh no, we found a stereo audio!
+                    if len(wave[0]) == 2:  # let's figure out whether we need to switch the axes
+                        wave = wave.transpose()  # if yes, we switch the axes.
                 wave = librosa.to_mono(wave)
                 wave = Resample(orig_freq=sr, new_freq=16000).to(self.device)(torch.tensor(wave, device=self.device, dtype=torch.float32))
                 speaker_embedding = self.speaker_embedding_func_ecapa.encode_batch(wavs=wave.to(self.device).squeeze().unsqueeze(0)).squeeze()