From 7bc24a4f61c4d8336789f8b04a6805f4deae5da9 Mon Sep 17 00:00:00 2001 From: Florian Lux Date: Fri, 28 Jun 2024 00:52:09 +0200 Subject: [PATCH] make sure the channels are ordered correctly in a reference --- InferenceInterfaces/ToucanTTSInterface.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/InferenceInterfaces/ToucanTTSInterface.py b/InferenceInterfaces/ToucanTTSInterface.py index 20623505..5cde6e10 100644 --- a/InferenceInterfaces/ToucanTTSInterface.py +++ b/InferenceInterfaces/ToucanTTSInterface.py @@ -105,6 +105,9 @@ def set_utterance_embedding(self, path_to_reference_audio="", embedding=None): speaker_embs = list() for path in path_to_reference_audio: wave, sr = soundfile.read(path) + if len(wave.shape) > 1: # oh no, we found a stereo audio! + if len(wave[0]) == 2: # let's figure out whether we need to switch the axes + wave = wave.transpose() # if yes, we switch the axes. wave = librosa.to_mono(wave) wave = Resample(orig_freq=sr, new_freq=16000).to(self.device)(torch.tensor(wave, device=self.device, dtype=torch.float32)) speaker_embedding = self.speaker_embedding_func_ecapa.encode_batch(wavs=wave.to(self.device).squeeze().unsqueeze(0)).squeeze()