Skip to content

Commit

Permalink
create new advanced control GUI
Browse files Browse the repository at this point in the history
  • Loading branch information
Flux9665 committed Oct 7, 2024
1 parent 06b7d54 commit 627579e
Show file tree
Hide file tree
Showing 5 changed files with 685 additions and 2 deletions.
5 changes: 4 additions & 1 deletion InferenceInterfaces/ToucanTTSInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ def forward(self,
input_is_phones=False,
return_plot_as_filepath=False,
loudness_in_db=-29.0,
- prosody_creativity=0.1):
+ prosody_creativity=0.1,
+ return_everything=False):
"""
duration_scaling_factor: reasonable values are 0.8 < scale < 1.2.
1.0 means no scaling happens, higher values increase durations for the whole
Expand Down Expand Up @@ -233,6 +234,8 @@ def forward(self,
plt.savefig("tmp.png")
plt.close()
return wave, sr, "tmp.png"
+ if return_everything:
+     return wave, mel, durations, pitch
return wave, sr

def read_to_file(self,
Expand Down
2 changes: 1 addition & 1 deletion Modules/ToucanTTS/InferenceToucanTTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def _forward(self,
mask=text_masks.float(),
n_timesteps=20,
temperature=prosody_creativity,
- c=utterance_embedding)), min=0.0).long().squeeze(1) if gold_durations is None else gold_durations
+ c=utterance_embedding)), min=0.0).long().squeeze(1) if gold_durations is None else gold_durations.squeeze(1)

# modifying the predictions with control parameters
for phoneme_index, phoneme_vector in enumerate(text_tensors.squeeze(0)):
Expand Down
Binary file modified requirements.txt
Binary file not shown.
Loading

0 comments on commit 627579e

Please sign in to comment.