# app.py -- MimicMania Streamlit application.
# Forked from everydaycodings/MimicMania.
import streamlit as st
from helpers import ProcessModelList, ConvertTextToSpeech, AudioClonning, download_audio_file
import io
# ---------------------------------------------------------------------------
# MimicMania Streamlit front end.
#
# Streamlit re-runs this script top to bottom on every widget interaction, so
# everything lives at module level. The sidebar picks one of two pages:
#   * "Text To Speech"  -- choose a language/model/speaker and synthesise text.
#   * "Voice Clonning"  -- upload a reference recording and synthesise text
#                          through AudioClonning.
# The model/audio work itself is delegated to the project's `helpers` module.
# ---------------------------------------------------------------------------
st.sidebar.title("Welcome to :blue[MimicMania]")
sidebar_options = ["Text To Speech", "Voice Clonning"]
choice = st.sidebar.selectbox(label="Select Your Usecase: ", options=sidebar_options)

if choice == sidebar_options[0]:
    # ----------------------------- Text To Speech --------------------------
    st.title("Convert your Text to Speech")

    model_list = ProcessModelList()
    selected_language = st.selectbox(
        "Select The Language: ",
        options=model_list.get_langauge_labels(),
    )
    selected_model = st.selectbox(
        "Select The Model: ",
        options=model_list.get_model_name(selected_language=selected_language),
    )
    selected_model_path = model_list.get_model_path(
        selected_language=selected_language,
        selected_model=selected_model,
    )
    multi_speaker, multi_speaker_list = model_list.get_multi_speaker_model(
        model_path=selected_model_path
    )

    # Bind both names up front so every branch below leaves them defined.
    # Previously the multi-speaker branch never set selected_speaker_language,
    # which raised NameError when the "Popular Person" conversion path read it.
    selected_speaker = None
    selected_speaker_language = None

    if selected_language == "Multi Language":
        speakers, languages = model_list.multi_language_selected(
            model_path=selected_model_path
        )
        selected_speaker = st.selectbox("Select the voice: ", options=speakers)
        # This widget chooses from `languages`; the label used to say "Speaker".
        selected_speaker_language = st.selectbox(
            "Select the speaker language: ", options=languages
        )
    # NOTE(review): explicit `== True` kept on purpose -- the helper's return
    # type is not visible here, so switching to truthiness could change behavior.
    elif multi_speaker == True:  # noqa: E712
        selected_speaker = st.selectbox("Select the voice: ", options=multi_speaker_list)

    text = st.text_area("Enter Your Text which you want to convert to audio.")

    if st.button("Convert"):
        text_to_speech = ConvertTextToSpeech(
            model_name=selected_model,
            model_path=selected_model_path,
            text=text,
        )

        # These two language groups go through the multi-language entry point;
        # everything else uses the plain (optionally per-speaker) one.
        if selected_language in ("Multi Language", "Popular Person"):
            text_to_speech.convert_text_to_speech_multi_langauge(
                speaker=selected_speaker,
                language=selected_speaker_language,
                model_name=selected_model,
                selected_langauge=selected_language,
            )
        else:
            text_to_speech.convert_text_to_speech(speaker_id=selected_speaker)

        read_audio = text_to_speech.read_audio_file()
        st.audio(read_audio, format='audio/wav')
        download_audio_file(audio=read_audio, file_name="TTS")

elif choice == sidebar_options[1]:
    # ----------------------------- Voice Clonning --------------------------
    st.title("Clone Anyone's Voice")
    st.subheader("The Better the quality and duration of the data the more realistic the sound will be.")

    uploaded_music = st.file_uploader(label="Upload Your Audio File: ", type=["mp3", "wav"])
    text = st.text_area(label="Enter The text you want to convert: ")
    # Emotion selection is currently disabled in the UI; the value is fixed.
    emotion = "Neutral"

    if st.button("Start Clonning"):
        if uploaded_music is None:
            # Give feedback instead of silently ignoring the click.
            st.warning("Please upload an audio file before starting.")
        else:
            audio_filename = uploaded_music.name
            # Copy the upload into an in-memory buffer for the helper.
            audio = io.BytesIO(uploaded_music.read())

            audio_clonning = AudioClonning(
                audio=audio,
                audio_filename=audio_filename,
                text=text,
                emotion=emotion,
            )
            cloned_voice = audio_clonning.convert_text_to_speech()

            st.audio(cloned_voice, format="audio/wav")
            download_audio_file(audio=cloned_voice, file_name="Voice-Cloned")