class VoiceOption(Enum):
    """Voice choices as labelled in the user interface.

    Each value is the human-readable label offered in the voice selector;
    the member names match the member names of ``Voices``, which is how a
    selected label is translated into a speaker id.
    """

    # Declaration order is the iteration order, i.e. the order in which the
    # choices are listed when the enum is iterated.
    Nuri = "Нурі (жіночий) 👩"
    Arslan = "Арслан (чоловічий) 👨"
    Kemal = "Кемаль (чоловічий) 👨"
def tts(text: str, voice: str):
    """Synthesize `text` with the selected voice and return the result.

    - `text` - raw user input; it is cut to at most 7200 characters,
      transliterated and preprocessed before synthesis.
    - `voice` - one of the `VoiceOption` labels; any other value raises
      `KeyError` from the label lookup.

    Returns a `(path, text)` tuple: the name of a temporary `.wav` file the
    audio was written to, and the text returned by `crh_tts.tts`.
    """
    print("============================")
    print("Original text:", text)
    print("Voice", voice)
    print("Time:", datetime.utcnow())

    # UI label -> speaker id; both enums use the same member names.
    speaker_by_label = {
        option.value: Voices[option.name].value for option in VoiceOption
    }
    speaker_name = speaker_by_label[voice]

    max_chars = 7200
    clipped = text[:max_chars]  # mitigate crashes on hf space

    # NOTE(review): `crh_tts.tts` runs `preprocess` on its input as well, so
    # the text is preprocessed twice - confirm that `preprocess` is idempotent.
    prepared = preprocess(transliterate(clipped))

    # delete=False keeps the file on disk after it is closed, so the returned
    # path stays usable once this function has returned.
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with wav_file:
        _, spoken_text = crh_tts.tts(prepared, speaker_name, wav_file)
    return wav_file.name, spoken_text
visitors badge
class Voices(Enum):
    """List of available voices for the model."""

    Arslan = "arslan"
    Nuri = "nuri"
    Kemal = "kemal"


class TTS:
    """Text-to-speech engine, from model download to synthesis.

    On construction the model, config and speakers files are fetched into
    `cache_folder` (files already present there are reused) and a
    `Synthesizer` is built from them.
    """

    def __init__(self, use_cuda=False, cache_folder=".") -> None:
        """
        Set up the text-to-speech engine.
        - `use_cuda` - forwarded to `Synthesizer`.
        - `cache_folder` - directory that holds the model files. By default
          stores in current directory.
        """
        self.__setup_cache(use_cuda=use_cuda, cache_folder=cache_folder)

    def tts(self, text: str, voice: str, output_fp=None):
        """
        Run a Text-to-Speech engine and output to `output_fp` BytesIO-like object.
        - `text` - your model input text.
        - `voice` - one of predefined voices from `Voices` enum (either the
          member itself or its string value).
        - `output_fp` - file-like object output. A new in-memory `BytesIO`
          is created for each call when omitted.

        Returns `(output_fp, text)`: the output object rewound to position 0
        and the preprocessed text that was synthesized.

        Raises `ValueError` when `voice` is not a known voice.
        """
        # A `BytesIO()` default in the signature is created once and shared by
        # every call, so later results would be written over earlier ones in
        # the same buffer. Allocate a fresh buffer per call instead.
        if output_fp is None:
            output_fp = BytesIO()

        if isinstance(voice, Voices):
            voice = voice.value

        allowed_voices = [option.value for option in Voices]
        if voice not in allowed_voices:
            raise ValueError(
                f"Invalid value for voice selected! Please use one of the following values: {', '.join(allowed_voices)}."
            )

        text = preprocess(text)

        with no_grad():
            wavs = self.synthesizer.tts(text, speaker_name=voice)
            self.synthesizer.save_wav(wavs, output_fp)

        # Rewind so the caller can read the audio from the start.
        output_fp.seek(0)

        return output_fp, text

    def __setup_cache(self, use_cuda=False, cache_folder="."):
        """Downloads models and stores them into `cache_folder`. By default stores in current directory."""
        import os

        print("downloading uk/crh/vits-tts")
        release_number = "v0.0.1"
        release_url = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}"

        # An empty folder name means "current directory" for `join`, and
        # `makedirs("")` would fail, so only create a named folder.
        if cache_folder:
            os.makedirs(cache_folder, exist_ok=True)

        model_path = join(cache_folder, "model.pth")
        config_path = join(cache_folder, "config.json")
        speakers_path = join(cache_folder, "speakers.pth")

        self.__download(f"{release_url}/model.pth", model_path)
        self.__download(f"{release_url}/config.json", config_path)
        self.__download(f"{release_url}/speakers.pth", speakers_path)

        self.synthesizer = Synthesizer(
            model_path, config_path, speakers_path, None, None, use_cuda=use_cuda
        )

    def __download(self, url, file_name):
        """Downloads file from `url` into local `file_name` file.

        Does nothing when `file_name` already exists. Raises
        `requests.HTTPError` when the server answers with an error status.
        """
        import os

        if exists(file_name):
            print(f"Found {file_name}. Skipping download...")
            return

        print(f"Downloading {file_name}")
        # Write to a side file and rename only after a complete, successful
        # transfer: a failed or interrupted download must not leave a file
        # that the `exists` check above would later accept as a valid cache.
        partial_name = f"{file_name}.part"
        with requests.get(url, allow_redirects=True, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial_name, "wb") as file:
                # Stream in chunks instead of holding the whole file in memory.
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
        os.replace(partial_name, file_name)