diff --git a/.gitignore b/.gitignore
index b6e4761..cb4952c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,9 @@ dmypy.json
# Pyre type checker
.pyre/
+
+
+# Model files
+config.json
+speakers.pth
+model.pth
\ No newline at end of file
diff --git a/README.md b/README.md
index 3d66a3b..189a0e8 100644
--- a/README.md
+++ b/README.md
@@ -19,4 +19,7 @@ Online demo: https://huggingface.co/spaces/robinhad/qirimli-tts
# Attribution
-Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)
+- Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)
+- Crimean Tatar dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
+- Huge thanks to Nuri, Arslan, and Kemal for lending their voices
+- Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)
diff --git a/app.py b/app.py
index c515556..d0e60e1 100644
--- a/app.py
+++ b/app.py
@@ -1,41 +1,88 @@
-from unittest import result
import gradio as gr
from crh_transliterator.transliterator import transliterate
from crh_preprocessor.preprocessor import preprocess
from datetime import datetime
+import tempfile
+import gradio as gr
+from datetime import datetime
+from enum import Enum
+from crh_tts.tts import TTS, Voices
+from torch.cuda import is_available
-def tts(text: str) -> str:
- result = transliterate(text)
- text = preprocess(result)
- print("============================")
- print("Original text:", text)
- print("Time:", datetime.utcnow())
- return text
+
+class VoiceOption(Enum):  # Ukrainian UI labels offered as choices in the voice radio button
+ Nuri = "Нурі (жіночий) 👩"  # "Nuri (female)"
+ Arslan = "Арслан (чоловічий) 👨"  # "Arslan (male)"
+ Kemal = "Кемаль (чоловічий) 👨"  # "Kemal (male)"
+
+
+print(f"CUDA available? {is_available()}")  # startup log of torch.cuda.is_available()
badge = (
"https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"
)
+crh_tts = TTS(use_cuda=is_available())  # single engine created at import time and reused by every tts() call
+
+
+def tts(text: str, voice: str):
+    """Synthesize `text` with the chosen `voice`; return (wav file path, processed text)."""
+    print("============================")
+    print("Original text:", text)
+    print("Voice", voice)
+    print("Time:", datetime.utcnow())
+
+    # Translate the UI label into the speaker name of the same-named `Voices` member.
+    speaker_by_label = {
+        label.value: Voices[label.name].value for label in VoiceOption
+    }
+    speaker_name = speaker_by_label[voice]
+
+    max_chars = 7200
+    clipped = text[:max_chars]  # mitigate crashes on hf space
+    transliterated = transliterate(clipped)
+    normalized = preprocess(transliterated)
+
+    # delete=False: the file has to outlive this call because its path is returned.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
+        _, processed = crh_tts.tts(normalized, speaker_name, wav_file)
+        return wav_file.name, processed
+
+
with open("README.md") as file:
article = file.read()
article = article[article.find("---\n", 4) + 5 : :]
+
iface = gr.Interface(
fn=tts,
inputs=[
gr.components.Textbox(
label="Input",
- value="Please input your sentence.",
+ value="Qırımtatarlar! Селям! Ишлер насыл?",
+ ),
+ gr.components.Radio(  # voice picker; its selected label is the `voice` argument of tts
+ label="Голос",
+ choices=[option.value for option in VoiceOption],
+ value=VoiceOption.Nuri.value,
),
],
- outputs="text",
- examples=[
- ["Selâm! İşler nasıl?"],
- ["Sağlıqnen qalıñız! Sağlıqnen barıñız! "],
- ["Селям! Ишлер насыл?"],
+ outputs=[  # one component per element of the (file path, text) pair returned by tts
+ gr.components.Audio(label="Output"),
+ gr.components.Textbox(label="Наголошений текст"),
],
+ title="Кримськотатарський синтез мовлення",
+ description="Кримськотатарський Text-to-Speech за допомогою Coqui TTS",
article=article + f'\n
',
+ examples=[  # each row is [text, voice label], in the same order as `inputs`
+ ["Selâm! İşler nasıl?", VoiceOption.Kemal.value],
+ [
+ "Qırımtatarlar üç subetnik gruppasından er birisiniñ (tatlar, noğaylar ve yalıboylular) öz şivesi bar.",
+ VoiceOption.Arslan.value,
+ ],
+ ["Селям! Ишлер насыл?", VoiceOption.Nuri.value],
+ ],
)
iface.launch()
diff --git a/crh_tts/__init__.py b/crh_tts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/crh_tts/tts.py b/crh_tts/tts.py
new file mode 100644
index 0000000..7632a8d
--- /dev/null
+++ b/crh_tts/tts.py
@@ -0,0 +1,84 @@
+from io import BytesIO
+import requests
+from os.path import exists, join
+from TTS.utils.synthesizer import Synthesizer
+from enum import Enum
+from crh_preprocessor.preprocessor import preprocess
+from torch import no_grad
+
+
+class Voices(Enum):
+ """Speaker names accepted by `TTS.tts`; the value is passed to the synthesizer as `speaker_name`."""
+
+ Arslan = "arslan"
+ Nuri = "nuri"
+ Kemal = "kemal"
+
+
+class TTS:
+    """Text-to-speech engine: downloads the model files and wraps a `Synthesizer`."""
+
+    def __init__(self, use_cuda=False) -> None:
+        """
+        Set up the text-to-speech engine, from download to model creation.
+        Missing model files are downloaded into the current directory.
+        `use_cuda` is passed through unchanged to `Synthesizer`."""
+        self.__setup_cache(use_cuda=use_cuda)
+
+    def tts(self, text: str, voice: str, output_fp=None):
+        """Run the Text-to-Speech engine and write the audio to `output_fp`.
+        - `text` - your model input text.
+        - `voice` - one of predefined voices from `Voices` enum.
+        - `output_fp` - file-like object output. A new in-memory `BytesIO` per call by default.
+        Returns `(output_fp, text)`: the rewound output and the preprocessed text. Raises `ValueError` on an unknown `voice`.
+        """
+        valid_voices = [option.value for option in Voices]
+        if voice not in valid_voices:
+            raise ValueError(
+                f"Invalid value for voice selected! Please use one of the following values: {', '.join(valid_voices)}."
+            )
+        if output_fp is None:
+            output_fp = BytesIO()  # fresh buffer per call; a `BytesIO()` default is shared by all calls
+
+        text = preprocess(text)
+        # Gradients are not needed for inference.
+        with no_grad():
+            wavs = self.synthesizer.tts(text, speaker_name=voice)
+            self.synthesizer.save_wav(wavs, output_fp)
+
+        output_fp.seek(0)  # rewind so the caller can read from the start
+        return output_fp, text
+
+    def __setup_cache(self, use_cuda=False):
+        """Download any missing model files into the current directory and build `self.synthesizer`."""
+        print("downloading uk/crh/vits-tts")
+        release_number = "v0.0.1"
+        release_url = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}"
+        cache_folder = "."
+
+        # Same order as the Synthesizer arguments below: checkpoint, config, speakers.
+        local_paths = []
+        for asset in ("model.pth", "config.json", "speakers.pth"):
+            local_path = join(cache_folder, asset)
+            self.__download(f"{release_url}/{asset}", local_path)
+            local_paths.append(local_path)
+        model_path, config_path, speakers_path = local_paths
+
+        self.synthesizer = Synthesizer(
+            model_path, config_path, speakers_path, None, None, use_cuda=use_cuda
+        )
+
+    def __download(self, url, file_name):
+        """Download `url` into local `file_name` unless that file already exists.
+
+        Raises `requests.HTTPError` on an error response so that an error page
+        is never saved (and then reused) as a model file.
+        """
+        if exists(file_name):
+            print(f"Found {file_name}. Skipping download...")
+            return
+        print(f"Downloading {file_name}")
+        r = requests.get(url, allow_redirects=True)
+        r.raise_for_status()
+        with open(file_name, "wb") as file:
+            file.write(r.content)
diff --git a/requirements.txt b/requirements.txt
index a4e28ec..df28115 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,3 @@
-gradio==3.6
\ No newline at end of file
+gradio==3.6
+torch>=1.13
+TTS==0.8.0
\ No newline at end of file