-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
158 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,3 +127,9 @@ dmypy.json | |
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
|
||
# Model files | ||
config.json | ||
speakers.pth | ||
model.pth |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,88 @@ | ||
from unittest import result | ||
import gradio as gr | ||
from crh_transliterator.transliterator import transliterate | ||
from crh_preprocessor.preprocessor import preprocess | ||
from datetime import datetime | ||
|
||
import tempfile | ||
import gradio as gr | ||
from datetime import datetime | ||
from enum import Enum | ||
from crh_tts.tts import TTS, Voices | ||
from torch.cuda import is_available | ||
|
||
def tts(text: str) -> str:
    """Transliterate and preprocess ``text``, logging the request.

    Args:
        text: Raw input text.

    Returns:
        ``text`` after ``transliterate`` followed by ``preprocess``.
    """
    print("============================")
    # Log before transforming so the "Original text" label is accurate;
    # previously the already-preprocessed string was printed here.
    print("Original text:", text)
    print("Time:", datetime.utcnow())
    return preprocess(transliterate(text))
|
||
class VoiceOption(Enum):
    """Voice choices offered in the UI; each value is the label shown to the user."""

    Nuri = "Нурі (жіночий) 👩"
    Arslan = "Арслан (чоловічий) 👨"
    Kemal = "Кемаль (чоловічий) 👨"
|
||
|
||
# Report once at startup whether CUDA is available for inference.
print(f"CUDA available? {is_available()}")


# Visitor-counter image URL; it is embedded at the end of the page article.
badge = (
    "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"
)

# One engine instance shared by every request. Constructing it fetches any
# model files that are not already present locally.
crh_tts = TTS(use_cuda=is_available())
|
||
|
||
def tts(text: str, voice: str):
    """Synthesize ``text`` with the selected voice.

    Args:
        text: Input text. Only the first 7200 characters are synthesized.
        voice: Display label of the voice, i.e. one of the ``VoiceOption``
            values.

    Returns:
        A ``(path, text)`` pair: the path of a temporary ``.wav`` file holding
        the audio, and the text as returned by ``crh_tts.tts``.

    Raises:
        KeyError: If ``voice`` is not a ``VoiceOption`` value.
    """
    print("============================")
    print("Original text:", text)
    print("Voice", voice)
    print("Time:", datetime.utcnow())

    # UI label -> model speaker name. Derived from the member names so the
    # two enums cannot silently drift apart.
    voice_mapping = {
        option.value: Voices[option.name].value for option in VoiceOption
    }
    speaker_name = voice_mapping[voice]

    text_limit = 7200
    # Slicing is already a no-op on shorter input, so no length check is needed.
    text = text[:text_limit]  # mitigate crashes on hf space

    # NOTE(review): crh_tts.tts() also runs preprocess() on its input, so the
    # text is preprocessed twice — confirm preprocess() is idempotent or drop
    # the call here.
    text = preprocess(transliterate(text))

    # delete=False: the file has to outlive this function because the caller
    # receives only its path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        _, text = crh_tts.tts(text, speaker_name, fp)
        return fp.name, text
|
||
|
||
# Use the README body as the page article. The README holds non-ASCII text,
# so read it as UTF-8 instead of relying on the platform default encoding.
with open("README.md", encoding="utf-8") as file:
    article = file.read()

# Drop everything up to the closing "---" line of the header block
# (presumably the Space's YAML front matter — confirm). The search starts at
# offset 4 to skip the opening marker; +5 steps over "---\n" plus the one
# character that follows it. If no marker is found the text is kept whole
# rather than losing its first characters.
_front_matter_end = article.find("---\n", 4)
if _front_matter_end != -1:
    article = article[_front_matter_end + 5 :]
|
||
|
||
# Web UI: a text box plus a voice selector in, synthesized audio plus the
# processed text out — matching tts(text, voice) -> (path, text).
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.components.Textbox(
            label="Input",
            value="Qırımtatarlar! Селям! Ишлер насыл?",
        ),
        gr.components.Radio(
            label="Голос",
            choices=[option.value for option in VoiceOption],
            value=VoiceOption.Nuri.value,
        ),
    ],
    outputs=[
        gr.components.Audio(label="Output"),
        gr.components.Textbox(label="Наголошений текст"),
    ],
    title="Кримськотатарський синтез мовлення",
    description="Кримськотатарський Text-to-Speech за допомогою Coqui TTS",
    article=article + f'\n <center><img src="{badge}" alt="visitors badge"/></center>',
    # Each example row supplies one value per input: [text, voice label].
    examples=[
        ["Selâm! İşler nasıl?", VoiceOption.Kemal.value],
        [
            "Qırımtatarlar üç subetnik gruppasından er birisiniñ (tatlar, noğaylar ve yalıboylular) öz şivesi bar.",
            VoiceOption.Arslan.value,
        ],
        ["Селям! Ишлер насыл?", VoiceOption.Nuri.value],
    ],
)
iface.launch()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
from io import BytesIO | ||
import requests | ||
from os.path import exists, join | ||
from TTS.utils.synthesizer import Synthesizer | ||
from enum import Enum | ||
from crh_preprocessor.preprocessor import preprocess | ||
from torch import no_grad | ||
|
||
|
||
class Voices(Enum):
    """List of available voices for the model.

    Each value is the string passed to the synthesizer as ``speaker_name``.
    """

    Arslan = "arslan"
    Nuri = "nuri"
    Kemal = "kemal"
|
||
|
||
class TTS:
    """Text-to-speech engine built on a downloaded multi-speaker model.

    Creating an instance makes sure the model, config and speaker files exist
    in the cache folder (downloading any that are missing) and builds the
    ``Synthesizer`` that :meth:`tts` uses.
    """

    def __init__(self, use_cuda=False) -> None:
        """Set up the engine, from download to model creation.

        Downloads or reuses the model files in the cache folder, which is the
        current directory.

        Args:
            use_cuda: Forwarded to ``Synthesizer`` as ``use_cuda``.
        """
        self.__setup_cache(use_cuda=use_cuda)

    def tts(self, text: str, voice: str, output_fp=None):
        """Run the text-to-speech engine and write the audio to ``output_fp``.

        Args:
            text: Model input text; it is passed through ``preprocess`` first.
            voice: One of the predefined voices, given either as a ``Voices``
                member or as its string value.
            output_fp: File-like object that receives the WAV data. When
                omitted, a fresh in-memory ``BytesIO`` is created per call.

        Returns:
            ``(output_fp, text)``: the output object rewound to position 0 and
            the preprocessed text that was synthesized.

        Raises:
            ValueError: If ``voice`` is not one of the ``Voices`` values.
        """
        # Accept enum members as well as their raw string values.
        if isinstance(voice, Voices):
            voice = voice.value

        valid_voices = [option.value for option in Voices]
        if voice not in valid_voices:
            raise ValueError(
                f"Invalid value for voice selected! Please use one of the following values: {', '.join(valid_voices)}."
            )

        # Create the buffer here rather than in the signature: a default
        # evaluated at definition time would be one object shared by every
        # call, so a later result could be mixed with stale bytes.
        if output_fp is None:
            output_fp = BytesIO()

        text = preprocess(text)

        # Inference only, so gradient tracking is switched off.
        with no_grad():
            wavs = self.synthesizer.tts(text, speaker_name=voice)
            self.synthesizer.save_wav(wavs, output_fp)

        # Rewind so the caller can read the audio from the start.
        output_fp.seek(0)

        return output_fp, text

    def __setup_cache(self, use_cuda=False):
        """Download missing model files into the cache folder and load the model.

        The cache folder is the current directory. Sets ``self.synthesizer``.
        """
        print("downloading uk/crh/vits-tts")
        release_number = "v0.0.1"
        model_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/model.pth"
        config_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/config.json"
        speakers_link = f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}/speakers.pth"

        cache_folder = "."

        model_path = join(cache_folder, "model.pth")
        config_path = join(cache_folder, "config.json")
        speakers_path = join(cache_folder, "speakers.pth")

        self.__download(model_link, model_path)
        self.__download(config_link, config_path)
        self.__download(speakers_link, speakers_path)

        # A constructor call never yields None, so the former
        # "synthesizer is None" check could not trigger and was removed.
        self.synthesizer = Synthesizer(
            model_path, config_path, speakers_path, None, None, use_cuda=use_cuda
        )

    def __download(self, url, file_name):
        """Download ``url`` into the local file ``file_name`` unless it already exists.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        if not exists(file_name):
            print(f"Downloading {file_name}")
            r = requests.get(url, allow_redirects=True)
            # Fail before touching the disk: otherwise an error page would be
            # saved under the model's name and reused on every later run.
            r.raise_for_status()
            with open(file_name, "wb") as file:
                file.write(r.content)
        else:
            print(f"Found {file_name}. Skipping download...")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
gradio==3.6 | ||
gradio==3.6 | ||
torch>=1.13 | ||
TTS==0.8.0 |