Skip to content

Commit

Permalink
Add initial model
Browse files Browse the repository at this point in the history
  • Loading branch information
robinhad committed Nov 4, 2022
1 parent b812929 commit 01e1229
Show file tree
Hide file tree
Showing 6 changed files with 158 additions and 16 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,9 @@ dmypy.json

# Pyre type checker
.pyre/


# Model files
config.json
speakers.pth
model.pth
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,7 @@ Online demo: https://huggingface.co/spaces/robinhad/qirimli-tts

# Attribution

Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)
- Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)
- Crimean Tatar dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
- Huge thanks for lending their voices to: Nuri, Arslan, Kemal
- Transliteration: [prosvita/crh.transliteration](https://github.com/prosvita/crh.transliteration)
75 changes: 61 additions & 14 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,88 @@
from unittest import result
import gradio as gr
from crh_transliterator.transliterator import transliterate
from crh_preprocessor.preprocessor import preprocess
from datetime import datetime

import tempfile
import gradio as gr
from datetime import datetime
from enum import Enum
from crh_tts.tts import TTS, Voices
from torch.cuda import is_available

# Text-only pipeline: transliterate the input, preprocess it, log, and return the processed text.
# NOTE(review): a second, two-argument `tts` is defined later in this listing — presumably this
# is the pre-commit side of the diff; confirm which definition the real file keeps.
def tts(text: str) -> str:
result = transliterate(text)
# `text` is rebound to the preprocessed string here, so the "Original text:" line below
# logs the preprocessed text rather than the caller's input.
text = preprocess(result)
print("============================")
print("Original text:", text)
# Timestamp is taken with utcnow(), i.e. a naive datetime in UTC.
print("Time:", datetime.utcnow())
return text

class VoiceOption(Enum):
    """Voice choices offered in the UI; each value is the label shown to the user."""

    # Member names mirror the speakers of the TTS engine; values are display labels only.
    Nuri = "Нурі (жіночий) 👩"
    Arslan = "Арслан (чоловічий) 👨"
    Kemal = "Кемаль (чоловічий) 👨"


# Log at startup whether torch reports a usable CUDA device.
print(f"CUDA available? {is_available()}")


# URL of the visitor-counter badge image; embedded in the article HTML passed to gr.Interface.
badge = (
"https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.qirimli-tts"
)

# Single engine instance created at import time and shared by every tts() call;
# it runs on CUDA only when torch reports it as available.
crh_tts = TTS(use_cuda=is_available())


def tts(text: str, voice: str):
    """Synthesize `text` with the selected voice and return the result for the UI.

    - `text` - raw user input; it is truncated, transliterated and preprocessed
      before synthesis.
    - `voice` - one of the `VoiceOption` display labels; any other value raises
      `KeyError`.

    Returns a `(wav_path, processed_text)` tuple: the path of a temporary `.wav`
    file (created with `delete=False`, so it is left on disk) and the text
    returned by the engine.
    """
    print("============================")
    print("Original text:", text)
    print("Voice", voice)
    print("Time:", datetime.utcnow())

    # UI label -> engine speaker name; the two enums share member names.
    speaker_by_label = {
        option.value: Voices[option.name].value for option in VoiceOption
    }
    speaker_name = speaker_by_label[voice]

    max_chars = 7200
    clipped = text[:max_chars]  # mitigate crashes on hf space

    prepared = preprocess(transliterate(clipped))

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        _, spoken_text = crh_tts.tts(prepared, speaker_name, wav_file)
    return wav_file.name, spoken_text


# Reuse the README body as the article rendered below the demo.
# The encoding is explicit so the result does not depend on the platform's default locale.
with open("README.md", encoding="utf-8") as file:
    article = file.read()
    # Keep only what follows the first "---\n" found at or after index 4, skipping the
    # marker plus one more character.
    # NOTE(review): presumably this strips the README front-matter block — confirm.
    article = article[article.find("---\n", 4) + 5 :]


# Gradio UI: wires `tts` to a text box plus a voice radio selector and launches the app.
# NOTE(review): this span appears to interleave removed and added diff lines — `value=`,
# `outputs=` and `examples=` each occur twice and the first `examples=[` is never closed.
# Confirm against the actual file before editing the code itself.
iface = gr.Interface(
fn=tts,
inputs=[
gr.components.Textbox(
label="Input",
value="Please input your sentence.",
value="Qırımtatarlar! Селям! Ишлер насыл?",
),
# Voice choices are the display labels of VoiceOption; Nuri is preselected.
gr.components.Radio(
label="Голос",
choices=[option.value for option in VoiceOption],
value=VoiceOption.Nuri.value,
),
],
outputs="text",
examples=[
["Selâm! İşler nasıl?"],
["Sağlıqnen qalıñız! Sağlıqnen barıñız! "],
["Селям! Ишлер насыл?"],
# Two outputs: an audio component and a text box.
outputs=[
gr.components.Audio(label="Output"),
gr.components.Textbox(label="Наголошений текст"),
],
title="Кримськотатарський синтез мовлення",
description="Кримськотатарський Text-to-Speech за допомогою Coqui TTS",
# README-derived article text with the visitor badge image appended as raw HTML.
article=article + f'\n <center><img src="{badge}" alt="visitors badge"/></center>',
# Each example row is [text, voice label], matching the two inputs above.
examples=[
["Selâm! İşler nasıl?", VoiceOption.Kemal.value],
[
"Qırımtatarlar üç subetnik gruppasından er birisiniñ (tatlar, noğaylar ve yalıboylular) öz şivesi bar.",
VoiceOption.Arslan.value,
],
["Селям! Ишлер насыл?", VoiceOption.Nuri.value],
],
)
iface.launch()
Empty file added crh_tts/__init__.py
Empty file.
84 changes: 84 additions & 0 deletions crh_tts/tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from io import BytesIO
import requests
from os.path import exists, join
from TTS.utils.synthesizer import Synthesizer
from enum import Enum
from crh_preprocessor.preprocessor import preprocess
from torch import no_grad


class Voices(Enum):
    """Speakers the model can synthesize; each value is passed to the engine as the speaker name."""

    # Iteration order is the order used when listing valid voices in error messages.
    Arslan = "arslan"
    Nuri = "nuri"
    Kemal = "kemal"


class TTS:
    """
    Class to setup a text-to-speech engine, from download to model creation. \n
    Downloads the model files, or reuses the copies already present in the cache
    folder (currently the working directory), and builds a `Synthesizer` from them.
    """

    def __init__(self, use_cuda=False) -> None:
        """
        Prepare the model files and create the synthesizer.
        - `use_cuda` - forwarded to the `Synthesizer` constructor.
        """
        self.__setup_cache(use_cuda=use_cuda)

    def tts(self, text: str, voice: str, output_fp=None):
        """
        Run a Text-to-Speech engine and output to `output_fp` BytesIO-like object.
        - `text` - your model input text.
        - `voice` - one of predefined voices from `Voices` enum.
        - `output_fp` - file-like object output. A fresh in-memory `BytesIO` is
          created for each call when omitted.

        Returns a `(output_fp, text)` tuple: the output object rewound to position 0
        and the preprocessed text that was synthesized.

        Raises `ValueError` when `voice` is not one of the `Voices` values.
        """
        valid_voices = [option.value for option in Voices]
        if voice not in valid_voices:
            raise ValueError(
                f"Invalid value for voice selected! Please use one of the following values: {', '.join(valid_voices)}."
            )

        # Created per call: a default evaluated in the signature would be one buffer
        # shared by every call that relies on the default.
        if output_fp is None:
            output_fp = BytesIO()

        text = preprocess(text)

        # Inference only, so gradient tracking is disabled.
        with no_grad():
            wavs = self.synthesizer.tts(text, speaker_name=voice)
            self.synthesizer.save_wav(wavs, output_fp)

        # Rewind so the caller can read the audio from the start.
        output_fp.seek(0)

        return output_fp, text

    def __setup_cache(self, use_cuda=False):
        """
        Downloads models and stores them into `cache_folder`, then creates the
        synthesizer. By default stores in current directory.
        - `use_cuda` - forwarded to the `Synthesizer` constructor.
        """
        print("downloading uk/crh/vits-tts")
        release_number = "v0.0.1"
        release_url = (
            f"https://github.com/robinhad/qirimli-tts/releases/download/{release_number}"
        )

        cache_folder = "."

        model_path = join(cache_folder, "model.pth")
        config_path = join(cache_folder, "config.json")
        speakers_path = join(cache_folder, "speakers.pth")

        self.__download(f"{release_url}/model.pth", model_path)
        self.__download(f"{release_url}/config.json", config_path)
        self.__download(f"{release_url}/speakers.pth", speakers_path)

        # NOTE(review): the two positional `None` arguments are optional
        # Synthesizer inputs left unset — confirm against its signature.
        self.synthesizer = Synthesizer(
            model_path, config_path, speakers_path, None, None, use_cuda=use_cuda
        )

    def __download(self, url, file_name):
        """
        Downloads file from `url` into local `file_name` file.
        Does nothing when `file_name` already exists.

        Raises `requests.HTTPError` on a non-success response, so an error page is
        never saved under the model's file name and later mistaken for a cached file.
        """
        if exists(file_name):
            print(f"Found {file_name}. Skipping download...")
            return

        print(f"Downloading {file_name}")
        # The timeout bounds connection setup and each wait for data, not the whole
        # transfer, so large files still download while a dead server cannot hang forever.
        r = requests.get(url, allow_redirects=True, timeout=60)
        r.raise_for_status()
        with open(file_name, "wb") as file:
            file.write(r.content)
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
gradio==3.6
gradio==3.6
torch>=1.13
TTS==0.8.0

0 comments on commit 01e1229

Please sign in to comment.