Skip to content

Commit

Permalink
Add Android Demo for MatchaTTS models.
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Jan 5, 2025
1 parent 3eced3e commit 5c892ed
Show file tree
Hide file tree
Showing 10 changed files with 219 additions and 42 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/apk-tts-engine.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
total: ["40"]
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
# total: ["40"]
# index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
total: ["1"]
index: ["0"]

steps:
- uses: actions/checkout@v4
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/apk-tts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
total: ["40"]
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
# total: ["40"]
# index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
total: ["1"]
index: ["0"]

steps:
- uses: actions/checkout@v4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ class MainActivity : AppCompatActivity() {
private fun initTts() {
var modelDir: String?
var modelName: String?
var acousticModelName: String?
var vocoder: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
Expand All @@ -193,8 +195,18 @@ class MainActivity : AppCompatActivity() {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
modelDir = null

// VITS -- begin
modelName = null
// VITS -- end

// Matcha -- begin
acousticModelName = null
vocoder = null
// Matcha -- end


modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
Expand All @@ -217,7 +229,6 @@ class MainActivity : AppCompatActivity() {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"

Expand All @@ -233,24 +244,47 @@ class MainActivity : AppCompatActivity() {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"

// Example 6
// vits-melo-tts-zh_en
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
// modelDir = "vits-melo-tts-zh_en"
// modelName = "model.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-melo-tts-zh_en/dict"

// Example 7
// matcha-icefall-zh-baker
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = "matcha-icefall-zh-baker"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// lexicon = "lexicon.txt"
// dictDir = "matcha-icefall-zh-baker/dict"

// Example 8
// matcha-icefall-en_US-ljspeech
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = "matcha-icefall-en_US-ljspeech"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"

if (dataDir != null) {
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dataDir = newDir + "/" + dataDir
assets = null
val newDir = copyDataDir(dataDir!!)
dataDir = "$newDir/$dataDir"
}

if (dictDir != null) {
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dictDir = modelDir + "/" + "dict"
val newDir = copyDataDir(dictDir!!)
dictDir = "$newDir/$dictDir"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}

val config = getOfflineTtsConfig(
modelDir = modelDir!!,
modelName = modelName!!,
modelName = modelName ?: "",
acousticModelName = acousticModelName ?: "",
vocoder = vocoder ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@ class MainActivity : ComponentActivity() {
color = MaterialTheme.colorScheme.background
) {
Scaffold(topBar = {
TopAppBar(title = { Text("Next-gen Kaldi: TTS") })
TopAppBar(title = { Text("Next-gen Kaldi: TTS Engine") })
}) {
Box(modifier = Modifier.padding(it)) {
Column(modifier = Modifier.padding(16.dp)) {
Column {
Text("Speed " + String.format("%.1f", TtsEngine.speed))
Slider(
value = TtsEngine.speedState.value,
onValueChange = {
TtsEngine.speed = it
onValueChange = {
TtsEngine.speed = it
preferenceHelper.setSpeed(it)
},
valueRange = 0.2F..3.0F,
Expand Down Expand Up @@ -138,7 +138,9 @@ class MainActivity : ComponentActivity() {
val filename =
application.filesDir.absolutePath + "/generated.wav"
val ok =
audio.samples.isNotEmpty() && audio.save(filename)
audio.samples.isNotEmpty() && audio.save(
filename
)

if (ok) {
stopMediaPlayer()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.k2fsa.sherpa.onnx.tts.engine

import PreferenceHelper
import android.content.Context
import android.content.res.AssetManager
import android.util.Log
Expand All @@ -11,7 +12,6 @@ import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
import PreferenceHelper

object TtsEngine {
var tts: OfflineTts? = null
Expand Down Expand Up @@ -41,6 +41,8 @@ object TtsEngine {

private var modelDir: String? = null
private var modelName: String? = null
private var acousticModelName: String? = null
private var vocoder: String? = null
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
Expand All @@ -52,8 +54,17 @@ object TtsEngine {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
modelDir = null
//
// For VITS -- begin
modelName = null
// For VITS -- end

// For Matcha -- begin
acousticModelName = null
vocoder = null
// For Matcha -- end

modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
Expand Down Expand Up @@ -82,7 +93,6 @@ object TtsEngine {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"
Expand All @@ -101,8 +111,35 @@ object TtsEngine {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// lang = "deu"
}

// Example 6
// vits-melo-tts-zh_en
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
// modelDir = "vits-melo-tts-zh_en"
// modelName = "model.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-melo-tts-zh_en/dict"
// lang = "zho"

// Example 7
// matcha-icefall-zh-baker
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = "matcha-icefall-zh-baker"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// lexicon = "lexicon.txt"
// dictDir = "matcha-icefall-zh-baker/dict"
// lang = "zho"

// Example 8
// matcha-icefall-en_US-ljspeech
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = "matcha-icefall-en_US-ljspeech"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
// lang = "eng"
}

fun createTts(context: Context) {
Log.i(TAG, "Init Next-gen Kaldi TTS")
Expand All @@ -115,22 +152,22 @@ object TtsEngine {
assets = context.assets

if (dataDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = "$newDir/$modelDir"
val newDir = copyDataDir(context, dataDir!!)
dataDir = "$newDir/$dataDir"
assets = null
}

if (dictDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = "$newDir/$modelDir"
dictDir = "$modelDir/dict"
val newDir = copyDataDir(context, dictDir!!)
dictDir = "$newDir/$dictDir"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}

val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
modelDir = modelDir!!,
modelName = modelName ?: "",
acousticModelName = acousticModelName ?: "",
vocoder = vocoder ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
Expand Down
15 changes: 14 additions & 1 deletion scripts/apk/build-apk-tts-engine.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ mkdir -p apks
pushd ./android/SherpaOnnxTtsEngine/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
lang={{ tts_model.lang }}
lang_iso_639_3={{ tts_model.lang_iso_639_3 }}

Expand All @@ -50,9 +52,20 @@ popd
git checkout .
pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt

{% if tts_model.model_name %}
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
{% endif %}

# Matcha models only: patch the acoustic model file name into TtsEngine.kt.
# The guard must test acoustic_model_name (model_name is the VITS-only field);
# otherwise the substitution is skipped for Matcha models, leaving
# acousticModelName = null, and runs with an empty value for VITS models.
{% if tts_model.acoustic_model_name %}
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.vocoder %}
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.rule_fsts %}
rule_fsts={{ tts_model.rule_fsts }}
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
Expand Down
17 changes: 16 additions & 1 deletion scripts/apk/build-apk-tts.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ mkdir -p apks
pushd ./android/SherpaOnnxTts/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
lang={{ tts_model.lang }}

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
Expand All @@ -49,7 +51,20 @@ popd
git checkout .
pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt


{% if tts_model.model_name %}
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.acoustic_model_name %}
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.vocoder %}
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./MainActivity.kt
{% endif %}


{% if tts_model.rule_fsts %}
rule_fsts={{ tts_model.rule_fsts }}
Expand Down
37 changes: 36 additions & 1 deletion scripts/apk/generate-tts-apk-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def get_args():
@dataclass
class TtsModel:
model_dir: str
model_name: str = ""
model_name: str = "" # for vits
acoustic_model_name: str = "" # for matcha
vocoder: str = "" # for matcha
lang: str = "" # en, zh, fr, de, etc.
rule_fsts: Optional[List[str]] = None
rule_fars: Optional[List[str]] = None
Expand Down Expand Up @@ -378,6 +380,35 @@ def get_vits_models() -> List[TtsModel]:
return all_models


def get_matcha_models() -> List[TtsModel]:
    """Build the list of Matcha-TTS models to generate APK build scripts for.

    Two models are described:

    * ``matcha-icefall-zh-baker`` (lang ``zh``): gets a ``dict`` directory and
      a comma-separated ``rule_fsts`` string whose entries are prefixed with
      the model directory.
    * ``matcha-icefall-en_US-ljspeech`` (lang ``en``): gets an
      ``espeak-ng-data`` directory.

    Both use the same acoustic model file name and the same vocoder.

    Returns:
      The Chinese models followed by the English models.
    """
    acoustic_model = "model-steps-3.onnx"
    hifigan = "hifigan_v2.onnx"

    zh_models = [
        TtsModel(
            model_dir="matcha-icefall-zh-baker",
            acoustic_model_name=acoustic_model,
            lang="zh",
        )
    ]
    fst_names = ("phone.fst", "date.fst", "number.fst")
    for model in zh_models:
        # rule_fsts is stored as one comma-joined string of
        # "<model_dir>/<fst>" paths.
        model.rule_fsts = ",".join(
            f"{model.model_dir}/{name}" for name in fst_names
        )
        model.dict_dir = model.model_dir + "/dict"
        model.vocoder = hifigan

    en_models = [
        TtsModel(
            model_dir="matcha-icefall-en_US-ljspeech",
            acoustic_model_name=acoustic_model,
            lang="en",
        )
    ]
    for model in en_models:
        model.data_dir = f"{model.model_dir}/espeak-ng-data"
        model.vocoder = hifigan

    return [*zh_models, *en_models]


def main():
args = get_args()
index = args.index
Expand All @@ -389,7 +420,11 @@ def main():
all_model_list += get_piper_models()
all_model_list += get_mimic3_models()
all_model_list += get_coqui_models()

all_model_list = get_matcha_models()

convert_lang_to_iso_639_3(all_model_list)
print(all_model_list)

num_models = len(all_model_list)

Expand Down
Loading

0 comments on commit 5c892ed

Please sign in to comment.