From d1a450bf825b62a327e6678cfbe124b6cf29a107 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 5 Nov 2023 08:59:03 +0800 Subject: [PATCH] Support text normalization via rule FST (#407) --- CMakeLists.txt | 5 ++- cmake/kaldifst.cmake | 16 ++++----- cmake/onnxruntime-linux-aarch64.cmake | 2 +- cmake/onnxruntime-linux-x86_64-gpu.cmake | 2 +- cmake/onnxruntime-linux-x86_64.cmake | 2 +- cmake/onnxruntime-osx-arm64.cmake | 2 +- cmake/onnxruntime-osx-universal.cmake | 2 +- cmake/onnxruntime-osx-x86_64.cmake | 2 +- sherpa-onnx/csrc/offline-tts-vits-impl.h | 46 +++++++++++++++++++++--- sherpa-onnx/csrc/offline-tts.cc | 31 ++++++++++++++-- sherpa-onnx/csrc/offline-tts.h | 11 ++++-- sherpa-onnx/python/csrc/offline-tts.cc | 6 +++- 12 files changed, 101 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b2a66066e..5ef384fe5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,8 +100,11 @@ message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}") message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}") message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") -set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") +endif() set(CMAKE_CXX_EXTENSIONS OFF) +message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}") include(CheckIncludeFileCXX) check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA) diff --git a/cmake/kaldifst.cmake b/cmake/kaldifst.cmake index 7f9fceef3..3038f3bd8 100644 --- a/cmake/kaldifst.cmake +++ b/cmake/kaldifst.cmake @@ -1,18 +1,18 @@ function(download_kaldifst) include(FetchContent) - set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.6.tar.gz") - set(kaldifst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/kaldifst-1.7.6.tar.gz") - set(kaldifst_HASH 
"SHA256=79280c0bb08b5ed1a2ab7c21320a2b071f1f0eb10d2f047e8d6f027f0d32b4d2") + set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.8.tar.gz") + set(kaldifst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/kaldifst-1.7.8.tar.gz") + set(kaldifst_HASH "SHA256=94613923568ef9a240ba1059b8b9dfe3082daad794934635d99e66248a6687b5") # If you don't have access to the Internet, # please pre-download kaldifst set(possible_file_locations - $ENV{HOME}/Downloads/kaldifst-1.7.6.tar.gz - ${PROJECT_SOURCE_DIR}/kaldifst-1.7.6.tar.gz - ${PROJECT_BINARY_DIR}/kaldifst-1.7.6.tar.gz - /tmp/kaldifst-1.7.6.tar.gz - /star-fj/fangjun/download/github/kaldifst-1.7.6.tar.gz + $ENV{HOME}/Downloads/kaldifst-1.7.8.tar.gz + ${PROJECT_SOURCE_DIR}/kaldifst-1.7.8.tar.gz + ${PROJECT_BINARY_DIR}/kaldifst-1.7.8.tar.gz + /tmp/kaldifst-1.7.8.tar.gz + /star-fj/fangjun/download/github/kaldifst-1.7.8.tar.gz ) foreach(f IN LISTS possible_file_locations) diff --git a/cmake/onnxruntime-linux-aarch64.cmake b/cmake/onnxruntime-linux-aarch64.cmake index c41962724..616a35ed5 100644 --- a/cmake/onnxruntime-linux-aarch64.cmake +++ b/cmake/onnxruntime-linux-aarch64.cmake @@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. 
BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-linux-aarch64-1.16.1.tgz") +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-aarch64-1.16.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-1.16.1.tgz") set(onnxruntime_HASH "SHA256=f10851b62eb44f9e811134737e7c6edd15733d2c1549cb6ce403808e9c047385") diff --git a/cmake/onnxruntime-linux-x86_64-gpu.cmake b/cmake/onnxruntime-linux-x86_64-gpu.cmake index e30ff987d..6c02c9aeb 100644 --- a/cmake/onnxruntime-linux-x86_64-gpu.cmake +++ b/cmake/onnxruntime-linux-x86_64-gpu.cmake @@ -18,7 +18,7 @@ if(NOT SHERPA_ONNX_ENABLE_GPU) message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}") endif() -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-linux-x64-gpu-1.16.1.tgz") +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-x64-gpu-1.16.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-gpu-1.16.1.tgz") set(onnxruntime_HASH "SHA256=474d5d74b588d54aa3e167f38acc9b1b8d20c292d0db92299bdc33a81eb4492d") diff --git a/cmake/onnxruntime-linux-x86_64.cmake b/cmake/onnxruntime-linux-x86_64.cmake index 693c9798b..9fc86ac99 100644 --- a/cmake/onnxruntime-linux-x86_64.cmake +++ b/cmake/onnxruntime-linux-x86_64.cmake @@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. 
BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-linux-x64-1.16.1.tgz") +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-x64-1.16.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-1.16.1.tgz") set(onnxruntime_HASH "SHA256=53a0f03f71587ed602e99e82773132fc634b74c2d227316fbfd4bf67181e72ed") diff --git a/cmake/onnxruntime-osx-arm64.cmake b/cmake/onnxruntime-osx-arm64.cmake index d63e5d0b5..f0a220f26 100644 --- a/cmake/onnxruntime-osx-arm64.cmake +++ b/cmake/onnxruntime-osx-arm64.cmake @@ -12,7 +12,7 @@ if(NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-osx-arm64-1.16.1.tgz") +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-osx-arm64-1.16.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-arm64-1.16.1.tgz") set(onnxruntime_HASH "SHA256=56ca6b8de3a220ea606c2067ba65d11dfa6e4f722e01ac7dc75f7152b81445e0") diff --git a/cmake/onnxruntime-osx-universal.cmake b/cmake/onnxruntime-osx-universal.cmake index cd4e18510..678d2bedf 100644 --- a/cmake/onnxruntime-osx-universal.cmake +++ b/cmake/onnxruntime-osx-universal.cmake @@ -13,7 +13,7 @@ if(NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. 
BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-osx-universal2-1.16.1.tgz") +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-osx-universal2-1.16.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-universal2-1.16.1.tgz") set(onnxruntime_HASH "SHA256=e8568a4a3f602c25ea7c3bbd2f085340dff5bb68fa7c859fd763d944105e3d76") diff --git a/cmake/onnxruntime-osx-x86_64.cmake b/cmake/onnxruntime-osx-x86_64.cmake index 1ad24e534..6f13d149a 100644 --- a/cmake/onnxruntime-osx-x86_64.cmake +++ b/cmake/onnxruntime-osx-x86_64.cmake @@ -12,7 +12,7 @@ if(NOT BUILD_SHARED_LIBS) message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") endif() -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-osx-x86_64-1.16.1.tgz") +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-osx-x86_64-1.16.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-x86_64-1.16.1.tgz") set(onnxruntime_HASH "SHA256=0b8ae24401a8f75e1c4f75257d4eaeb1b6d44055e027df4aa4a84e67e0f9b9e3") diff --git a/sherpa-onnx/csrc/offline-tts-vits-impl.h b/sherpa-onnx/csrc/offline-tts-vits-impl.h index 1845cf2a7..d93f53400 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-impl.h +++ b/sherpa-onnx/csrc/offline-tts-vits-impl.h @@ -14,30 +14,50 @@ #include "android/asset_manager_jni.h" #endif +#include "kaldifst/csrc/text-normalizer.h" #include "sherpa-onnx/csrc/lexicon.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-tts-impl.h" #include "sherpa-onnx/csrc/offline-tts-vits-model.h" +#include "sherpa-onnx/csrc/text-utils.h" namespace sherpa_onnx { class OfflineTtsVitsImpl : public OfflineTtsImpl { 
public: explicit OfflineTtsVitsImpl(const OfflineTtsConfig &config) - : model_(std::make_unique(config.model)), + : config_(config), + model_(std::make_unique(config.model)), lexicon_(config.model.vits.lexicon, config.model.vits.tokens, model_->Punctuations(), model_->Language(), config.model.debug, - model_->IsPiper()) {} + model_->IsPiper()) { + if (!config.rule_fsts.empty()) { + std::vector files; + SplitStringToVector(config.rule_fsts, ",", false, &files); + tn_list_.reserve(files.size()); + for (const auto &f : files) { + if (config.model.debug) { + SHERPA_ONNX_LOGE("rule fst: %s", f.c_str()); + } + tn_list_.push_back(std::make_unique(f)); + } + } + } #if __ANDROID_API__ >= 9 OfflineTtsVitsImpl(AAssetManager *mgr, const OfflineTtsConfig &config) - : model_(std::make_unique(mgr, config.model)), + : config_(config), + model_(std::make_unique(mgr, config.model)), lexicon_(mgr, config.model.vits.lexicon, config.model.vits.tokens, model_->Punctuations(), model_->Language(), config.model.debug, - model_->IsPiper()) {} + model_->IsPiper()) { + if (!config.rule_fsts.empty()) { + SHERPA_ONNX_LOGE("TODO(fangjun): Implement rule FST for Android"); + } + } #endif - GeneratedAudio Generate(const std::string &text, int64_t sid = 0, + GeneratedAudio Generate(const std::string &_text, int64_t sid = 0, float speed = 1.0) const override { int32_t num_speakers = model_->NumSpeakers(); if (num_speakers == 0 && sid != 0) { @@ -55,6 +75,20 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { sid = 0; } + std::string text = _text; + if (config_.model.debug) { + SHERPA_ONNX_LOGE("Raw text: %s", text.c_str()); + } + + if (!tn_list_.empty()) { + for (const auto &tn : tn_list_) { + text = tn->Normalize(text); + if (config_.model.debug) { + SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str()); + } + } + } + std::vector x = lexicon_.ConvertTextToTokenIds(text); if (x.empty()) { SHERPA_ONNX_LOGE("Failed to convert %s to token IDs", text.c_str()); @@ -98,7 +132,9 @@ class 
OfflineTtsVitsImpl : public OfflineTtsImpl { } private: + OfflineTtsConfig config_; std::unique_ptr model_; + std::vector> tn_list_; Lexicon lexicon_; }; diff --git a/sherpa-onnx/csrc/offline-tts.cc b/sherpa-onnx/csrc/offline-tts.cc index b4d19476e..b8536e26e 100644 --- a/sherpa-onnx/csrc/offline-tts.cc +++ b/sherpa-onnx/csrc/offline-tts.cc @@ -6,19 +6,44 @@ #include +#include "sherpa-onnx/csrc/file-utils.h" +#include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-tts-impl.h" +#include "sherpa-onnx/csrc/text-utils.h" namespace sherpa_onnx { -void OfflineTtsConfig::Register(ParseOptions *po) { model.Register(po); } +void OfflineTtsConfig::Register(ParseOptions *po) { + model.Register(po); -bool OfflineTtsConfig::Validate() const { return model.Validate(); } + po->Register("tts-rule-fsts", &rule_fsts, + "It not empty, it contains a list of rule FST filenames." + "Multiple filenames are separated by a comma and they are " + "applied from left to right. An example value: " + "rule1.fst,rule2,fst,rule3.fst"); +} + +bool OfflineTtsConfig::Validate() const { + if (!rule_fsts.empty()) { + std::vector files; + SplitStringToVector(rule_fsts, ",", false, &files); + for (const auto &f : files) { + if (!FileExists(f)) { + SHERPA_ONNX_LOGE("Rule fst %s does not exist. ", f.c_str()); + return false; + } + } + } + + return model.Validate(); +} std::string OfflineTtsConfig::ToString() const { std::ostringstream os; os << "OfflineTtsConfig("; - os << "model=" << model.ToString() << ")"; + os << "model=" << model.ToString() << ", "; + os << "rule_fsts=\"" << rule_fsts << "\")"; return os.str(); } diff --git a/sherpa-onnx/csrc/offline-tts.h b/sherpa-onnx/csrc/offline-tts.h index 0b6427aa5..f581ea04e 100644 --- a/sherpa-onnx/csrc/offline-tts.h +++ b/sherpa-onnx/csrc/offline-tts.h @@ -21,10 +21,17 @@ namespace sherpa_onnx { struct OfflineTtsConfig { OfflineTtsModelConfig model; + // If not empty, it contains a list of rule FST filenames. 
+ // Filenames are separated by a comma. + // Example value: rule1.fst,rule2.fst,rule3.fst + // + // If there are multiple rules, they are applied from left to right. + std::string rule_fsts; OfflineTtsConfig() = default; - explicit OfflineTtsConfig(const OfflineTtsModelConfig &model) - : model(model) {} + OfflineTtsConfig(const OfflineTtsModelConfig &model, + const std::string &rule_fsts) + : model(model), rule_fsts(rule_fsts) {} void Register(ParseOptions *po); bool Validate() const; diff --git a/sherpa-onnx/python/csrc/offline-tts.cc b/sherpa-onnx/python/csrc/offline-tts.cc index 11f91c07d..e58ca3113 100644 --- a/sherpa-onnx/python/csrc/offline-tts.cc +++ b/sherpa-onnx/python/csrc/offline-tts.cc @@ -3,6 +3,8 @@ // Copyright (c) 2023 Xiaomi Corporation #include "sherpa-onnx/python/csrc/offline-tts.h" +#include + #include "sherpa-onnx/csrc/offline-tts.h" #include "sherpa-onnx/python/csrc/offline-tts-model-config.h" @@ -28,8 +30,10 @@ static void PybindOfflineTtsConfig(py::module *m) { using PyClass = OfflineTtsConfig; py::class_(*m, "OfflineTtsConfig") .def(py::init<>()) - .def(py::init(), py::arg("model")) + .def(py::init(), + py::arg("model"), py::arg("rule_fsts") = "") .def_readwrite("model", &PyClass::model) + .def_readwrite("rule_fsts", &PyClass::rule_fsts) .def("__str__", &PyClass::ToString); }