diff --git a/.github/workflows/lazarus.yaml b/.github/workflows/lazarus.yaml index e7cef16ef..f327c99e2 100644 --- a/.github/workflows/lazarus.yaml +++ b/.github/workflows/lazarus.yaml @@ -56,7 +56,7 @@ jobs: key: ${{ matrix.os }} # See https://github.com/gcarreno/setup-lazarus - - uses: gcarreno/setup-lazarus@v3 + - uses: gcarreno/setup-lazarus@v3.3.1 with: lazarus-version: "stable" with-cache: true diff --git a/.github/workflows/test-nodejs-npm.yaml b/.github/workflows/test-nodejs-npm.yaml index cc49ac0c4..e1358fd8d 100644 --- a/.github/workflows/test-nodejs-npm.yaml +++ b/.github/workflows/test-nodejs-npm.yaml @@ -26,7 +26,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-2019] - python-version: ["3.8"] + python-version: ["3.10"] steps: - uses: actions/checkout@v4 diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets index 502c150fd..c6564edc7 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/BuildProfile.ets @@ -2,8 +2,8 @@ * Use these variables when you tailor your ArkTS code. They must be of the const type. */ export const HAR_VERSION = '1.10.32'; -export const BUILD_MODE_NAME = 'release'; -export const DEBUG = false; +export const BUILD_MODE_NAME = 'debug'; +export const DEBUG = true; export const TARGET_NAME = 'default'; /** diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets index 185aa51fd..959b6ba02 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets @@ -38,3 +38,12 @@ export { OnlineRecognizerResult, OnlineRecognizer, } from './src/main/ets/components/StreamingAsr'; + +export { + OfflineTtsVitsModelConfig, + OfflineTtsModelConfig, + OfflineTtsConfig, + OfflineTts, + TtsOutput, + TtsInput, +} from './src/main/ets/components/NonStreamingTts'; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/macros.h b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/macros.h index 7a6cc93e6..fc70abb01 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/macros.h +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/macros.h @@ -8,8 +8,8 @@ #include #if __OHOS__ -#include "rawfile/raw_file_manager.h" #include "hilog/log.h" +#include "rawfile/raw_file_manager.h" #undef LOG_DOMAIN #undef LOG_TAG diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc index c7d9560a2..a34139aa0 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc @@ -236,7 +236,10 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty); #if __OHOS__ - std::unique_ptr mgr (OH_ResourceManager_InitNativeResourceManager(env, info[1]), &OH_ResourceManager_ReleaseNativeResourceManager); + std::unique_ptr + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), + &OH_ResourceManager_ReleaseNativeResourceManager); const SherpaOnnxOfflineRecognizer *recognizer = SherpaOnnxCreateOfflineRecognizerOHOS(&c, mgr.get()); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc index 70d97cddb..da70e662c 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc @@ -63,6 +63,17 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( static Napi::External CreateOfflineTtsWrapper( const Napi::CallbackInfo &info) { Napi::Env env = info.Env(); +#if __OHOS__ + // the last argument is the NativeResourceManager + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } +#else if (info.Length() != 1) { std::ostringstream os; os << "Expect only 1 argument. Given: " << info.Length(); @@ -71,6 +82,7 @@ static Napi::External CreateOfflineTtsWrapper( return {}; } +#endif if (!info[0].IsObject()) { Napi::TypeError::New(env, "Expect an object as the argument") @@ -90,7 +102,15 @@ static Napi::External CreateOfflineTtsWrapper( SHERPA_ONNX_ASSIGN_ATTR_INT32(max_num_sentences, maxNumSentences); SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); +#if __OHOS__ + std::unique_ptr + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), + &OH_ResourceManager_ReleaseNativeResourceManager); + SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTtsOHOS(&c, mgr.get()); +#else SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&c); +#endif if (c.model.vits.model) { delete[] c.model.vits.model; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc index ffe562d79..59ad5ce52 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc @@ -211,7 +211,10 @@ static Napi::External CreateOnlineRecognizerWrapper( c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(o); #if __OHOS__ - std::unique_ptr mgr (OH_ResourceManager_InitNativeResourceManager(env, info[1]), &OH_ResourceManager_ReleaseNativeResourceManager); + std::unique_ptr + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), + &OH_ResourceManager_ReleaseNativeResourceManager); const SherpaOnnxOnlineRecognizer *recognizer = SherpaOnnxCreateOnlineRecognizerOHOS(&c, mgr.get()); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts index 10ff7745c..f44ade356 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts @@ -33,3 +33,8 @@ export const decodeOnlineStream: (handle: object, streamHandle: object) => void; export const isEndpoint: (handle: object, streamHandle: object) => boolean; export const reset: (handle: object, streamHandle: object) => void; export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object) => string; + +export const createOfflineTts: (config: object, mgr?: object) => object; +export const getOfflineTtsNumSpeakers: (handle: object) => number; +export const getOfflineTtsSampleRate: (handle: object) => number; +export const offlineTtsGenerate: (handle: object, input: object) => object; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc index b505c53d2..b1defcac0 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc @@ -70,8 +70,10 @@ static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) { #if __OHOS__ // Note(fangjun): Normally, we don't need to divied it by sizeof(float). - // However, data.ElementLength() here returns number of bytes, not number of elements. - SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength() / sizeof(float)); + // However, data.ElementLength() here returns number of bytes, not number of + // elements. + SherpaOnnxCircularBufferPush(buf, data.Data(), + data.ElementLength() / sizeof(float)); #else SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength()); #endif @@ -353,10 +355,14 @@ CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) { float buffer_size_in_seconds = info[1].As().FloatValue(); #if __OHOS__ - std::unique_ptr mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]), &OH_ResourceManager_ReleaseNativeResourceManager); + std::unique_ptr + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]), + &OH_ResourceManager_ReleaseNativeResourceManager); SherpaOnnxVoiceActivityDetector *vad = - SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds, mgr.get()); + SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds, + mgr.get()); #else SherpaOnnxVoiceActivityDetector *vad = SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds); @@ -410,9 +416,10 @@ static void VoiceActivityDetectorAcceptWaveformWrapper( Napi::Float32Array samples = info[1].As(); #if __OHOS__ - // Note(fangjun): For unknown reasons, we need to use `/sizeof(float)` here for Huawei - SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(), - samples.ElementLength() / sizeof(float)); + // Note(fangjun): For unknown reasons, we need to use `/sizeof(float)` here + // for Huawei + SherpaOnnxVoiceActivityDetectorAcceptWaveform( + vad, samples.Data(), samples.ElementLength() / sizeof(float)); #else SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(), samples.ElementLength()); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/wave-reader.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/wave-reader.cc index 2973c6169..23b3a7242 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/wave-reader.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/wave-reader.cc @@ -102,10 +102,11 @@ static Napi::Object ReadWaveFromBinaryWrapper(const Napi::CallbackInfo &info) { return {}; } - + Napi::Uint8Array data = info[0].As(); int32_t n = data.ElementLength(); - const SherpaOnnxWave *wave = SherpaOnnxReadWaveFromBinaryData(reinterpret_cast(data.Data()), n); + const SherpaOnnxWave *wave = SherpaOnnxReadWaveFromBinaryData( + reinterpret_cast(data.Data()), n); if (!wave) { std::ostringstream os; os << "Failed to read wave"; @@ -113,7 +114,7 @@ static Napi::Object ReadWaveFromBinaryWrapper(const Napi::CallbackInfo &info) { return {}; } - + bool enable_external_buffer = true; if (info.Length() == 2) { if (info[1].IsBoolean()) { @@ -165,7 +166,7 @@ static Napi::Object ReadWaveFromBinaryWrapper(const Napi::CallbackInfo &info) { void InitWaveReader(Napi::Env env, Napi::Object exports) { exports.Set(Napi::String::New(env, "readWave"), Napi::Function::New(env, ReadWaveWrapper)); - + exports.Set(Napi::String::New(env, "readWaveFromBinary"), - Napi::Function::New(env, ReadWaveFromBinaryWrapper)); -} \ No newline at end of file + Napi::Function::New(env, ReadWaveFromBinaryWrapper)); +} diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets index 0cc8466a9..d3f849cca 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingAsr.ets @@ -79,7 +79,7 @@ export class OfflineModelConfig { public tokens: string = ''; public numThreads: number = 1; public debug: boolean = false; - public provider: string = "cpu"; + public provider: string = 'cpu'; public modelType: string = ''; public modelingUnit: string = "cjkchar"; public bpeVocab: string = ''; diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets new file mode 100644 index 000000000..c568b9990 --- /dev/null +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets @@ -0,0 +1,66 @@ +import { + createOfflineTts, + getOfflineTtsNumSpeakers, + getOfflineTtsSampleRate, + offlineTtsGenerate, +} from "libsherpa_onnx.so"; + +export class OfflineTtsVitsModelConfig { + public model: string = ''; + public lexicon: string = ''; + public tokens: string = ''; + public dataDir: string = ''; + public dictDir: String = ''; + public noiseScale: number = 0.667; + public noiseScaleW: number = 0.8; + public lengthScale: number = 1.0; +} + +export class OfflineTtsModelConfig{ + public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig(); + public numThreads: number = 1; + public debug: boolean = false; + public provider: string = 'cpu'; +} + +export class OfflineTtsConfig{ + public model: OfflineTtsModelConfig = new OfflineTtsModelConfig(); + public ruleFsts: string = ''; + public ruleFars: string = ''; + public maxNumSentences: number = 1; +} + +export class TtsOutput { + public samples: Float32Array = new Float32Array(0); + public sampleRate: number = 0; +} + +export class TtsInput { + public text: string = ''; + public sid: number = 0; + public speed: number = 1.0; +} + +export class OfflineTts { + private handle: object; + public config: OfflineTtsConfig; + public numSpeakers: number; + public sampleRate: number; + constructor(config: OfflineTtsConfig, mgr?: object) { + this.handle = createOfflineTts(config, mgr); + this.config = config; + + this.numSpeakers = getOfflineTtsNumSpeakers(this.handle); + this.sampleRate = getOfflineTtsSampleRate(this.handle); + } + + /* + input obj: {text: "xxxx", sid: 0, speed: 1.0} + where text is a string, sid is a int32, speed is a float + + return an object {samples: Float32Array, sampleRate: } + */ + generate(input: TtsInput): TtsOutput { + return offlineTtsGenerate(this.handle, input) as TtsOutput; + } +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets index 7ecc552ca..3b2985771 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets @@ -52,7 +52,7 @@ export class OnlineModelConfig { public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig(); public tokens: string = ''; public numThreads: number = 1; - public provider: string = "cpu"; + public provider: string = 'cpu'; public debug: boolean = false; public modelType: string = ''; public modelingUnit: string = "cjkchar"; @@ -67,7 +67,7 @@ export class OnlineCtcFstDecoderConfig { export class OnlineRecognizerConfig { public featConfig: FeatureConfig = new FeatureConfig(); public modelConfig: OnlineModelConfig = new OnlineModelConfig(); - public decodingMethod: string = "greedy_search"; + public decodingMethod: string = 'greedy_search'; public maxActivePaths: number = 4; public enableEndpoint: boolean = false; public rule1MinTrailingSilence: number = 2.4; diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 b/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 index e8c24aeba..660c2bb47 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 @@ -54,11 +54,11 @@ "reason": "$string:mic_reason", "usedScene": { "abilities": [ - "FormAbility", + "EntryAbility", ], - "when": "always", + "when": "inuse", } } ] } -} \ No newline at end of file +} diff --git a/scripts/check_style_cpplint.sh b/scripts/check_style_cpplint.sh index ea419242a..dcadcd991 100755 --- a/scripts/check_style_cpplint.sh +++ b/scripts/check_style_cpplint.sh @@ -103,6 +103,7 @@ function do_check() { 2) echo "Check all files" files=$(find $sherpa_onnx_dir/cxx-api-examples $sherpa_onnx_dir/c-api-examples $sherpa_onnx_dir/sherpa-onnx/csrc $sherpa_onnx_dir/sherpa-onnx/python $sherpa_onnx_dir/scripts/node-addon-api/src $sherpa_onnx_dir/sherpa-onnx/jni $sherpa_onnx_dir/sherpa-onnx/c-api -name "*.h" -o -name "*.cc") + files2=$(find $sherpa_onnx_dir/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/ -name "*.cc") ;; *) echo "Check last commit" @@ -110,7 +111,7 @@ function do_check() { ;; esac - for f in $files; do + for f in $files $files2; do need_check=$(is_source_code_file $f) if $need_check; then [[ -f $f ]] && check_style $f diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index a6ad0772e..166430da4 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -485,9 +485,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( if (config->model_config.debug) { #if __OHOS__ - SHERPA_ONNX_LOGE("%{public}s", recognizer_config.ToString().c_str()); + SHERPA_ONNX_LOGE("%{public}s\n", recognizer_config.ToString().c_str()); #else - SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); + SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); #endif } @@ -967,9 +967,9 @@ sherpa_onnx::VadModelConfig GetVadModelConfig( if (vad_config.debug) { #if __OHOS__ - SHERPA_ONNX_LOGE("%{public}s", vad_config.ToString().c_str()); + SHERPA_ONNX_LOGE("%{public}s\n", vad_config.ToString().c_str()); #else - SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str()); + SHERPA_ONNX_LOGE("%s\n", vad_config.ToString().c_str()); #endif } @@ -1053,7 +1053,7 @@ struct SherpaOnnxOfflineTts { std::unique_ptr impl; }; -SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( +static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig( const SherpaOnnxOfflineTtsConfig *config) { sherpa_onnx::OfflineTtsConfig tts_config; @@ -1084,9 +1084,20 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); if (tts_config.model.debug) { +#if __OHOS__ + SHERPA_ONNX_LOGE("%{public}s\n", tts_config.ToString().c_str()); +#else SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str()); +#endif } + return tts_config; +} + +SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( + const SherpaOnnxOfflineTtsConfig *config) { + auto tts_config = GetOfflineTtsConfig(config); + if (!tts_config.Validate()) { SHERPA_ONNX_LOGE("Errors in config"); return nullptr; @@ -1908,6 +1919,7 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( return ans; } +#endif #ifdef __OHOS__ @@ -1959,6 +1971,23 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS( return p; } -#endif -#endif +#if SHERPA_ONNX_ENABLE_TTS == 1 +SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( + const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) { + if (!mgr) { + return SherpaOnnxCreateOfflineTts(config); + } + + auto tts_config = GetOfflineTtsConfig(config); + + SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; + + tts->impl = std::make_unique(mgr, tts_config); + + return tts; +} + +#endif // #if SHERPA_ONNX_ENABLE_TTS == 1 + +#endif // #ifdef __OHOS__ diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 5251413a8..e9cd5be0a 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -1558,6 +1558,9 @@ SHERPA_ONNX_API SherpaOnnxVoiceActivityDetector * SherpaOnnxCreateVoiceActivityDetectorOHOS( const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds, NativeResourceManager *mgr); + +SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( + const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr); #endif #if defined(__GNUC__) diff --git a/sherpa-onnx/csrc/lexicon.cc b/sherpa-onnx/csrc/lexicon.cc index 499f17f68..fe5e595b9 100644 --- a/sherpa-onnx/csrc/lexicon.cc +++ b/sherpa-onnx/csrc/lexicon.cc @@ -7,17 +7,19 @@ #include #include #include +#include #include +#include #include #if __ANDROID_API__ >= 9 -#include - #include "android/asset_manager.h" #include "android/asset_manager_jni.h" #endif -#include +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/onnx-utils.h" @@ -110,8 +112,8 @@ Lexicon::Lexicon(const std::string &lexicon, const std::string &tokens, InitPunctuations(punctuations); } -#if __ANDROID_API__ >= 9 -Lexicon::Lexicon(AAssetManager *mgr, const std::string &lexicon, +template +Lexicon::Lexicon(Manager *mgr, const std::string &lexicon, const std::string &tokens, const std::string &punctuations, const std::string &language, bool debug /*= false*/ ) @@ -132,7 +134,6 @@ Lexicon::Lexicon(AAssetManager *mgr, const std::string &lexicon, InitPunctuations(punctuations); } -#endif std::vector Lexicon::ConvertTextToTokenIds( const std::string &text, const std::string & /*voice*/ /*= ""*/) const { @@ -371,4 +372,18 @@ void Lexicon::InitPunctuations(const std::string &punctuations) { } } +#if __ANDROID_API__ >= 9 +template Lexicon::Lexicon(AAssetManager *mgr, const std::string &lexicon, + const std::string &tokens, + const std::string &punctuations, + const std::string &language, bool debug = false); +#endif + +#if __OHOS__ +template Lexicon::Lexicon(NativeResourceManager *mgr, + const std::string &lexicon, const std::string &tokens, + const std::string &punctuations, + const std::string &language, bool debug = false); +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/lexicon.h b/sherpa-onnx/csrc/lexicon.h index fb60cdb7f..39329694b 100644 --- a/sherpa-onnx/csrc/lexicon.h +++ b/sherpa-onnx/csrc/lexicon.h @@ -13,11 +13,6 @@ #include #include -#if __ANDROID_API__ >= 9 -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif - #include "sherpa-onnx/csrc/offline-tts-frontend.h" namespace sherpa_onnx { @@ -31,11 +26,10 @@ class Lexicon : public OfflineTtsFrontend { const std::string &punctuations, const std::string &language, bool debug = false); -#if __ANDROID_API__ >= 9 - Lexicon(AAssetManager *mgr, const std::string &lexicon, - const std::string &tokens, const std::string &punctuations, - const std::string &language, bool debug = false); -#endif + template + Lexicon(Manager *mgr, const std::string &lexicon, const std::string &tokens, + const std::string &punctuations, const std::string &language, + bool debug = false); std::vector ConvertTextToTokenIds( const std::string &text, const std::string &voice = "") const override; diff --git a/sherpa-onnx/csrc/offline-ctc-model.cc b/sherpa-onnx/csrc/offline-ctc-model.cc index daff5654a..6ca5f0054 100644 --- a/sherpa-onnx/csrc/offline-ctc-model.cc +++ b/sherpa-onnx/csrc/offline-ctc-model.cc @@ -136,7 +136,6 @@ std::unique_ptr OfflineCtcModel::Create( switch (model_type) { case ModelType::kEncDecCTCModelBPE: - return std::make_unique(config); case ModelType::kEncDecCTCModel: return std::make_unique(config); case ModelType::kEncDecHybridRNNTCTCBPEModel: @@ -187,7 +186,6 @@ std::unique_ptr OfflineCtcModel::Create( switch (model_type) { case ModelType::kEncDecCTCModelBPE: - return std::make_unique(mgr, config); case ModelType::kEncDecCTCModel: return std::make_unique(mgr, config); case ModelType::kEncDecHybridRNNTCTCBPEModel: diff --git a/sherpa-onnx/csrc/offline-tts-character-frontend.cc b/sherpa-onnx/csrc/offline-tts-character-frontend.cc index 72481e094..0806a9fa2 100644 --- a/sherpa-onnx/csrc/offline-tts-character-frontend.cc +++ b/sherpa-onnx/csrc/offline-tts-character-frontend.cc @@ -2,20 +2,24 @@ // // Copyright (c) 2023 Xiaomi Corporation -#if __ANDROID_API__ >= 9 -#include - -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif #include #include #include #include #include #include +#include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif + #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-tts-character-frontend.h" #include "sherpa-onnx/csrc/onnx-utils.h" @@ -82,9 +86,9 @@ OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend( token2id_ = ReadTokens(is); } -#if __ANDROID_API__ >= 9 +template OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend( - AAssetManager *mgr, const std::string &tokens, + Manager *mgr, const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data) : meta_data_(meta_data) { auto buf = ReadFile(mgr, tokens); @@ -92,8 +96,6 @@ OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend( token2id_ = ReadTokens(is); } -#endif - std::vector OfflineTtsCharacterFrontend::ConvertTextToTokenIds( const std::string &_text, const std::string & /*voice = ""*/) const { // see @@ -189,4 +191,18 @@ std::vector OfflineTtsCharacterFrontend::ConvertTextToTokenIds( return ans; } +#if __ANDROID_API__ >= 9 +template OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend( + AAssetManager *mgr, const std::string &tokens, + const OfflineTtsVitsModelMetaData &meta_data); + +#endif + +#if __OHOS__ +template OfflineTtsCharacterFrontend::OfflineTtsCharacterFrontend( + NativeResourceManager *mgr, const std::string &tokens, + const OfflineTtsVitsModelMetaData &meta_data); + +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/offline-tts-character-frontend.h b/sherpa-onnx/csrc/offline-tts-character-frontend.h index ffd2bb5f4..fcd2f6dd5 100644 --- a/sherpa-onnx/csrc/offline-tts-character-frontend.h +++ b/sherpa-onnx/csrc/offline-tts-character-frontend.h @@ -9,11 +9,6 @@ #include #include -#if __ANDROID_API__ >= 9 -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif - #include "sherpa-onnx/csrc/offline-tts-frontend.h" #include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h" @@ -24,11 +19,10 @@ class OfflineTtsCharacterFrontend : public OfflineTtsFrontend { OfflineTtsCharacterFrontend(const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data); -#if __ANDROID_API__ >= 9 - OfflineTtsCharacterFrontend(AAssetManager *mgr, const std::string &tokens, + template + OfflineTtsCharacterFrontend(Manager *mgr, const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data); -#endif /** Convert a string to token IDs. * * @param text The input text. diff --git a/sherpa-onnx/csrc/offline-tts-impl.cc b/sherpa-onnx/csrc/offline-tts-impl.cc index 063730db8..62b6eebba 100644 --- a/sherpa-onnx/csrc/offline-tts-impl.cc +++ b/sherpa-onnx/csrc/offline-tts-impl.cc @@ -6,6 +6,15 @@ #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif + #include "sherpa-onnx/csrc/offline-tts-vits-impl.h" namespace sherpa_onnx { @@ -16,12 +25,21 @@ std::unique_ptr OfflineTtsImpl::Create( return std::make_unique(config); } -#if __ANDROID_API__ >= 9 +template std::unique_ptr OfflineTtsImpl::Create( - AAssetManager *mgr, const OfflineTtsConfig &config) { + Manager *mgr, const OfflineTtsConfig &config) { // TODO(fangjun): Support other types return std::make_unique(mgr, config); } + +#if __ANDROID_API__ >= 9 +template std::unique_ptr OfflineTtsImpl::Create( + AAssetManager *mgr, const OfflineTtsConfig &config); +#endif + +#if __OHOS__ +template std::unique_ptr OfflineTtsImpl::Create( + NativeResourceManager *mgr, const OfflineTtsConfig &config); #endif } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/offline-tts-impl.h b/sherpa-onnx/csrc/offline-tts-impl.h index 3c9e27b1b..db8b7162d 100644 --- a/sherpa-onnx/csrc/offline-tts-impl.h +++ b/sherpa-onnx/csrc/offline-tts-impl.h @@ -8,11 +8,6 @@ #include #include -#if __ANDROID_API__ >= 9 -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif - #include "sherpa-onnx/csrc/offline-tts.h" namespace sherpa_onnx { @@ -23,10 +18,9 @@ class OfflineTtsImpl { static std::unique_ptr Create(const OfflineTtsConfig &config); -#if __ANDROID_API__ >= 9 - static std::unique_ptr Create(AAssetManager *mgr, + template + static std::unique_ptr Create(Manager *mgr, const OfflineTtsConfig &config); -#endif virtual GeneratedAudio Generate( const std::string &text, int64_t sid = 0, float speed = 1.0, diff --git a/sherpa-onnx/csrc/offline-tts-vits-impl.h b/sherpa-onnx/csrc/offline-tts-vits-impl.h index 72b21a3b5..972303dd4 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-impl.h +++ b/sherpa-onnx/csrc/offline-tts-vits-impl.h @@ -6,16 +6,10 @@ #include #include +#include #include #include -#if __ANDROID_API__ >= 9 -#include - -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif - #include "fst/extensions/far/far.h" #include "kaldifst/csrc/kaldi-fst-io.h" #include "kaldifst/csrc/text-normalizer.h" @@ -82,8 +76,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { } } -#if __ANDROID_API__ >= 9 - OfflineTtsVitsImpl(AAssetManager *mgr, const OfflineTtsConfig &config) + template + OfflineTtsVitsImpl(Manager *mgr, const OfflineTtsConfig &config) : config_(config), model_(std::make_unique(mgr, config.model)) { InitFrontend(mgr); @@ -130,7 +124,6 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { } // for (const auto &f : files) } // if (!config.rule_fars.empty()) } -#endif int32_t SampleRate() const override { return model_->GetMetaData().sample_rate; @@ -297,8 +290,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { } private: -#if __ANDROID_API__ >= 9 - void InitFrontend(AAssetManager *mgr) { + template + void InitFrontend(Manager *mgr) { const auto &meta_data = model_->GetMetaData(); if (meta_data.frontend == "characters") { @@ -323,7 +316,6 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { meta_data.punctuations, meta_data.language, config_.model.debug); } } -#endif void InitFrontend() { const auto &meta_data = model_->GetMetaData(); diff --git a/sherpa-onnx/csrc/offline-tts-vits-model.cc b/sherpa-onnx/csrc/offline-tts-vits-model.cc index 90e0993d6..38efc6204 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-model.cc +++ b/sherpa-onnx/csrc/offline-tts-vits-model.cc @@ -9,6 +9,15 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif + #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/session.h" @@ -26,8 +35,8 @@ class OfflineTtsVitsModel::Impl { Init(buf.data(), buf.size()); } -#if __ANDROID_API__ >= 9 - Impl(AAssetManager *mgr, const OfflineTtsModelConfig &config) + template + Impl(Manager *mgr, const OfflineTtsModelConfig &config) : config_(config), env_(ORT_LOGGING_LEVEL_ERROR), sess_opts_(GetSessionOptions(config)), @@ -35,7 +44,6 @@ class OfflineTtsVitsModel::Impl { auto buf = ReadFile(mgr, config.vits.model); Init(buf.data(), buf.size()); } -#endif Ort::Value Run(Ort::Value x, int64_t sid, float speed) { if (meta_data_.is_piper || meta_data_.is_coqui) { @@ -336,11 +344,10 @@ class OfflineTtsVitsModel::Impl { OfflineTtsVitsModel::OfflineTtsVitsModel(const OfflineTtsModelConfig &config) : impl_(std::make_unique(config)) {} -#if __ANDROID_API__ >= 9 -OfflineTtsVitsModel::OfflineTtsVitsModel(AAssetManager *mgr, +template +OfflineTtsVitsModel::OfflineTtsVitsModel(Manager *mgr, const OfflineTtsModelConfig &config) : impl_(std::make_unique(mgr, config)) {} -#endif OfflineTtsVitsModel::~OfflineTtsVitsModel() = default; @@ -359,4 +366,14 @@ const OfflineTtsVitsModelMetaData &OfflineTtsVitsModel::GetMetaData() const { return impl_->GetMetaData(); } +#if __ANDROID_API__ >= 9 +template OfflineTtsVitsModel::OfflineTtsVitsModel( + AAssetManager *mgr, const OfflineTtsModelConfig &config); +#endif + +#if __OHOS__ +template OfflineTtsVitsModel::OfflineTtsVitsModel( + NativeResourceManager *mgr, const OfflineTtsModelConfig &config); +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/offline-tts-vits-model.h b/sherpa-onnx/csrc/offline-tts-vits-model.h index 543963c9d..a880934ef 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-model.h +++ b/sherpa-onnx/csrc/offline-tts-vits-model.h @@ -8,11 +8,6 @@ #include #include -#if __ANDROID_API__ >= 9 -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif - #include "onnxruntime_cxx_api.h" // NOLINT #include "sherpa-onnx/csrc/offline-tts-model-config.h" #include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h" @@ -24,9 +19,9 @@ class OfflineTtsVitsModel { ~OfflineTtsVitsModel(); explicit OfflineTtsVitsModel(const OfflineTtsModelConfig &config); -#if __ANDROID_API__ >= 9 - OfflineTtsVitsModel(AAssetManager *mgr, const OfflineTtsModelConfig &config); -#endif + + template + OfflineTtsVitsModel(Manager *mgr, const OfflineTtsModelConfig &config); /** Run the model. * diff --git a/sherpa-onnx/csrc/offline-tts.cc b/sherpa-onnx/csrc/offline-tts.cc index 12feda0b7..ec2c69523 100644 --- a/sherpa-onnx/csrc/offline-tts.cc +++ b/sherpa-onnx/csrc/offline-tts.cc @@ -7,6 +7,15 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif + #include "sherpa-onnx/csrc/file-utils.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-tts-impl.h" @@ -78,10 +87,9 @@ std::string OfflineTtsConfig::ToString() const { OfflineTts::OfflineTts(const OfflineTtsConfig &config) : impl_(OfflineTtsImpl::Create(config)) {} -#if __ANDROID_API__ >= 9 -OfflineTts::OfflineTts(AAssetManager *mgr, const OfflineTtsConfig &config) +template +OfflineTts::OfflineTts(Manager *mgr, const OfflineTtsConfig &config) : impl_(OfflineTtsImpl::Create(mgr, config)) {} -#endif OfflineTts::~OfflineTts() = default; @@ -95,4 +103,14 @@ int32_t OfflineTts::SampleRate() const { return impl_->SampleRate(); } int32_t OfflineTts::NumSpeakers() const { return impl_->NumSpeakers(); } +#if __ANDROID_API__ >= 9 +template OfflineTts::OfflineTts(AAssetManager *mgr, + const OfflineTtsConfig &config); +#endif + +#if __OHOS__ +template OfflineTts::OfflineTts(NativeResourceManager *mgr, + const OfflineTtsConfig &config); +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/offline-tts.h b/sherpa-onnx/csrc/offline-tts.h index 03a13a159..8399443c0 100644 --- a/sherpa-onnx/csrc/offline-tts.h +++ b/sherpa-onnx/csrc/offline-tts.h @@ -10,11 +10,6 @@ #include #include -#if __ANDROID_API__ >= 9 -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif - #include "sherpa-onnx/csrc/offline-tts-model-config.h" #include "sherpa-onnx/csrc/parse-options.h" @@ -69,9 +64,8 @@ class OfflineTts { ~OfflineTts(); explicit OfflineTts(const OfflineTtsConfig &config); -#if __ANDROID_API__ >= 9 - OfflineTts(AAssetManager *mgr, const OfflineTtsConfig &config); -#endif + template + OfflineTts(Manager *mgr, const OfflineTtsConfig &config); // @param text A string containing words separated by spaces // @param sid Speaker ID. Used only for multi-speaker models, e.g., models diff --git a/sherpa-onnx/csrc/piper-phonemize-lexicon.cc b/sherpa-onnx/csrc/piper-phonemize-lexicon.cc index de753db61..298274654 100644 --- a/sherpa-onnx/csrc/piper-phonemize-lexicon.cc +++ b/sherpa-onnx/csrc/piper-phonemize-lexicon.cc @@ -11,16 +11,19 @@ #include // NOLINT #include #include +#include #include #include #if __ANDROID_API__ >= 9 -#include - #include "android/asset_manager.h" #include "android/asset_manager_jni.h" #endif +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif + #include "espeak-ng/speak_lib.h" #include "phoneme_ids.hpp" #include "phonemize.hpp" @@ -196,9 +199,9 @@ PiperPhonemizeLexicon::PiperPhonemizeLexicon( InitEspeak(data_dir); } -#if __ANDROID_API__ >= 9 +template PiperPhonemizeLexicon::PiperPhonemizeLexicon( - AAssetManager *mgr, const std::string &tokens, const std::string &data_dir, + Manager *mgr, const std::string &tokens, const std::string &data_dir, const OfflineTtsVitsModelMetaData &meta_data) : meta_data_(meta_data) { { @@ -212,7 +215,6 @@ PiperPhonemizeLexicon::PiperPhonemizeLexicon( // data_dir. InitEspeak(data_dir); } -#endif std::vector PiperPhonemizeLexicon::ConvertTextToTokenIds( const std::string &text, const std::string &voice /*= ""*/) const { @@ -255,4 +257,16 @@ std::vector PiperPhonemizeLexicon::ConvertTextToTokenIds( return ans; } +#if __ANDROID_API__ >= 9 +template PiperPhonemizeLexicon::PiperPhonemizeLexicon( + AAssetManager *mgr, const std::string &tokens, const std::string &data_dir, + const OfflineTtsVitsModelMetaData &meta_data); +#endif + +#if __OHOS__ +template PiperPhonemizeLexicon::PiperPhonemizeLexicon( + NativeResourceManager *mgr, const std::string &tokens, + const std::string &data_dir, const OfflineTtsVitsModelMetaData &meta_data); +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/piper-phonemize-lexicon.h b/sherpa-onnx/csrc/piper-phonemize-lexicon.h index 34922de29..ccd790a96 100644 --- a/sherpa-onnx/csrc/piper-phonemize-lexicon.h +++ b/sherpa-onnx/csrc/piper-phonemize-lexicon.h @@ -9,11 +9,6 @@ #include #include -#if __ANDROID_API__ >= 9 -#include "android/asset_manager.h" -#include "android/asset_manager_jni.h" -#endif - #include "sherpa-onnx/csrc/offline-tts-frontend.h" #include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h" @@ -24,11 +19,10 @@ class PiperPhonemizeLexicon : public OfflineTtsFrontend { PiperPhonemizeLexicon(const std::string &tokens, const std::string &data_dir, const OfflineTtsVitsModelMetaData &meta_data); -#if __ANDROID_API__ >= 9 - PiperPhonemizeLexicon(AAssetManager *mgr, const std::string &tokens, + template + PiperPhonemizeLexicon(Manager *mgr, const std::string &tokens, const std::string &data_dir, const OfflineTtsVitsModelMetaData &meta_data); -#endif std::vector ConvertTextToTokenIds( const std::string &text, const std::string &voice = "") const override; diff --git a/sherpa-onnx/csrc/silero-vad-model.cc b/sherpa-onnx/csrc/silero-vad-model.cc index 80f0cbd65..1b281e5db 100644 --- a/sherpa-onnx/csrc/silero-vad-model.cc +++ b/sherpa-onnx/csrc/silero-vad-model.cc @@ -51,11 +51,11 @@ class SileroVadModel::Impl { : config_(config), env_(ORT_LOGGING_LEVEL_ERROR), sess_opts_(GetSessionOptions(config)), - allocator_{} { + allocator_{}, + sample_rate_(config.sample_rate) { auto buf = ReadFile(mgr, config.silero_vad.model); Init(buf.data(), buf.size()); - sample_rate_ = config.sample_rate; if (sample_rate_ != 16000) { SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d", config.sample_rate);