From 4022167427872c260be1ba708f44add3938ff278 Mon Sep 17 00:00:00 2001 From: sevenc-nanashi Date: Sun, 17 Mar 2024 02:08:00 +0900 Subject: [PATCH] =?UTF-8?q?Add:=20tts=E3=81=BE=E3=81=A7=E3=81=A7=E3=81=8D?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/infer/runtimes.rs | 6 +- .../src/infer/runtimes/onnxruntime_wasm.rs | 169 ++++++++++++++++-- crates/voicevox_core/src/infer/status.rs | 10 ++ crates/voicevox_core/wasm_library.js | 156 ++++++++++++++++ crates/voicevox_core_c_api/build.rs | 16 +- crates/voicevox_core_c_api/wasm_library.js | 7 - example/wasm/package.json | 1 + example/wasm/pnpm-lock.yaml | 105 ++++++++++- .../src/artifacts/voicevox_core_wasm_api.d.ts | 32 +++- example/wasm/src/components/HelloWorld.vue | 22 ++- example/wasm/src/style.css | 4 + example/wasm/src/voicevoxCore.ts | 103 +++++++++-- 12 files changed, 567 insertions(+), 64 deletions(-) create mode 100644 crates/voicevox_core/wasm_library.js delete mode 100644 crates/voicevox_core_c_api/wasm_library.js diff --git a/crates/voicevox_core/src/infer/runtimes.rs b/crates/voicevox_core/src/infer/runtimes.rs index 9d5d59087..33b62bba4 100644 --- a/crates/voicevox_core/src/infer/runtimes.rs +++ b/crates/voicevox_core/src/infer/runtimes.rs @@ -1,6 +1,6 @@ -// #[cfg(not(target_family = "wasm"))] -// mod onnxruntime; -// #[cfg(target_family = "wasm")] +#[cfg(not(target_family = "wasm"))] +mod onnxruntime; +#[cfg(target_family = "wasm")] #[path = "runtimes/onnxruntime_wasm.rs"] mod onnxruntime; diff --git a/crates/voicevox_core/src/infer/runtimes/onnxruntime_wasm.rs b/crates/voicevox_core/src/infer/runtimes/onnxruntime_wasm.rs index 831995f82..2e1c6e107 100644 --- a/crates/voicevox_core/src/infer/runtimes/onnxruntime_wasm.rs +++ b/crates/voicevox_core/src/infer/runtimes/onnxruntime_wasm.rs @@ -1,12 +1,15 @@ -use std::any::Any; -use std::mem::ManuallyDrop; -use std::sync::Arc; +#![allow(unsafe_code)] +use std::collections::HashMap; +use std::ffi::{CStr, CString}; +use std::sync::Mutex; use std::{fmt::Debug, vec}; use anyhow::anyhow; use duplicate::duplicate_item; use ndarray::{Array, Dimension}; use once_cell::sync::Lazy; +use serde::{Deserialize, Serialize}; +use tracing::info; use crate::devices::SupportedDevices; @@ -15,6 +18,34 @@ use super::super::{ OutputScalarKind, OutputTensor, ParamInfo, PushInputTensor, }; +static RESULTS: Lazy>> = Lazy::new(|| Mutex::new(HashMap::new())); + +#[derive(Debug, Deserialize)] +struct SessionNewResult { + handle: String, +} +#[derive(Debug, Deserialize)] +#[serde(tag = "type", content = "payload", rename_all = "camelCase")] +enum JsResult { + Ok(T), + Err(String), +} + +extern "C" { + fn onnxruntime_inference_session_new( + model: *const u8, + model_len: usize, + use_gpu: bool, + callback: extern "C" fn(*const u8, *const u8) -> (), + ) -> *const u8; + fn onnxruntime_inference_session_run( + handle: *const u8, + inputs: *const u8, + callback: extern "C" fn(*const u8, *const u8) -> (), + ) -> *const u8; + fn emscripten_sleep(millis: i32); +} + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub(crate) enum Onnxruntime {} @@ -25,7 +56,7 @@ impl InferenceRuntime for Onnxruntime { fn supported_devices() -> crate::Result { Ok(SupportedDevices { cpu: true, - cuda: false, + cuda: true, dml: false, }) } @@ -38,39 +69,145 @@ impl InferenceRuntime for Onnxruntime { Vec>, Vec>, )> { - todo!() + unsafe { + info!("creating new session"); + let model = model()?; + let model_len = model.len(); + let cpu_num_threads = options.cpu_num_threads as usize; + let use_gpu = options.use_gpu; + let nonce = + onnxruntime_inference_session_new(model.as_ptr(), model_len, use_gpu, js_callback); + + let nonce = CStr::from_ptr(nonce as *const i8) + .to_str() + .map_err(|err| anyhow!(err))? + .to_string(); + info!("nonce: {}", nonce); + + let result = loop { + let result = RESULTS.lock().expect("mutex poisoned").remove(&nonce); + if let Some(result) = result { + break result; + } + emscripten_sleep(10); + }; + + let result: JsResult = serde_json::from_str(&result)?; + let result = match result { + JsResult::Ok(result) => result, + JsResult::Err(err) => return Err(anyhow!(err)), + }; + + let handle = result.handle; + let session = OnnxruntimeSession { handle }; + Ok((session, vec![], vec![])) + } } fn run(ctx: OnnxruntimeRunContext<'_>) -> anyhow::Result> { - todo!() + unsafe { + let handle_cstr = CString::new(ctx.session.handle.clone())?; + let inputs = serde_json::to_string(&ctx.inputs)?; + let inputs_cstr = CString::new(inputs)?; + let nonce = onnxruntime_inference_session_run( + handle_cstr.into_raw() as _, + inputs_cstr.into_raw() as _, + js_callback, + ); + let nonce = CStr::from_ptr(nonce as *const i8) + .to_str() + .map_err(|err| anyhow!(err))? + .to_string(); + + let result = loop { + let result = RESULTS.lock().expect("mutex poisoned").remove(&nonce); + if let Some(result) = result { + break result; + } + emscripten_sleep(10); + }; + let result: JsResult> = serde_json::from_str(&result)?; + let result = match result { + JsResult::Ok(result) => result, + JsResult::Err(err) => return Err(anyhow!(err)), + }; + + Ok(result + .into_iter() + .map(|tensor| { + let shape = tensor.shape; + match tensor.data { + TensorData::Int64(data) => { + unimplemented!() + } + TensorData::Float32(data) => { + OutputTensor::Float32(Array::from_shape_vec(shape, data).unwrap()) + } + } + }) + .collect()) + } } } -pub(crate) struct OnnxruntimeSession {} +extern "C" fn js_callback(nonce: *const u8, result: *const u8) { + let nonce = unsafe { CStr::from_ptr(nonce as *const i8) } + .to_str() + .expect("invalid handle") + .to_string(); + let result = unsafe { CStr::from_ptr(result as *const i8) } + .to_str() + .expect("invalid result") + .to_string(); + info!("callback called with nonce: {}", nonce); + RESULTS + .lock() + .expect("mutex poisoned") + .insert(nonce, result); +} + +pub(crate) struct OnnxruntimeSession { + handle: String, +} -impl Drop for OnnxruntimeSession { - fn drop(&mut self) { - todo!() - } +#[derive(Serialize, Deserialize)] +#[serde(tag = "kind", content = "array", rename_all = "camelCase")] +pub(crate) enum TensorData { + Int64(Vec), + Float32(Vec), +} +#[derive(Serialize, Deserialize)] +pub(crate) struct Tensor { + data: TensorData, + shape: Vec, } pub(crate) struct OnnxruntimeRunContext<'sess> { session: &'sess mut OnnxruntimeSession, + inputs: Vec, } impl<'sess> From<&'sess mut OnnxruntimeSession> for OnnxruntimeRunContext<'sess> { fn from(sess: &'sess mut OnnxruntimeSession) -> Self { - todo!() + Self { + session: sess, + inputs: vec![], + } } } impl PushInputTensor for OnnxruntimeRunContext<'_> { #[duplicate_item( - method T; - [ push_int64 ] [ i64 ]; - [ push_float32 ] [ f32 ]; + method T kind_item; + [ push_int64 ] [ i64 ] [ Int64 ]; + [ push_float32 ] [ f32 ] [ Float32 ]; )] fn method(&mut self, tensor: Array) { - todo!() + let shape = tensor.shape().to_vec(); + let tensor_vec = tensor.into_raw_vec(); + self.inputs.push(Tensor { + data: TensorData::kind_item(tensor_vec), + shape, + }); } } diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs index 2a575153d..d4dfc094a 100644 --- a/crates/voicevox_core/src/infer/status.rs +++ b/crates/voicevox_core/src/infer/status.rs @@ -286,6 +286,16 @@ impl SessionSet { sessions.remove(&k.into_usize()).expect("should exist") }))); + #[cfg(target_family = "wasm")] + fn check_param_infos( + _expected: &[ParamInfo], + _actual: &[ParamInfo], + ) -> anyhow::Result<()> { + // onnxruntime-web ではパラメータ情報を取れないので、チェックをスキップする + // ref: https://github.com/microsoft/onnxruntime/discussions/17682 + Ok(()) + } + #[cfg(not(target_family = "wasm"))] fn check_param_infos( expected: &[ParamInfo], actual: &[ParamInfo], diff --git a/crates/voicevox_core/wasm_library.js b/crates/voicevox_core/wasm_library.js new file mode 100644 index 000000000..317d9214b --- /dev/null +++ b/crates/voicevox_core/wasm_library.js @@ -0,0 +1,156 @@ +addToLibrary({ + $onnxruntime_injection__postset: "onnxruntime_injection();", + $onnxruntime_injection: function () { + let onnxruntime; + import("onnxruntime-web").then((onnxruntime_) => { + onnxruntime = onnxruntime_; + console.log("onnxruntime-web loaded"); + console.log(onnxruntime_); + onnxruntime_.env.wasm.wasmPaths = "/node_modules/onnxruntime-web/dist/"; + }); + + let nonce = 0; + const generateNonce = () => { + return (nonce++).toString(16); + }; + const toCharPtr = (str) => { + const bin = new TextEncoder().encode(str); + const ptr = _malloc(bin.length + 1); + HEAP8.set(bin, ptr); + HEAP8[ptr + bin.length] = 0; + return ptr; + }; + + const sessions = {}; + class Onnxruntime { + newSession( + /** @type {number} */ model, + /** @type {number} */ modelLen, + /** @type {number} */ useGpu, + /** @type {number} */ callback + ) { + const nonce = generateNonce(); + const modelDataRef = new Uint8Array(HEAPU8.buffer, model, modelLen); + const modelData = modelDataRef.slice().buffer; + (async () => { + try { + const session = await onnxruntime.InferenceSession.create( + modelData, + { + executionProviders: useGpu + ? ["webgpu", "webgl", "wasm"] + : ["wasm"], + } + ); + sessions[nonce] = session; + console.log("onnxruntime session created"); + console.log(session); + const result = { + handle: nonce, + }; + + dynCall("vii", callback, [ + toCharPtr(nonce), + toCharPtr( + JSON.stringify({ + type: "ok", + payload: result, + }) + ), + ]); + } catch (e) { + const result = { + type: "err", + payload: e.message, + }; + dynCall("vii", callback, [ + toCharPtr(nonce), + toCharPtr(JSON.stringify(result)), + ]); + } + })(); + + console.log("newSession called", nonce); + + return toCharPtr(nonce); + } + + sessionRun( + /** @type {number} */ sessionHandle, + /** @type {number} */ inputs, + /** @type {number} */ callback + ) { + const session = sessions[UTF8ToString(sessionHandle)]; + const inputsObj = + /** @type {{shape: number[], data: {kind: string, array: number[]}}[] */ ( + JSON.parse(UTF8ToString(inputs)) + ); + const nonce = generateNonce(); + + (async () => { + try { + console.log("onnxruntime session run"); + if (!session) { + throw new Error("session not found"); + } + console.log(inputsObj); + const result = + /** @type {{[key: string]: {cpuData: {[key: number]: number}, dims: number[], type: string}}} */ ( + await session.run( + Object.fromEntries( + inputsObj.map((input, i) => [ + session.inputNames[i], + new onnxruntime.Tensor( + input.data.kind, + input.data.array, + input.shape + ), + ]) + ) + ) + ); + console.log("onnxruntime session run result"); + console.log(result); + const resultStr = JSON.stringify({ + type: "ok", + payload: Object.values(result).map((tensor) => ({ + shape: tensor.dims, + data: { + kind: tensor.type, + array: Object.entries(tensor.cpuData) + .sort(([a], [b]) => a - b) + .map(([, value]) => value), + }, + })), + }); + dynCall("vii", callback, [toCharPtr(nonce), toCharPtr(resultStr)]); + } catch (e) { + const result = { + type: "err", + payload: e.message, + }; + dynCall("vii", callback, [ + toCharPtr(""), + toCharPtr(JSON.stringify(result)), + ]); + } + })(); + + console.log("sessionRun called", nonce); + + return toCharPtr(nonce); + } + } + + const inst = new Onnxruntime(); + _onnxruntime_inference_session_new = inst.newSession.bind(inst); + _onnxruntime_inference_session_run = inst.sessionRun.bind(inst); + }, + onnxruntime_inference_session_new: function () {}, + onnxruntime_inference_session_new__deps: ["$onnxruntime_injection"], + onnxruntime_inference_session_run: function () {}, + onnxruntime_inference_session_run__deps: ["$onnxruntime_injection"], + + emscripten_memcpy_js: (dest, src, num) => + HEAPU8.copyWithin(dest, src, src + num), +}); diff --git a/crates/voicevox_core_c_api/build.rs b/crates/voicevox_core_c_api/build.rs index 895b16319..89c8ee3c5 100644 --- a/crates/voicevox_core_c_api/build.rs +++ b/crates/voicevox_core_c_api/build.rs @@ -21,13 +21,9 @@ fn main() { // println!("cargo:rustc-link-arg=-sEXPORTED_RUNTIME_METHODS=['ccall']"); // println!("cargo:rustc-link-arg=-sEXPORT_NAME=\"RawVoicevoxCore\""); // println!("cargo:rustc-link-arg=-sMODULARIZE=1"); - // println!("cargo:rustc-link-arg=-sTOTAL_STACK=128MB"); - // println!("cargo:rustc-link-arg=-sINITIAL_MEMORY=256MB"); + println!("cargo:rustc-link-arg=-sTOTAL_STACK=128MB"); + println!("cargo:rustc-link-arg=-sINITIAL_MEMORY=256MB"); println!("cargo:rustc-link-arg=-sALLOW_MEMORY_GROWTH=1"); - println!( - "cargo:rustc-link-arg=--js-library={}", - std::env::var("CARGO_MANIFEST_DIR").unwrap() + "/wasm_library.js" - ); let re = Regex::new(r#"pub (?:unsafe )?extern "C" fn (\w+)"#).unwrap(); let mut functions = vec![ @@ -77,7 +73,13 @@ fn main() { ); println!("cargo:rustc-link-arg=-sERROR_ON_UNDEFINED_SYMBOLS=0"); println!("cargo:rustc-link-arg=-sEXPORT_NAME=VoicevoxCore"); - println!("cargo:rustc-link-arg=-DEMSCRIPTEN_STANDALONE_WASM"); + println!("cargo:rustc-link-arg=-sASYNCIFY=1"); println!("cargo:rustc-link-arg=--no-entry"); + + // 本当はvoicevox_core/build.rsに置きたいけどできない(当社調べ)なのでここに置く + println!( + "cargo:rustc-link-arg=--js-library={}", + std::env::var("CARGO_MANIFEST_DIR").unwrap() + "/../voicevox_core/wasm_library.js" + ); } } diff --git a/crates/voicevox_core_c_api/wasm_library.js b/crates/voicevox_core_c_api/wasm_library.js deleted file mode 100644 index cf2116e3b..000000000 --- a/crates/voicevox_core_c_api/wasm_library.js +++ /dev/null @@ -1,7 +0,0 @@ -addToLibrary({ - __cxx_global_var_init: function () {/* noop */}, - - emscripten_memcpy_js: function (dest, src, num) { - return HEAPU8.set(HEAPU8.subarray(src, src + num), dest); - }, -}); diff --git a/example/wasm/package.json b/example/wasm/package.json index 332b90f9f..40b04a867 100644 --- a/example/wasm/package.json +++ b/example/wasm/package.json @@ -10,6 +10,7 @@ }, "dependencies": { "jszip": "3.10.1", + "onnxruntime-web": "^1.17.1", "vue": "^3.3.8" }, "devDependencies": { diff --git a/example/wasm/pnpm-lock.yaml b/example/wasm/pnpm-lock.yaml index 529c6458c..53afa90ad 100644 --- a/example/wasm/pnpm-lock.yaml +++ b/example/wasm/pnpm-lock.yaml @@ -8,6 +8,9 @@ dependencies: jszip: specifier: 3.10.1 version: 3.10.1 + onnxruntime-web: + specifier: ^1.17.1 + version: 1.17.1 vue: specifier: ^3.3.8 version: 3.3.8(typescript@5.3.2) @@ -28,9 +31,6 @@ devDependencies: vite: specifier: ^5.0.0 version: 5.0.2(@types/node@18.18.13) - vite-plugin-arraybuffer: - specifier: ^0.0.6 - version: 0.0.6 vue-tsc: specifier: ^1.8.22 version: 1.8.22(typescript@5.3.2) @@ -261,6 +261,49 @@ packages: /@jridgewell/sourcemap-codec@1.4.15: resolution: {integrity: sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==} + /@protobufjs/aspromise@1.1.2: + resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} + dev: false + + /@protobufjs/base64@1.1.2: + resolution: {integrity: sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==} + dev: false + + /@protobufjs/codegen@2.0.4: + resolution: {integrity: sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg==} + dev: false + + /@protobufjs/eventemitter@1.1.0: + resolution: {integrity: sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==} + dev: false + + /@protobufjs/fetch@1.1.0: + resolution: {integrity: sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==} + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/inquire': 1.1.0 + dev: false + + /@protobufjs/float@1.0.2: + resolution: {integrity: sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==} + dev: false + + /@protobufjs/inquire@1.1.0: + resolution: {integrity: sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q==} + dev: false + + /@protobufjs/path@1.1.2: + resolution: {integrity: sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==} + dev: false + + /@protobufjs/pool@1.1.0: + resolution: {integrity: sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==} + dev: false + + /@protobufjs/utf8@1.1.0: + resolution: {integrity: sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==} + dev: false + /@rollup/rollup-android-arm-eabi@4.5.2: resolution: {integrity: sha512-ee7BudTwwrglFYSc3UnqInDDjCLWHKrFmGNi4aK7jlEyg4CyPa1DCMrZfsN1O13YT76UFEqXz2CoN7BCGpUlJw==} cpu: [arm] @@ -365,7 +408,6 @@ packages: resolution: {integrity: sha512-vXYZGRrSCreZmq1rEjMRLXJhiy8MrIeVasx+PCVlP414N7CJLHnMf+juVvjdprHyH+XRy3zKZLHeNueOpJCn0g==} dependencies: undici-types: 5.26.5 - dev: true /@vitejs/plugin-vue@4.5.0(vite@5.0.2)(vue@3.3.8): resolution: {integrity: sha512-a2WSpP8X8HTEww/U00bU4mX1QpLINNuz/2KMNpLsdu3BzOpak3AGI1CJYBTXcc4SPhaD0eNRUp7IyQK405L5dQ==} @@ -547,6 +589,10 @@ packages: /estree-walker@2.0.2: resolution: {integrity: sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==} + /flatbuffers@1.12.0: + resolution: {integrity: sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ==} + dev: false + /fsevents@2.3.3: resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} @@ -555,6 +601,10 @@ packages: dev: true optional: true + /guid-typescript@1.0.9: + resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==} + dev: false + /he@1.2.0: resolution: {integrity: sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==} hasBin: true @@ -587,6 +637,10 @@ packages: immediate: 3.0.6 dev: false + /long@5.2.3: + resolution: {integrity: sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==} + dev: false + /lru-cache@6.0.0: resolution: {integrity: sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==} engines: {node: '>=10'} @@ -616,6 +670,21 @@ packages: engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} hasBin: true + /onnxruntime-common@1.17.1: + resolution: {integrity: sha512-6wLNhpn+1hnsKN+jq6ulqUEJ61TdRmyFkGCvtRNnZkAupH8Yfr805UeNxjl9jtiX9B1q48pq6Q/67fEFpxT7Dw==} + dev: false + + /onnxruntime-web@1.17.1: + resolution: {integrity: sha512-EotY9uJU4xFY/ZVZ2Zrl2OZmBcbTVTWn/2OOh4cCWODPwtsYN2xeJYgoz8LfCgZSrhenGg0q4ceYUWATXqEsYQ==} + dependencies: + flatbuffers: 1.12.0 + guid-typescript: 1.0.9 + long: 5.2.3 + onnxruntime-common: 1.17.1 + platform: 1.3.6 + protobufjs: 7.2.6 + dev: false + /pako@1.0.11: resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==} dev: false @@ -627,6 +696,10 @@ packages: /picocolors@1.0.0: resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==} + /platform@1.3.6: + resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==} + dev: false + /postcss@8.4.31: resolution: {integrity: sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==} engines: {node: ^10 || ^12 || >=14} @@ -639,6 +712,25 @@ packages: resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} dev: false + /protobufjs@7.2.6: + resolution: {integrity: sha512-dgJaEDDL6x8ASUZ1YqWciTRrdOuYNzoOf27oHNfdyvKqHr5i0FV7FSLU+aIeFjyFgVxrpTOtQUi0BLLBymZaBw==} + engines: {node: '>=12.0.0'} + requiresBuild: true + dependencies: + '@protobufjs/aspromise': 1.1.2 + '@protobufjs/base64': 1.1.2 + '@protobufjs/codegen': 2.0.4 + '@protobufjs/eventemitter': 1.1.0 + '@protobufjs/fetch': 1.1.0 + '@protobufjs/float': 1.0.2 + '@protobufjs/inquire': 1.1.0 + '@protobufjs/path': 1.1.2 + '@protobufjs/pool': 1.1.0 + '@protobufjs/utf8': 1.1.0 + '@types/node': 18.18.13 + long: 5.2.3 + dev: false + /readable-stream@2.3.8: resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==} dependencies: @@ -708,16 +800,11 @@ packages: /undici-types@5.26.5: resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} - dev: true /util-deprecate@1.0.2: resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} dev: false - /vite-plugin-arraybuffer@0.0.6: - resolution: {integrity: sha512-2TXAUuHREy29seo6qgm/lGjUpYxJjzsIGHi7cOScMALK6GHIpV2/3KHZ6uaLVAUGwbYQ8rDhNqYgnPkFLxZgnQ==} - dev: true - /vite@5.0.2(@types/node@18.18.13): resolution: {integrity: sha512-6CCq1CAJCNM1ya2ZZA7+jS2KgnhbzvxakmlIjN24cF/PXhRMzpM/z8QgsVJA/Dm5fWUWnVEsmtBoMhmerPxT0g==} engines: {node: ^18.0.0 || >=20.0.0} diff --git a/example/wasm/src/artifacts/voicevox_core_wasm_api.d.ts b/example/wasm/src/artifacts/voicevox_core_wasm_api.d.ts index 7c319bb2a..7da5f121f 100644 --- a/example/wasm/src/artifacts/voicevox_core_wasm_api.d.ts +++ b/example/wasm/src/artifacts/voicevox_core_wasm_api.d.ts @@ -9,7 +9,7 @@ type Functions = { voicevox_get_version: () => string; voicevox_open_jtalk_rc_new: ( path: string, - pointer: Pointer<"OpenJtalkRc"> + pointer: Pointer<"OpenJtalkRc*"> ) => VoicevoxResultCode; voicevox_open_jtalk_rc_delete: ( pointer: Pointer<"OpenJtalkRc"> @@ -19,26 +19,45 @@ type Functions = { acceleration_mode: Pointer<"AccelerationMode">, cpu_num_threads: Pointer<"i32"> ) => void; + voicevox_make_default_tts_options_wasm: ( + enable_interrogative_upspeak: Pointer<"boolean"> + ) => void; voicevox_synthesizer_new_wasm: ( open_jtalk: Pointer<"OpenJtalkRc">, - options_acceleration_mode: i32, - options_cpu_num_threads: i32, - out_synthesizer: Pointer<"VoicevoxSynthesizer"> + options_acceleration_mode: number, + options_cpu_num_threads: number, + out_synthesizer: Pointer<"VoicevoxSynthesizer*"> ) => VoicevoxResultCode; voicevox_synthesizer_load_voice_model: ( pointer: Pointer<"VoicevoxSynthesizer">, model: Pointer<"VoicevoxVoiceModel"> - ) => VoicevoxResultCode; + ) => Promise; + voicevox_synthesizer_create_metas_json: ( + pointer: Pointer<"VoicevoxSynthesizer"> + ) => Pointer<"string">; + voicevox_synthesizer_tts: ( + pointer: Pointer<"VoicevoxSynthesizer">, + text: string, + speaker: number, + options_enable_interrogative_upspeak: boolean, + output_wav_length: Pointer<"i32">, + output_wav: Pointer<"u8*"> + ) => Promise; voicevox_synthesizer_delete: ( pointer: Pointer<"VoicevoxSynthesizer"> ) => void; voicevox_voice_model_new_from_path: ( path: string, - pointer: Pointer<"VoicevoxVoiceModel"> + pointer: Pointer<"VoicevoxVoiceModel*"> ) => VoicevoxResultCode; + voicevox_voice_model_get_metas_json: ( + pointer: Pointer<"VoicevoxVoiceModel"> + ) => Pointer<"string">; voicevox_voice_model_delete: ( pointer: Pointer<"VoicevoxVoiceModel"> ) => VoicevoxResultCode; + voicevox_json_free: (pointer: Pointer<"string">) => void; + voicevox_wav_free: (pointer: Pointer<"u8">) => void; setenv: (name: string, value: string) => number; }; type Ccall = ( @@ -54,6 +73,7 @@ type VoicevoxCore = EmscriptenModule & { stackSave: typeof stackSave; stackRestore: typeof stackRestore; stackAlloc: typeof stackAlloc; + UTF8ToString: typeof UTF8ToString; FS: typeof FS; }; export default function (): Promise< diff --git a/example/wasm/src/components/HelloWorld.vue b/example/wasm/src/components/HelloWorld.vue index 689313d59..dc64a2310 100644 --- a/example/wasm/src/components/HelloWorld.vue +++ b/example/wasm/src/components/HelloWorld.vue @@ -12,6 +12,7 @@ const modelLoad = async () => { const modelUint8Array = new Uint8Array(await modelBlob.arrayBuffer()); const id = await VoiceModel.newFromPath(modelUint8Array); model.value = id; + console.log(await model.value.metas()); }; const synthesizerCreate = async () => { @@ -32,8 +33,15 @@ const synthesizerTts = async () => { if (synthesizer.value === undefined) { return; } - const audio = await synthesizer.value.tts("ハローワールド"); - audioSrc.value = "data:audio/wav;base64," + audio; + const audio = await synthesizer.value.tts("ハローワールド", 0); + audioSrc.value = + "data:audio/wav;base64," + + btoa( + new Uint8Array(audio).reduce( + (data, byte) => data + String.fromCharCode(byte), + "" + ) + ); }; const version = ref(undefined); @@ -50,7 +58,7 @@ const audioSrc = ref(undefined);
{{ version }}
-
Model
+
VoiceModel
{{ model }}
@@ -60,9 +68,13 @@ const audioSrc = ref(undefined);
- + - diff --git a/example/wasm/src/style.css b/example/wasm/src/style.css index bb131d6b8..fc674b20b 100644 --- a/example/wasm/src/style.css +++ b/example/wasm/src/style.css @@ -53,6 +53,10 @@ button:focus, button:focus-visible { outline: 4px auto -webkit-focus-ring-color; } +button:disabled { + opacity: 0.5; + cursor: not-allowed; +} .card { padding: 2em; diff --git a/example/wasm/src/voicevoxCore.ts b/example/wasm/src/voicevoxCore.ts index 011c1a49e..5201108b2 100644 --- a/example/wasm/src/voicevoxCore.ts +++ b/example/wasm/src/voicevoxCore.ts @@ -56,6 +56,15 @@ function throwIfError(vvc: VoicevoxCore, code: VoicevoxResultCode) { function allocPointer(vvc: VoicevoxCore) { return vvc._malloc(4) as Pointer; } +function getPointerValue( + vvc: VoicevoxCore, + pointer: Pointer<`${T}*`> +) { + return vvc.getValue(pointer, "i32") as Pointer; +} +function utf8ToString(vvc: VoicevoxCore, pointer: Pointer<"string">) { + return vvc.UTF8ToString(pointer); +} function fileExists(vvc: VoicevoxCore, path: string) { try { @@ -93,7 +102,7 @@ export class OpenJtalkRc { } } } - const returnPtr = allocPointer<"OpenJtalkRc">(vvc); + const returnPtr = allocPointer<"OpenJtalkRc*">(vvc); throwIfError( vvc, vvc.ccall( @@ -119,6 +128,7 @@ const synthesizerFinalizer = new FinalizationRegistry( (pointer: Pointer<"VoicevoxSynthesizer">) => { const vvc = _voicevoxCore; if (vvc) { + console.log("Deleting synthesizer", pointer); vvc.ccall("voicevox_synthesizer_delete", "void", ["number"], [pointer]); } } @@ -136,10 +146,10 @@ export class Synthesizer { ["number", "number"], [accelerationModePtr, cpuNumThreadsPtr] ); - const accelerationMode = vvc.getValue(accelerationModePtr, "i32"); + const accelerationMode = 1; // vvc.getValue(accelerationModePtr, "i32"); const cpuNumThreads = vvc.getValue(cpuNumThreadsPtr, "i32"); - const returnPtr = allocPointer<"VoicevoxSynthesizer">(vvc); + const returnPtr = allocPointer<"VoicevoxSynthesizer*">(vvc); throwIfError( vvc, vvc.ccall( @@ -150,10 +160,11 @@ export class Synthesizer { ) ); - const synthesizer = new Synthesizer(openJtalkRc, returnPtr); + const returnPtrValue = getPointerValue(vvc, returnPtr); + const synthesizer = new Synthesizer(openJtalkRc, returnPtrValue); console.log("Initialized Synthesizer", synthesizer); - synthesizerFinalizer.register(synthesizer, returnPtr); + synthesizerFinalizer.register(synthesizer, returnPtrValue); return synthesizer; } @@ -166,7 +177,7 @@ export class Synthesizer { const vvc = await voicevoxCore(); throwIfError( vvc, - vvc.ccall( + await vvc.ccall( "voicevox_synthesizer_load_voice_model", "number", ["number", "number"], @@ -174,11 +185,72 @@ export class Synthesizer { ) ); } + + async tts(text: string, speaker: number) { + const vvc = await voicevoxCore(); + const enableInterrogativeUpspeakPtr = allocPointer<"boolean">(vvc); + vvc.ccall( + "voicevox_make_default_tts_options_wasm", + "void", + ["number"], + [enableInterrogativeUpspeakPtr] + ); + + const enableInterrogativeUpspeak = + vvc.getValue(enableInterrogativeUpspeakPtr, "i8") !== 0; + + const outputWavLengthPtr = allocPointer<"i32">(vvc); + const outputWavPtrPtr = allocPointer<"u8*">(vvc); + + throwIfError( + vvc, + await vvc.ccall( + "voicevox_synthesizer_tts", + "number", + ["number", "string", "number", "boolean", "number", "number"], + [ + this._pointer, + text, + speaker, + enableInterrogativeUpspeak, + outputWavLengthPtr, + outputWavPtrPtr, + ] + ) + ); + + const outputWavLength = vvc.getValue(outputWavLengthPtr, "i32"); + const outputWavPtr = getPointerValue(vvc, outputWavPtrPtr); + + const outputWavRef = new Uint8Array( + vvc.HEAPU8.buffer, + outputWavPtr, + outputWavLength + ); + const outputWav = outputWavRef.slice(); + vvc.ccall("voicevox_wav_free", "void", ["number"], [outputWavPtr]); + + return outputWav; + } + + async metas() { + const vvc = await voicevoxCore(); + const returnPtr = vvc.ccall( + "voicevox_synthesizer_create_metas_json", + "number", + ["number"], + [this._pointer] + ); + const metas = utf8ToString(vvc, returnPtr); + vvc.ccall("voicevox_json_free", "void", ["number"], [returnPtr]); + return JSON.parse(metas); + } } const voiceModelFinalizer = new FinalizationRegistry( (pointer: Pointer<"VoicevoxVoiceModel">) => { const vvc = _voicevoxCore; if (vvc) { + console.log("Deleting voice model", pointer); vvc.ccall("voicevox_voice_model_delete", "void", ["number"], [pointer]); } } @@ -188,7 +260,7 @@ export class VoiceModel { const vvc = await voicevoxCore(); const nonce = Math.floor(Math.random() * 1000000); vvc.FS.writeFile(`/data/voice_model_${nonce}.vvm`, model, { flags: "w" }); - const returnPtr = allocPointer<"VoicevoxVoiceModel">(vvc); + const returnPtr = allocPointer<"VoicevoxVoiceModel*">(vvc); throwIfError( vvc, vvc.ccall( @@ -198,14 +270,23 @@ export class VoiceModel { [`/data/voice_model_${nonce}.vvm`, returnPtr] ) ); - const pointer = vvc.getValue( - returnPtr, - "i32" - ) as Pointer<"VoicevoxVoiceModel">; + const pointer = getPointerValue(vvc, returnPtr); const voiceModel = new VoiceModel(pointer); console.log("Initialized VoiceModel", voiceModel); voiceModelFinalizer.register(voiceModel, pointer); return voiceModel; } constructor(public _pointer: Pointer<"VoicevoxVoiceModel">) {} + + async metas() { + const vvc = await voicevoxCore(); + const returnPtr = vvc.ccall( + "voicevox_voice_model_get_metas_json", + "number", + ["number"], + [this._pointer] + ); + const metas = utf8ToString(vvc, returnPtr); + return JSON.parse(metas); + } }