diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml index 920d43389..491d1968c 100644 --- a/.github/workflows/build_and_deploy.yml +++ b/.github/workflows/build_and_deploy.yml @@ -146,6 +146,7 @@ jobs: # libonnxruntimeについてはバージョン付のshared libraryを使用するためバージョンがついてないものを削除する rm -f artifact/${{ env.ASSET_NAME }}/libonnxruntime.{so,dylib} cp -v README.md "artifact/${{ env.ASSET_NAME }}/README.txt" + cp -vr model "artifact/${{ env.ASSET_NAME }}/" echo "${{ env.VERSION }}" > "artifact/${{ env.ASSET_NAME }}/VERSION" - name: Code signing (Windows) if: startsWith(matrix.os, 'windows') && github.event.inputs.code_signing == 'true' diff --git a/.gitignore b/.gitignore index 830f978dd..0306d8d38 100644 --- a/.gitignore +++ b/.gitignore @@ -24,11 +24,6 @@ core/_core.cpp __pycache__/ *.egg-info -# Maturin -*.abi3.dll -*.abi3.dylib -*.abi3.so - # CMake CMakeFiles/ CMakeCache.txt diff --git a/Cargo.lock b/Cargo.lock index 3632e9901..e059f86a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -883,6 +883,18 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-err" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0845fa252299212f0389d64ba26f34fa32cfe41588355f21ed507c59a0f64541" + +[[package]] +name = "fs_extra" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" + [[package]] name = "futures" version = "0.3.24" @@ -1802,6 +1814,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "process_path" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f676f11eb0b3e2ea0fbaee218fa6b806689e2297b8c8adc5bf73df465c4f6171" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "pyo3" version = "0.17.2" @@ -2776,12 +2798,14 @@ dependencies = [ "derive-new", "easy-ext", "flate2", + "fs-err", "heck", "humansize", "once_cell", "onnxruntime", "open_jtalk", "pretty_assertions", + "process_path", "regex", "rstest", "serde", @@ -2812,7 +2836,9 @@ dependencies = [ name = "voicevox_core_python_api" version = "0.0.0" dependencies = [ + "anyhow", "easy-ext", + "fs_extra", "log", "numpy", "pyo3", diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml index 248c6a7d4..b558004d7 100644 --- a/crates/voicevox_core/Cargo.toml +++ b/crates/voicevox_core/Cargo.toml @@ -15,11 +15,14 @@ cfg-if = "1.0.0" derive-getters = "0.2.0" derive-new = "0.5.9" easy-ext.workspace = true +fs-err = "2.9.0" once_cell.workspace = true onnxruntime = { git = "https://github.com/VOICEVOX/onnxruntime-rs.git", rev="405f62fb53df1b59b0e69adafbd1c28e4d5c2787" } +process_path = "0.1.4" serde.workspace = true serde_json.workspace = true thiserror.workspace = true +tracing.workspace = true open_jtalk = { git = "https://github.com/VOICEVOX/open_jtalk-rs.git", rev="9edab53f0bfa877dbb37224d17fd0f3efbe32abd" } regex = "1.6.0" @@ -34,5 +37,4 @@ heck = "0.4.0" [target."cfg(windows)".dependencies] humansize = "2.1.2" -tracing.workspace = true windows = { version = "0.43.0", features = ["Win32_Foundation", "Win32_Graphics_Dxgi"] } diff --git a/crates/voicevox_core/src/error.rs b/crates/voicevox_core/src/error.rs index 0dee3db41..3835b678d 100644 --- a/crates/voicevox_core/src/error.rs +++ b/crates/voicevox_core/src/error.rs @@ -2,6 +2,7 @@ use self::engine::{FullContextLabelError, KanaParseError}; use self::result_code::VoicevoxResultCode::{self, *}; use super::*; //use engine:: +use std::path::PathBuf; use thiserror::Error; /* @@ -23,8 +24,12 @@ pub enum Error { #[error("{}", base_error_message(VOICEVOX_RESULT_GPU_SUPPORT_ERROR))] GpuSupport, - #[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_MODEL_ERROR))] - LoadModel(#[source] anyhow::Error), + #[error("{} ({}): {source}", base_error_message(VOICEVOX_RESULT_LOAD_MODEL_ERROR), path.display())] + LoadModel { + path: PathBuf, + #[source] + source: anyhow::Error, + }, #[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_METAS_ERROR))] LoadMetas(#[source] anyhow::Error), @@ -70,8 +75,17 @@ impl PartialEq for Error { | (Self::GpuSupport, Self::GpuSupport) | (Self::UninitializedStatus, Self::UninitializedStatus) | (Self::InferenceFailed, Self::InferenceFailed) => true, - (Self::LoadModel(e1), Self::LoadModel(e2)) - | (Self::LoadMetas(e1), Self::LoadMetas(e2)) + ( + Self::LoadModel { + path: path1, + source: source1, + }, + Self::LoadModel { + path: path2, + source: source2, + }, + ) => (path1, source1.to_string()) == (path2, source2.to_string()), + (Self::LoadMetas(e1), Self::LoadMetas(e2)) | (Self::GetSupportedDevices(e1), Self::GetSupportedDevices(e2)) => { e1.to_string() == e2.to_string() } diff --git a/crates/voicevox_core/src/include_models.rs b/crates/voicevox_core/src/include_models.rs deleted file mode 100644 index 2d80e2deb..000000000 --- a/crates/voicevox_core/src/include_models.rs +++ /dev/null @@ -1,30 +0,0 @@ -[ - Model{ - predict_duration_model: include_bytes!(concat!( - env!("CARGO_WORKSPACE_DIR"), - "/model/predict_duration-0.onnx" - )), - predict_intonation_model: include_bytes!(concat!( - env!("CARGO_WORKSPACE_DIR"), - "/model/predict_intonation-0.onnx" - )), - decode_model: include_bytes!(concat!( - env!("CARGO_WORKSPACE_DIR"), - "/model/decode-0.onnx" - )), - }, - Model{ - predict_duration_model: include_bytes!(concat!( - env!("CARGO_WORKSPACE_DIR"), - "/model/predict_duration-1.onnx" - )), - predict_intonation_model: include_bytes!(concat!( - env!("CARGO_WORKSPACE_DIR"), - "/model/predict_intonation-1.onnx" - )), - decode_model: include_bytes!(concat!( - env!("CARGO_WORKSPACE_DIR"), - "/model/decode-1.onnx" - )), - }, -] diff --git a/crates/voicevox_core/src/include_speaker_id_map.rs b/crates/voicevox_core/src/include_speaker_id_map.rs deleted file mode 100644 index 3b7720c6a..000000000 --- a/crates/voicevox_core/src/include_speaker_id_map.rs +++ /dev/null @@ -1,6 +0,0 @@ -[ -(0, (0, 0)), -(1, (0, 1)), -(2, (1, 0)), -(3, (1, 1)), -] diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs index 304772e51..081c7626a 100644 --- a/crates/voicevox_core/src/publish.rs +++ b/crates/voicevox_core/src/publish.rs @@ -8,14 +8,11 @@ use onnxruntime::{ session::{AnyArray, NdArray}, }; use std::ffi::{CStr, CString}; +use std::path::PathBuf; use std::sync::Mutex; -use std::{collections::BTreeMap, path::PathBuf}; const PHONEME_LENGTH_MINIMAL: f32 = 0.01; -static SPEAKER_ID_MAP: Lazy> = - Lazy::new(|| include!("include_speaker_id_map.rs").into_iter().collect()); - pub struct VoicevoxCore { synthesis_engine: SynthesisEngine, use_gpu: bool, @@ -284,7 +281,7 @@ impl InferenceCore { status.load_metas()?; if load_all_models { - for model_index in 0..Status::MODELS_COUNT { + for model_index in 0..MODEL_FILE_SET.models_count() { status.load_model(model_index)?; } } @@ -363,7 +360,7 @@ impl InferenceCore { return Err(Error::InvalidSpeakerId { speaker_id }); }; - if model_index >= Status::MODELS_COUNT { + if model_index >= MODEL_FILE_SET.models_count() { return Err(Error::InvalidModelIndex { model_index }); } @@ -416,7 +413,7 @@ impl InferenceCore { return Err(Error::InvalidSpeakerId { speaker_id }); }; - if model_index >= Status::MODELS_COUNT { + if model_index >= MODEL_FILE_SET.models_count() { return Err(Error::InvalidModelIndex { model_index }); } @@ -474,7 +471,7 @@ impl InferenceCore { return Err(Error::InvalidSpeakerId { speaker_id }); }; - if model_index >= Status::MODELS_COUNT { + if model_index >= MODEL_FILE_SET.models_count() { return Err(Error::InvalidModelIndex { model_index }); } @@ -563,9 +560,13 @@ impl InferenceCore { } } -pub static METAS: &str = Status::METAS_STR; +pub static METAS: &Lazy<&str> = { + static METAS: Lazy<&str> = Lazy::new(|| &MODEL_FILE_SET.metas_str); + &METAS +}; -pub static METAS_CSTRING: Lazy = Lazy::new(|| CString::new(METAS).unwrap()); +pub static METAS_CSTRING: Lazy = + Lazy::new(|| CString::new(&*MODEL_FILE_SET.metas_str).unwrap()); pub static SUPPORTED_DEVICES: Lazy = Lazy::new(|| SupportedDevices::get_supported_devices().unwrap()); @@ -574,7 +575,7 @@ pub static SUPPORTED_DEVICES_CSTRING: Lazy = Lazy::new(|| CString::new(SUPPORTED_DEVICES.to_json().to_string()).unwrap()); fn get_model_index_and_speaker_id(speaker_id: u32) -> Option<(usize, u32)> { - SPEAKER_ID_MAP.get(&speaker_id).copied() + MODEL_FILE_SET.speaker_id_map.get(&speaker_id).copied() } pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'static str { @@ -584,9 +585,7 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR => { "OpenJTalkの辞書が読み込まれていません\0" } - VOICEVOX_RESULT_LOAD_MODEL_ERROR => { - "modelデータ読み込み中にOnnxruntimeエラーが発生しました\0" - } + VOICEVOX_RESULT_LOAD_MODEL_ERROR => "modelデータ読み込みに失敗しました\0", VOICEVOX_RESULT_LOAD_METAS_ERROR => "メタデータ読み込みに失敗しました\0", VOICEVOX_RESULT_GPU_SUPPORT_ERROR => "GPU機能をサポートすることができません\0", diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs index 9e5a8cec0..16a717904 100644 --- a/crates/voicevox_core/src/status.rs +++ b/crates/voicevox_core/src/status.rs @@ -1,4 +1,5 @@ use super::*; +use anyhow::Context as _; use once_cell::sync::Lazy; use onnxruntime::{ environment::Environment, @@ -6,6 +7,13 @@ use onnxruntime::{ GraphOptimizationLevel, LoggingLevel, }; use serde::{Deserialize, Serialize}; +use std::{ + env, + path::{Path, PathBuf}, +}; +use tracing::error; + +mod model_file; cfg_if! { if #[cfg(not(feature="directml"))]{ @@ -14,6 +22,14 @@ cfg_if! { } use std::collections::{BTreeMap, BTreeSet}; +pub(crate) static MODEL_FILE_SET: Lazy = Lazy::new(|| { + let result = ModelFileSet::new(); + if let Err(err) = &result { + error!("ファイルを読み込めなかったためクラッシュします: {err}"); + } + result.unwrap() +}); + pub struct Status { models: StatusModels, light_session_options: SessionOptions, // 軽いモデルはこちらを使う @@ -33,10 +49,96 @@ struct SessionOptions { use_gpu: bool, } +pub(crate) struct ModelFileSet { + pub(crate) speaker_id_map: BTreeMap, + pub(crate) metas_str: String, + models: Vec, +} + +impl ModelFileSet { + fn new() -> anyhow::Result { + let path = { + let root_dir = if cfg!(test) { + Path::new(env!("CARGO_WORKSPACE_DIR")).join("model") + } else if let Some(root_dir) = env::var_os(ROOT_DIR_ENV_NAME) { + root_dir.into() + } else { + process_path::get_dylib_path() + .or_else(process_path::get_executable_path) + .with_context(|| "Could not get the current dynamic library/executable path")? + .parent() + .unwrap_or_else(|| "".as_ref()) + .join("model") + }; + + move |rel_path| root_dir.join(rel_path) + }; + + let metas_str = fs_err::read_to_string(path("metas.json"))?; + + let models = model_file::MODEL_FILE_NAMES + .iter() + .map( + |&ModelFileNames { + predict_duration_model, + predict_intonation_model, + decode_model, + }| { + let predict_duration_model = ModelFile::new(&path(predict_duration_model))?; + let predict_intonation_model = ModelFile::new(&path(predict_intonation_model))?; + let decode_model = ModelFile::new(&path(decode_model))?; + Ok(Model { + predict_duration_model, + predict_intonation_model, + decode_model, + }) + }, + ) + .collect::>()?; + + return Ok(Self { + speaker_id_map: model_file::SPEAKER_ID_MAP.iter().copied().collect(), + metas_str, + models, + }); + + const ROOT_DIR_ENV_NAME: &str = "VV_MODELS_ROOT_DIR"; + } + + pub(crate) fn models_count(&self) -> usize { + self.models.len() + } +} + +struct ModelFileNames { + predict_duration_model: &'static str, + predict_intonation_model: &'static str, + decode_model: &'static str, +} + +#[derive(thiserror::Error, Debug)] +#[error("不正なモデルファイルです")] +struct DecryptModelError; + struct Model { - predict_duration_model: &'static [u8], - predict_intonation_model: &'static [u8], - decode_model: &'static [u8], + predict_duration_model: ModelFile, + predict_intonation_model: ModelFile, + decode_model: ModelFile, +} + +struct ModelFile { + path: PathBuf, + content: Vec, +} + +impl ModelFile { + fn new(path: &Path) -> anyhow::Result { + let content = fs_err::read(path)?; + Ok(Self { + path: path.to_owned(), + content, + }) + } } #[derive(Deserialize, Getters)] @@ -101,13 +203,6 @@ impl SupportedDevices { unsafe impl Send for Status {} impl Status { - const MODELS: &'static [Model] = &include!("include_models.rs"); - - pub const METAS_STR: &'static str = - include_str!(concat!(env!("CARGO_WORKSPACE_DIR"), "/model/metas.json")); - - pub const MODELS_COUNT: usize = Self::MODELS.len(); - pub fn new(use_gpu: bool, cpu_num_threads: u16) -> Self { Self { models: StatusModels { @@ -122,8 +217,8 @@ impl Status { } pub fn load_metas(&mut self) -> Result<()> { - let metas: Vec = - serde_json::from_str(Self::METAS_STR).map_err(|e| Error::LoadMetas(e.into()))?; + let metas: Vec = serde_json::from_str(&MODEL_FILE_SET.metas_str) + .map_err(|e| Error::LoadMetas(e.into()))?; for meta in metas.iter() { for style in meta.styles().iter() { @@ -135,17 +230,14 @@ impl Status { } pub fn load_model(&mut self, model_index: usize) -> Result<()> { - if model_index < Self::MODELS.len() { - let model = &Self::MODELS[model_index]; - let predict_duration_session = self - .new_session(model.predict_duration_model, &self.light_session_options) - .map_err(Error::LoadModel)?; - let predict_intonation_session = self - .new_session(model.predict_intonation_model, &self.light_session_options) - .map_err(Error::LoadModel)?; - let decode_model = self - .new_session(model.decode_model, &self.heavy_session_options) - .map_err(Error::LoadModel)?; + if model_index < MODEL_FILE_SET.models.len() { + let model = &MODEL_FILE_SET.models[model_index]; + let predict_duration_session = + self.new_session(&model.predict_duration_model, &self.light_session_options)?; + let predict_intonation_session = + self.new_session(&model.predict_intonation_model, &self.light_session_options)?; + let decode_model = + self.new_session(&model.decode_model, &self.heavy_session_options)?; self.models .predict_duration @@ -168,9 +260,21 @@ impl Status { && self.models.decode.contains_key(&model_index) } - fn new_session>( + fn new_session( + &self, + model_file: &ModelFile, + session_options: &SessionOptions, + ) -> Result> { + self.new_session_from_bytes(|| model_file::decrypt(&model_file.content), session_options) + .map_err(|source| Error::LoadModel { + path: model_file.path.clone(), + source, + }) + } + + fn new_session_from_bytes( &self, - model_bytes: B, + model_bytes: impl FnOnce() -> std::result::Result, DecryptModelError>, session_options: &SessionOptions, ) -> anyhow::Result> { let session_builder = ENVIRONMENT @@ -195,7 +299,7 @@ impl Status { session_builder }; - Ok(session_builder.with_model_from_memory(model_bytes)?) + Ok(session_builder.with_model_from_memory(model_bytes()?)?) } pub fn validate_speaker_id(&self, speaker_id: u32) -> bool { diff --git a/crates/voicevox_core/src/status/model_file.rs b/crates/voicevox_core/src/status/model_file.rs new file mode 100644 index 000000000..f5dce926a --- /dev/null +++ b/crates/voicevox_core/src/status/model_file.rs @@ -0,0 +1,21 @@ +use super::{DecryptModelError, ModelFileNames}; + +pub(super) fn decrypt(content: &[u8]) -> std::result::Result, DecryptModelError> { + Ok(content.to_owned()) +} + +pub(super) const SPEAKER_ID_MAP: &[(u32, (usize, u32))] = + &[(0, (0, 0)), (1, (0, 1)), (2, (1, 0)), (3, (1, 1))]; + +pub(super) const MODEL_FILE_NAMES: &[ModelFileNames] = &[ + ModelFileNames { + predict_duration_model: "predict_duration-0.onnx", + predict_intonation_model: "predict_intonation-0.onnx", + decode_model: "decode-0.onnx", + }, + ModelFileNames { + predict_duration_model: "predict_duration-1.onnx", + predict_intonation_model: "predict_intonation-1.onnx", + decode_model: "decode-1.onnx", + }, +]; diff --git a/crates/voicevox_core_c_api/src/helpers.rs b/crates/voicevox_core_c_api/src/helpers.rs index bb39bd99f..558ee8d98 100644 --- a/crates/voicevox_core_c_api/src/helpers.rs +++ b/crates/voicevox_core_c_api/src/helpers.rs @@ -23,7 +23,7 @@ pub(crate) fn into_result_code_with_error(result: CApiResult<()>) -> VoicevoxRes Ok(()) => VOICEVOX_RESULT_OK, Err(RustApi(NotLoadedOpenjtalkDict)) => VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR, Err(RustApi(GpuSupport)) => VOICEVOX_RESULT_GPU_SUPPORT_ERROR, - Err(RustApi(LoadModel(_))) => VOICEVOX_RESULT_LOAD_MODEL_ERROR, + Err(RustApi(LoadModel { .. })) => VOICEVOX_RESULT_LOAD_MODEL_ERROR, Err(RustApi(LoadMetas(_))) => VOICEVOX_RESULT_LOAD_METAS_ERROR, Err(RustApi(GetSupportedDevices(_))) => VOICEVOX_RESULT_GET_SUPPORTED_DEVICES_ERROR, Err(RustApi(UninitializedStatus)) => VOICEVOX_RESULT_UNINITIALIZED_STATUS_ERROR, diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index 55992f98b..7ac2b38b9 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -471,7 +471,10 @@ mod tests { VoicevoxResultCode::VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR )] #[case( - Err(Error::LoadModel(anyhow!("some load model error"))), + Err(Error::LoadModel { + path: "path/to/model.onnx".into(), + source: anyhow!("some load model error"), + }), VoicevoxResultCode::VOICEVOX_RESULT_LOAD_MODEL_ERROR )] #[case( diff --git a/crates/voicevox_core_python_api/.gitignore b/crates/voicevox_core_python_api/.gitignore new file mode 100644 index 000000000..2caff6041 --- /dev/null +++ b/crates/voicevox_core_python_api/.gitignore @@ -0,0 +1,7 @@ +# build.rs +/python/voicevox_core/model/ + +# Maturin +*.abi3.dll +*.abi3.dylib +*.abi3.so diff --git a/crates/voicevox_core_python_api/Cargo.toml b/crates/voicevox_core_python_api/Cargo.toml index 04b33a192..87aec1341 100644 --- a/crates/voicevox_core_python_api/Cargo.toml +++ b/crates/voicevox_core_python_api/Cargo.toml @@ -24,3 +24,7 @@ serde.workspace = true serde_json.workspace = true tracing.workspace = true voicevox_core.workspace = true + +[build-dependencies] +anyhow.workspace = true +fs_extra = "1.2.0" diff --git a/crates/voicevox_core_python_api/build.rs b/crates/voicevox_core_python_api/build.rs new file mode 100644 index 000000000..c06dcaae3 --- /dev/null +++ b/crates/voicevox_core_python_api/build.rs @@ -0,0 +1,13 @@ +use fs_extra::dir::CopyOptions; + +fn main() -> anyhow::Result<()> { + fs_extra::dir::copy( + "../../model", + "./python/voicevox_core/", + &CopyOptions { + overwrite: true, + ..Default::default() + }, + )?; + Ok(()) +}