実行時にモデルを読む (VOICEVOX#365)

* [wip] 実行時にモデルを読む
* 実行時にモデルを読む
* OSS版と製品版とで変わらないであろうアイテムを移動
* `$VV_MODELS_ROOT_DIR`を設定
* root_dirのデフォルトはDLLの位置を基準にする
* model/を置く場所はここではない
* `VVM` → `MODEL_FILE_SET`
* モデルファイルをwhlの中に入れる
* 不要なTODOを消す
* `DecryptError` → `DecryptModelError`
* エラーメッセージを変更
  Co-authored-by: Hiroshiba <[email protected]>
* voicevox_core_python_api下に.gitignoreを作る
* `Error::LoadModel`にファイルパスを入れる
* エラーメッセージに重複部分があったので削る
  Co-authored-by: Hiroshiba <[email protected]>
sevenc-nanashi · Jan 5, 2023 · a787f6d
1 parent da99592
commit a787f6d
Show file tree

Hide file tree

Showing 15 changed files with 241 additions and 88 deletions.
diff --git a/.github/workflows/build_and_deploy.yml b/.github/workflows/build_and_deploy.yml
@@ -146,6 +146,7 @@ jobs:
           # libonnxruntimeについてはバージョン付のshared libraryを使用するためバージョンがついてないものを削除する
           rm -f artifact/${{ env.ASSET_NAME }}/libonnxruntime.{so,dylib}
           cp -v README.md "artifact/${{ env.ASSET_NAME }}/README.txt"
+          cp -vr model "artifact/${{ env.ASSET_NAME }}/"
           echo "${{ env.VERSION }}" > "artifact/${{ env.ASSET_NAME }}/VERSION"
       - name: Code signing (Windows)
         if: startsWith(matrix.os, 'windows') && github.event.inputs.code_signing == 'true'

diff --git a/.gitignore b/.gitignore
@@ -24,11 +24,6 @@ core/_core.cpp
 __pycache__/
 *.egg-info
 
-# Maturin
-*.abi3.dll
-*.abi3.dylib
-*.abi3.so
-
 # CMake
 CMakeFiles/
 CMakeCache.txt

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml
@@ -15,11 +15,14 @@ cfg-if = "1.0.0"
 derive-getters = "0.2.0"
 derive-new = "0.5.9"
 easy-ext.workspace = true
+fs-err = "2.9.0"
 once_cell.workspace = true
 onnxruntime = { git = "https://github.com/VOICEVOX/onnxruntime-rs.git", rev="405f62fb53df1b59b0e69adafbd1c28e4d5c2787" }
+process_path = "0.1.4"
 serde.workspace = true
 serde_json.workspace = true
 thiserror.workspace = true
+tracing.workspace = true
 open_jtalk = { git = "https://github.com/VOICEVOX/open_jtalk-rs.git", rev="9edab53f0bfa877dbb37224d17fd0f3efbe32abd" }
 regex = "1.6.0"
 
@@ -34,5 +37,4 @@ heck = "0.4.0"
 
 [target."cfg(windows)".dependencies]
 humansize = "2.1.2"
-tracing.workspace = true
 windows = { version = "0.43.0", features = ["Win32_Foundation", "Win32_Graphics_Dxgi"] }
diff --git a/crates/voicevox_core/src/error.rs b/crates/voicevox_core/src/error.rs
@@ -2,6 +2,7 @@ use self::engine::{FullContextLabelError, KanaParseError};
 use self::result_code::VoicevoxResultCode::{self, *};
 use super::*;
 //use engine::
+use std::path::PathBuf;
 use thiserror::Error;
 
 /*
@@ -23,8 +24,12 @@ pub enum Error {
     #[error("{}", base_error_message(VOICEVOX_RESULT_GPU_SUPPORT_ERROR))]
     GpuSupport,
 
-    #[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_MODEL_ERROR))]
-    LoadModel(#[source] anyhow::Error),
+    #[error("{} ({}): {source}", base_error_message(VOICEVOX_RESULT_LOAD_MODEL_ERROR), path.display())]
+    LoadModel {
+        path: PathBuf,
+        #[source]
+        source: anyhow::Error,
+    },
 
     #[error("{},{0}", base_error_message(VOICEVOX_RESULT_LOAD_METAS_ERROR))]
     LoadMetas(#[source] anyhow::Error),
@@ -70,8 +75,17 @@ impl PartialEq for Error {
             | (Self::GpuSupport, Self::GpuSupport)
             | (Self::UninitializedStatus, Self::UninitializedStatus)
             | (Self::InferenceFailed, Self::InferenceFailed) => true,
-            (Self::LoadModel(e1), Self::LoadModel(e2))
-            | (Self::LoadMetas(e1), Self::LoadMetas(e2))
+            (
+                Self::LoadModel {
+                    path: path1,
+                    source: source1,
+                },
+                Self::LoadModel {
+                    path: path2,
+                    source: source2,
+                },
+            ) => (path1, source1.to_string()) == (path2, source2.to_string()),
+            (Self::LoadMetas(e1), Self::LoadMetas(e2))
             | (Self::GetSupportedDevices(e1), Self::GetSupportedDevices(e2)) => {
                 e1.to_string() == e2.to_string()
             }

diff --git a/crates/voicevox_core/src/include_models.rs b/crates/voicevox_core/src/include_models.rs
diff --git a/crates/voicevox_core/src/include_speaker_id_map.rs b/crates/voicevox_core/src/include_speaker_id_map.rs
diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs
@@ -8,14 +8,11 @@ use onnxruntime::{
     session::{AnyArray, NdArray},
 };
 use std::ffi::{CStr, CString};
+use std::path::PathBuf;
 use std::sync::Mutex;
-use std::{collections::BTreeMap, path::PathBuf};
 
 const PHONEME_LENGTH_MINIMAL: f32 = 0.01;
 
-static SPEAKER_ID_MAP: Lazy<BTreeMap<u32, (usize, u32)>> =
-    Lazy::new(|| include!("include_speaker_id_map.rs").into_iter().collect());
-
 pub struct VoicevoxCore {
     synthesis_engine: SynthesisEngine,
     use_gpu: bool,
@@ -284,7 +281,7 @@ impl InferenceCore {
             status.load_metas()?;
 
             if load_all_models {
-                for model_index in 0..Status::MODELS_COUNT {
+                for model_index in 0..MODEL_FILE_SET.models_count() {
                     status.load_model(model_index)?;
                 }
             }
@@ -363,7 +360,7 @@ impl InferenceCore {
                 return Err(Error::InvalidSpeakerId { speaker_id });
             };
 
-        if model_index >= Status::MODELS_COUNT {
+        if model_index >= MODEL_FILE_SET.models_count() {
             return Err(Error::InvalidModelIndex { model_index });
         }
 
@@ -416,7 +413,7 @@ impl InferenceCore {
                 return Err(Error::InvalidSpeakerId { speaker_id });
             };
 
-        if model_index >= Status::MODELS_COUNT {
+        if model_index >= MODEL_FILE_SET.models_count() {
             return Err(Error::InvalidModelIndex { model_index });
         }
 
@@ -474,7 +471,7 @@ impl InferenceCore {
                 return Err(Error::InvalidSpeakerId { speaker_id });
             };
 
-        if model_index >= Status::MODELS_COUNT {
+        if model_index >= MODEL_FILE_SET.models_count() {
             return Err(Error::InvalidModelIndex { model_index });
         }
 
@@ -563,9 +560,13 @@ impl InferenceCore {
     }
 }
 
-pub static METAS: &str = Status::METAS_STR;
+pub static METAS: &Lazy<&str> = {
+    static METAS: Lazy<&str> = Lazy::new(|| &MODEL_FILE_SET.metas_str);
+    &METAS
+};
 
-pub static METAS_CSTRING: Lazy<CString> = Lazy::new(|| CString::new(METAS).unwrap());
+pub static METAS_CSTRING: Lazy<CString> =
+    Lazy::new(|| CString::new(&*MODEL_FILE_SET.metas_str).unwrap());
 
 pub static SUPPORTED_DEVICES: Lazy<SupportedDevices> =
     Lazy::new(|| SupportedDevices::get_supported_devices().unwrap());
@@ -574,7 +575,7 @@ pub static SUPPORTED_DEVICES_CSTRING: Lazy<CString> =
     Lazy::new(|| CString::new(SUPPORTED_DEVICES.to_json().to_string()).unwrap());
 
 fn get_model_index_and_speaker_id(speaker_id: u32) -> Option<(usize, u32)> {
-    SPEAKER_ID_MAP.get(&speaker_id).copied()
+    MODEL_FILE_SET.speaker_id_map.get(&speaker_id).copied()
 }
 
 pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'static str {
@@ -584,9 +585,7 @@ pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'stati
         VOICEVOX_RESULT_NOT_LOADED_OPENJTALK_DICT_ERROR => {
             "OpenJTalkの辞書が読み込まれていません\0"
         }
-        VOICEVOX_RESULT_LOAD_MODEL_ERROR => {
-            "modelデータ読み込み中にOnnxruntimeエラーが発生しました\0"
-        }
+        VOICEVOX_RESULT_LOAD_MODEL_ERROR => "modelデータ読み込みに失敗しました\0",
         VOICEVOX_RESULT_LOAD_METAS_ERROR => "メタデータ読み込みに失敗しました\0",
 
         VOICEVOX_RESULT_GPU_SUPPORT_ERROR => "GPU機能をサポートすることができません\0",