From 94a3c0a879b084cb8cb564f448a5e901809cc155 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Tue, 27 Feb 2024 00:47:02 +0900 Subject: [PATCH] =?UTF-8?q?Python=20API=E3=81=AE`asyncio`=E7=89=88?= =?UTF-8?q?=E5=AE=9F=E8=A3=85=E3=82=92`mod`=E5=86=85=E3=81=AB=E7=A7=BB?= =?UTF-8?q?=E3=81=99=20(#754)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core_python_api/src/lib.rs | 1017 ++++++++++---------- 1 file changed, 518 insertions(+), 499 deletions(-) diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs index ea20066b4..4d190333d 100644 --- a/crates/voicevox_core_python_api/src/lib.rs +++ b/crates/voicevox_core_python_api/src/lib.rs @@ -1,25 +1,15 @@ -use std::{marker::PhantomData, path::PathBuf, sync::Arc}; +use std::marker::PhantomData; mod convert; -use self::convert::{ - async_modify_accent_phrases, from_acceleration_mode, from_dataclass, from_utf8_path, - to_py_user_dict_word, to_py_uuid, to_pydantic_dataclass, to_pydantic_voice_model_meta, - to_rust_user_dict_word, to_rust_uuid, VoicevoxCoreResultExt as _, -}; -use camino::Utf8PathBuf; +use self::convert::{from_utf8_path, to_pydantic_dataclass, VoicevoxCoreResultExt as _}; use easy_ext::ext; use log::debug; use pyo3::{ create_exception, exceptions::{PyException, PyKeyError, PyValueError}, - pyclass, pyfunction, pymethods, pymodule, - types::{IntoPyDict as _, PyBytes, PyDict, PyList, PyModule, PyString}, - wrap_pyfunction, PyAny, PyObject, PyRef, PyResult, PyTypeInfo, Python, ToPyObject, -}; -use uuid::Uuid; -use voicevox_core::{ - AccelerationMode, AudioQueryModel, InitializeOptions, StyleId, SynthesisOptions, TtsOptions, - UserDictWord, VoiceModelId, + pyfunction, pymodule, + types::PyModule, + wrap_pyfunction, PyAny, PyResult, PyTypeInfo, Python, }; #[pymodule] @@ -42,10 +32,10 @@ fn rust(py: Python<'_>, module: &PyModule) -> PyResult<()> { module.add_and_register_submodule(blocking_module)?; let asyncio_module = PyModule::new(py, "voicevox_core._rust.asyncio")?; - asyncio_module.add_class::()?; - asyncio_module.add_class::()?; - asyncio_module.add_class::()?; - asyncio_module.add_class::()?; + asyncio_module.add_class::()?; + asyncio_module.add_class::()?; + asyncio_module.add_class::()?; + asyncio_module.add_class::()?; module.add_and_register_submodule(asyncio_module) } @@ -95,12 +85,6 @@ exceptions! { InvalidWordError: PyValueError; } -#[pyclass] -#[derive(Clone)] -struct VoiceModel { - model: voicevox_core::tokio::VoiceModel, -} - #[pyfunction] fn supported_devices(py: Python<'_>) -> PyResult<&PyAny> { let class = py @@ -111,397 +95,6 @@ fn supported_devices(py: Python<'_>) -> PyResult<&PyAny> { to_pydantic_dataclass(s, class) } -#[pymethods] -impl VoiceModel { - #[staticmethod] - fn from_path(py: Python<'_>, path: PathBuf) -> PyResult<&PyAny> { - pyo3_asyncio::tokio::future_into_py(py, async move { - let model = voicevox_core::tokio::VoiceModel::from_path(path).await; - let model = Python::with_gil(|py| model.into_py_result(py))?; - Ok(Self { model }) - }) - } - - #[getter] - fn id(&self) -> &str { - self.model.id().raw_voice_model_id() - } - - #[getter] - fn metas<'py>(&self, py: Python<'py>) -> Vec<&'py PyAny> { - to_pydantic_voice_model_meta(self.model.metas(), py).unwrap() - } -} - -#[pyclass] -#[derive(Clone)] -struct OpenJtalk { - open_jtalk: voicevox_core::tokio::OpenJtalk, -} - -#[pymethods] -impl OpenJtalk { - #[allow(clippy::new_ret_no_self)] - #[staticmethod] - fn new( - #[pyo3(from_py_with = "from_utf8_path")] open_jtalk_dict_dir: Utf8PathBuf, - py: Python<'_>, - ) -> PyResult<&PyAny> { - pyo3_asyncio::tokio::future_into_py(py, async move { - let open_jtalk = voicevox_core::tokio::OpenJtalk::new(open_jtalk_dict_dir).await; - let open_jtalk = Python::with_gil(|py| open_jtalk.into_py_result(py))?; - Ok(Self { open_jtalk }) - }) - } - - fn use_user_dict<'py>(&self, user_dict: UserDict, py: Python<'py>) -> PyResult<&'py PyAny> { - let this = self.open_jtalk.clone(); - - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = this.use_user_dict(&user_dict.dict).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } -} - -#[pyclass] -struct Synthesizer { - synthesizer: Closable, Self>, -} - -#[pymethods] -impl Synthesizer { - #[new] - #[pyo3(signature =( - open_jtalk, - acceleration_mode = InitializeOptions::default().acceleration_mode, - cpu_num_threads = InitializeOptions::default().cpu_num_threads, - ))] - fn new( - open_jtalk: OpenJtalk, - #[pyo3(from_py_with = "from_acceleration_mode")] acceleration_mode: AccelerationMode, - cpu_num_threads: u16, - ) -> PyResult { - let synthesizer = voicevox_core::tokio::Synthesizer::new( - open_jtalk.open_jtalk.clone(), - &InitializeOptions { - acceleration_mode, - cpu_num_threads, - }, - ); - let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?; - let synthesizer = Closable::new(synthesizer); - Ok(Self { synthesizer }) - } - - fn __repr__(&self) -> &'static str { - "Synthesizer { .. }" - } - - fn __enter__(slf: PyRef<'_, Self>) -> PyResult> { - slf.synthesizer.get()?; - Ok(slf) - } - - fn __exit__( - &mut self, - #[allow(unused_variables)] exc_type: &PyAny, - #[allow(unused_variables)] exc_value: &PyAny, - #[allow(unused_variables)] traceback: &PyAny, - ) { - self.close(); - } - - #[getter] - fn is_gpu_mode(&self) -> PyResult { - let synthesizer = self.synthesizer.get()?; - Ok(synthesizer.is_gpu_mode()) - } - - #[getter] - fn metas<'py>(&self, py: Python<'py>) -> PyResult> { - let synthesizer = self.synthesizer.get()?; - to_pydantic_voice_model_meta(&synthesizer.metas(), py) - } - - fn load_voice_model<'py>( - &mut self, - model: &'py PyAny, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let model: VoiceModel = model.extract()?; - let synthesizer = self.synthesizer.get()?.clone(); - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = synthesizer.load_voice_model(&model.model).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } - - fn unload_voice_model(&mut self, voice_model_id: &str, py: Python<'_>) -> PyResult<()> { - self.synthesizer - .get()? - .unload_voice_model(&VoiceModelId::new(voice_model_id.to_string())) - .into_py_result(py) - } - - // C APIの挙動と一貫性を持たせる。 - fn is_loaded_voice_model(&self, voice_model_id: &PyString) -> PyResult { - let Ok(voice_model_id) = voice_model_id.to_str() else { - // 与えられたIDがUTF-8ではない場合、それに対応する`VoicdModel`は確実に存在しない - return Ok(false); - }; - Ok(self - .synthesizer - .get()? - .is_loaded_voice_model(&VoiceModelId::new(voice_model_id.to_string()))) - } - - fn audio_query_from_kana<'py>( - &self, - kana: &str, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let kana = kana.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let audio_query = synthesizer - .audio_query_from_kana(&kana, StyleId::new(style_id)) - .await; - - Python::with_gil(|py| { - let class = py.import("voicevox_core")?.getattr("AudioQuery")?; - let ret = to_pydantic_dataclass(audio_query.into_py_result(py)?, class)?; - Ok(ret.to_object(py)) - }) - }, - ) - } - - fn audio_query<'py>(&self, text: &str, style_id: u32, py: Python<'py>) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let text = text.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let audio_query = synthesizer.audio_query(&text, StyleId::new(style_id)).await; - - Python::with_gil(|py| { - let audio_query = audio_query.into_py_result(py)?; - let class = py.import("voicevox_core")?.getattr("AudioQuery")?; - let ret = to_pydantic_dataclass(audio_query, class)?; - Ok(ret.to_object(py)) - }) - }, - ) - } - - fn create_accent_phrases_from_kana<'py>( - &self, - kana: &str, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let kana = kana.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let accent_phrases = synthesizer - .create_accent_phrases_from_kana(&kana, StyleId::new(style_id)) - .await; - Python::with_gil(|py| { - let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; - let accent_phrases = accent_phrases - .into_py_result(py)? - .iter() - .map(|ap| to_pydantic_dataclass(ap, class)) - .collect::>>(); - let list = PyList::new(py, accent_phrases); - Ok(list.to_object(py)) - }) - }, - ) - } - - fn create_accent_phrases<'py>( - &self, - text: &str, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - let text = text.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let accent_phrases = synthesizer - .create_accent_phrases(&text, StyleId::new(style_id)) - .await; - Python::with_gil(|py| { - let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; - let accent_phrases = accent_phrases - .into_py_result(py)? - .iter() - .map(|ap| to_pydantic_dataclass(ap, class)) - .collect::>>(); - let list = PyList::new(py, accent_phrases); - Ok(list.to_object(py)) - }) - }, - ) - } - - fn replace_mora_data<'py>( - &self, - accent_phrases: &'py PyList, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - async_modify_accent_phrases( - accent_phrases, - StyleId::new(style_id), - py, - |a, s| async move { synthesizer.replace_mora_data(&a, s).await }, - ) - } - - fn replace_phoneme_length<'py>( - &self, - accent_phrases: &'py PyList, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - async_modify_accent_phrases( - accent_phrases, - StyleId::new(style_id), - py, - |a, s| async move { synthesizer.replace_phoneme_length(&a, s).await }, - ) - } - - fn replace_mora_pitch<'py>( - &self, - accent_phrases: &'py PyList, - style_id: u32, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - async_modify_accent_phrases( - accent_phrases, - StyleId::new(style_id), - py, - |a, s| async move { synthesizer.replace_mora_pitch(&a, s).await }, - ) - } - - #[pyo3(signature=(audio_query,style_id,enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak))] - fn synthesis<'py>( - &self, - #[pyo3(from_py_with = "from_dataclass")] audio_query: AudioQueryModel, - style_id: u32, - enable_interrogative_upspeak: bool, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let synthesizer = self.synthesizer.get()?.clone(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let wav = synthesizer - .synthesis( - &audio_query, - StyleId::new(style_id), - &SynthesisOptions { - enable_interrogative_upspeak, - }, - ) - .await; - Python::with_gil(|py| { - let wav = wav.into_py_result(py)?; - Ok(PyBytes::new(py, &wav).to_object(py)) - }) - }, - ) - } - - #[pyo3(signature=( - kana, - style_id, - enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak - ))] - fn tts_from_kana<'py>( - &self, - kana: &str, - style_id: u32, - enable_interrogative_upspeak: bool, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let style_id = StyleId::new(style_id); - let options = TtsOptions { - enable_interrogative_upspeak, - }; - let synthesizer = self.synthesizer.get()?.clone(); - let kana = kana.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let wav = synthesizer.tts_from_kana(&kana, style_id, &options).await; - - Python::with_gil(|py| { - let wav = wav.into_py_result(py)?; - Ok(PyBytes::new(py, &wav).to_object(py)) - }) - }, - ) - } - - #[pyo3(signature=( - text, - style_id, - enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak - ))] - fn tts<'py>( - &self, - text: &str, - style_id: u32, - enable_interrogative_upspeak: bool, - py: Python<'py>, - ) -> PyResult<&'py PyAny> { - let style_id = StyleId::new(style_id); - let options = TtsOptions { - enable_interrogative_upspeak, - }; - let synthesizer = self.synthesizer.get()?.clone(); - let text = text.to_owned(); - pyo3_asyncio::tokio::future_into_py_with_locals( - py, - pyo3_asyncio::tokio::get_current_locals(py)?, - async move { - let wav = synthesizer.tts(&text, style_id, &options).await; - - Python::with_gil(|py| { - let wav = wav.into_py_result(py)?; - Ok(PyBytes::new(py, &wav).to_object(py)) - }) - }, - ) - } - - fn close(&mut self) { - self.synthesizer.close() - } -} - struct Closable { content: MaybeClosed, marker: PhantomData, @@ -554,89 +147,6 @@ fn _to_zenkaku(text: &str) -> PyResult { Ok(voicevox_core::__internal::to_zenkaku(text)) } -#[pyclass] -#[derive(Default, Debug, Clone)] -struct UserDict { - dict: Arc, -} - -#[pymethods] -impl UserDict { - #[new] - fn new() -> Self { - Self::default() - } - - fn load<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { - let this = self.dict.clone(); - let path = path.to_owned(); - - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = this.load(&path).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } - - fn save<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { - let this = self.dict.clone(); - let path = path.to_owned(); - - pyo3_asyncio::tokio::future_into_py(py, async move { - let result = this.save(&path).await; - Python::with_gil(|py| result.into_py_result(py)) - }) - } - - fn add_word( - &mut self, - #[pyo3(from_py_with = "to_rust_user_dict_word")] word: UserDictWord, - py: Python<'_>, - ) -> PyResult { - let uuid = self.dict.add_word(word).into_py_result(py)?; - - to_py_uuid(py, uuid) - } - - fn update_word( - &mut self, - #[pyo3(from_py_with = "to_rust_uuid")] word_uuid: Uuid, - #[pyo3(from_py_with = "to_rust_user_dict_word")] word: UserDictWord, - py: Python<'_>, - ) -> PyResult<()> { - self.dict.update_word(word_uuid, word).into_py_result(py)?; - Ok(()) - } - - fn remove_word( - &mut self, - #[pyo3(from_py_with = "to_rust_uuid")] word_uuid: Uuid, - py: Python<'_>, - ) -> PyResult<()> { - self.dict.remove_word(word_uuid).into_py_result(py)?; - Ok(()) - } - - fn import_dict(&mut self, other: &UserDict, py: Python<'_>) -> PyResult<()> { - self.dict.import(&other.dict).into_py_result(py)?; - Ok(()) - } - - #[getter] - fn words<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> { - let words = self.dict.with_words(|words| { - words - .iter() - .map(|(&uuid, word)| { - let uuid = to_py_uuid(py, uuid)?; - let word = to_py_user_dict_word(py, word)?; - Ok((uuid, word)) - }) - .collect::>>() - })?; - Ok(words.into_py_dict(py)) - } -} - mod blocking { use std::{path::PathBuf, sync::Arc}; @@ -1061,3 +571,512 @@ mod blocking { } } } + +mod asyncio { + use std::{path::PathBuf, sync::Arc}; + + use camino::Utf8PathBuf; + use pyo3::{ + pyclass, pymethods, + types::{IntoPyDict as _, PyBytes, PyDict, PyList, PyString}, + PyAny, PyObject, PyRef, PyResult, Python, ToPyObject as _, + }; + use uuid::Uuid; + use voicevox_core::{ + AccelerationMode, AudioQueryModel, InitializeOptions, StyleId, SynthesisOptions, + TtsOptions, UserDictWord, VoiceModelId, + }; + + use crate::{convert::VoicevoxCoreResultExt as _, Closable}; + + #[pyclass] + #[derive(Clone)] + pub(crate) struct VoiceModel { + model: voicevox_core::tokio::VoiceModel, + } + + #[pymethods] + impl VoiceModel { + #[staticmethod] + fn from_path(py: Python<'_>, path: PathBuf) -> PyResult<&PyAny> { + pyo3_asyncio::tokio::future_into_py(py, async move { + let model = voicevox_core::tokio::VoiceModel::from_path(path).await; + let model = Python::with_gil(|py| model.into_py_result(py))?; + Ok(Self { model }) + }) + } + + #[getter] + fn id(&self) -> &str { + self.model.id().raw_voice_model_id() + } + + #[getter] + fn metas<'py>(&self, py: Python<'py>) -> Vec<&'py PyAny> { + crate::convert::to_pydantic_voice_model_meta(self.model.metas(), py).unwrap() + } + } + + #[pyclass] + #[derive(Clone)] + pub(crate) struct OpenJtalk { + open_jtalk: voicevox_core::tokio::OpenJtalk, + } + + #[pymethods] + impl OpenJtalk { + #[allow(clippy::new_ret_no_self)] + #[staticmethod] + fn new( + #[pyo3(from_py_with = "crate::convert::from_utf8_path")] + open_jtalk_dict_dir: Utf8PathBuf, + py: Python<'_>, + ) -> PyResult<&PyAny> { + pyo3_asyncio::tokio::future_into_py(py, async move { + let open_jtalk = voicevox_core::tokio::OpenJtalk::new(open_jtalk_dict_dir).await; + let open_jtalk = Python::with_gil(|py| open_jtalk.into_py_result(py))?; + Ok(Self { open_jtalk }) + }) + } + + fn use_user_dict<'py>(&self, user_dict: UserDict, py: Python<'py>) -> PyResult<&'py PyAny> { + let this = self.open_jtalk.clone(); + + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = this.use_user_dict(&user_dict.dict).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + } + + #[pyclass] + pub(crate) struct Synthesizer { + synthesizer: + Closable, Self>, + } + + #[pymethods] + impl Synthesizer { + #[new] + #[pyo3(signature =( + open_jtalk, + acceleration_mode = InitializeOptions::default().acceleration_mode, + cpu_num_threads = InitializeOptions::default().cpu_num_threads, + ))] + fn new( + open_jtalk: OpenJtalk, + #[pyo3(from_py_with = "crate::convert::from_acceleration_mode")] + acceleration_mode: AccelerationMode, + cpu_num_threads: u16, + ) -> PyResult { + let synthesizer = voicevox_core::tokio::Synthesizer::new( + open_jtalk.open_jtalk.clone(), + &InitializeOptions { + acceleration_mode, + cpu_num_threads, + }, + ); + let synthesizer = Python::with_gil(|py| synthesizer.into_py_result(py))?; + let synthesizer = Closable::new(synthesizer); + Ok(Self { synthesizer }) + } + + fn __repr__(&self) -> &'static str { + "Synthesizer { .. }" + } + + fn __enter__(slf: PyRef<'_, Self>) -> PyResult> { + slf.synthesizer.get()?; + Ok(slf) + } + + fn __exit__( + &mut self, + #[allow(unused_variables)] exc_type: &PyAny, + #[allow(unused_variables)] exc_value: &PyAny, + #[allow(unused_variables)] traceback: &PyAny, + ) { + self.close(); + } + + #[getter] + fn is_gpu_mode(&self) -> PyResult { + let synthesizer = self.synthesizer.get()?; + Ok(synthesizer.is_gpu_mode()) + } + + #[getter] + fn metas<'py>(&self, py: Python<'py>) -> PyResult> { + let synthesizer = self.synthesizer.get()?; + crate::convert::to_pydantic_voice_model_meta(&synthesizer.metas(), py) + } + + fn load_voice_model<'py>( + &mut self, + model: &'py PyAny, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let model: VoiceModel = model.extract()?; + let synthesizer = self.synthesizer.get()?.clone(); + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = synthesizer.load_voice_model(&model.model).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + + fn unload_voice_model(&mut self, voice_model_id: &str, py: Python<'_>) -> PyResult<()> { + self.synthesizer + .get()? + .unload_voice_model(&VoiceModelId::new(voice_model_id.to_string())) + .into_py_result(py) + } + + // C APIの挙動と一貫性を持たせる。 + fn is_loaded_voice_model(&self, voice_model_id: &PyString) -> PyResult { + let Ok(voice_model_id) = voice_model_id.to_str() else { + // 与えられたIDがUTF-8ではない場合、それに対応する`VoicdModel`は確実に存在しない + return Ok(false); + }; + Ok(self + .synthesizer + .get()? + .is_loaded_voice_model(&VoiceModelId::new(voice_model_id.to_string()))) + } + + fn audio_query_from_kana<'py>( + &self, + kana: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let kana = kana.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let audio_query = synthesizer + .audio_query_from_kana(&kana, StyleId::new(style_id)) + .await; + + Python::with_gil(|py| { + let class = py.import("voicevox_core")?.getattr("AudioQuery")?; + let ret = crate::convert::to_pydantic_dataclass( + audio_query.into_py_result(py)?, + class, + )?; + Ok(ret.to_object(py)) + }) + }, + ) + } + + fn audio_query<'py>( + &self, + text: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let text = text.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let audio_query = synthesizer.audio_query(&text, StyleId::new(style_id)).await; + + Python::with_gil(|py| { + let audio_query = audio_query.into_py_result(py)?; + let class = py.import("voicevox_core")?.getattr("AudioQuery")?; + let ret = crate::convert::to_pydantic_dataclass(audio_query, class)?; + Ok(ret.to_object(py)) + }) + }, + ) + } + + fn create_accent_phrases_from_kana<'py>( + &self, + kana: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let kana = kana.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let accent_phrases = synthesizer + .create_accent_phrases_from_kana(&kana, StyleId::new(style_id)) + .await; + Python::with_gil(|py| { + let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; + let accent_phrases = accent_phrases + .into_py_result(py)? + .iter() + .map(|ap| crate::convert::to_pydantic_dataclass(ap, class)) + .collect::>>(); + let list = PyList::new(py, accent_phrases); + Ok(list.to_object(py)) + }) + }, + ) + } + + fn create_accent_phrases<'py>( + &self, + text: &str, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + let text = text.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let accent_phrases = synthesizer + .create_accent_phrases(&text, StyleId::new(style_id)) + .await; + Python::with_gil(|py| { + let class = py.import("voicevox_core")?.getattr("AccentPhrase")?; + let accent_phrases = accent_phrases + .into_py_result(py)? + .iter() + .map(|ap| crate::convert::to_pydantic_dataclass(ap, class)) + .collect::>>(); + let list = PyList::new(py, accent_phrases); + Ok(list.to_object(py)) + }) + }, + ) + } + + fn replace_mora_data<'py>( + &self, + accent_phrases: &'py PyList, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + crate::convert::async_modify_accent_phrases( + accent_phrases, + StyleId::new(style_id), + py, + |a, s| async move { synthesizer.replace_mora_data(&a, s).await }, + ) + } + + fn replace_phoneme_length<'py>( + &self, + accent_phrases: &'py PyList, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + crate::convert::async_modify_accent_phrases( + accent_phrases, + StyleId::new(style_id), + py, + |a, s| async move { synthesizer.replace_phoneme_length(&a, s).await }, + ) + } + + fn replace_mora_pitch<'py>( + &self, + accent_phrases: &'py PyList, + style_id: u32, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + crate::convert::async_modify_accent_phrases( + accent_phrases, + StyleId::new(style_id), + py, + |a, s| async move { synthesizer.replace_mora_pitch(&a, s).await }, + ) + } + + #[pyo3(signature=(audio_query,style_id,enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak))] + fn synthesis<'py>( + &self, + #[pyo3(from_py_with = "crate::convert::from_dataclass")] audio_query: AudioQueryModel, + style_id: u32, + enable_interrogative_upspeak: bool, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let synthesizer = self.synthesizer.get()?.clone(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let wav = synthesizer + .synthesis( + &audio_query, + StyleId::new(style_id), + &SynthesisOptions { + enable_interrogative_upspeak, + }, + ) + .await; + Python::with_gil(|py| { + let wav = wav.into_py_result(py)?; + Ok(PyBytes::new(py, &wav).to_object(py)) + }) + }, + ) + } + + #[pyo3(signature=( + kana, + style_id, + enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak + ))] + fn tts_from_kana<'py>( + &self, + kana: &str, + style_id: u32, + enable_interrogative_upspeak: bool, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let style_id = StyleId::new(style_id); + let options = TtsOptions { + enable_interrogative_upspeak, + }; + let synthesizer = self.synthesizer.get()?.clone(); + let kana = kana.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let wav = synthesizer.tts_from_kana(&kana, style_id, &options).await; + + Python::with_gil(|py| { + let wav = wav.into_py_result(py)?; + Ok(PyBytes::new(py, &wav).to_object(py)) + }) + }, + ) + } + + #[pyo3(signature=( + text, + style_id, + enable_interrogative_upspeak = TtsOptions::default().enable_interrogative_upspeak + ))] + fn tts<'py>( + &self, + text: &str, + style_id: u32, + enable_interrogative_upspeak: bool, + py: Python<'py>, + ) -> PyResult<&'py PyAny> { + let style_id = StyleId::new(style_id); + let options = TtsOptions { + enable_interrogative_upspeak, + }; + let synthesizer = self.synthesizer.get()?.clone(); + let text = text.to_owned(); + pyo3_asyncio::tokio::future_into_py_with_locals( + py, + pyo3_asyncio::tokio::get_current_locals(py)?, + async move { + let wav = synthesizer.tts(&text, style_id, &options).await; + + Python::with_gil(|py| { + let wav = wav.into_py_result(py)?; + Ok(PyBytes::new(py, &wav).to_object(py)) + }) + }, + ) + } + + fn close(&mut self) { + self.synthesizer.close() + } + } + + #[pyclass] + #[derive(Default, Debug, Clone)] + pub(crate) struct UserDict { + dict: Arc, + } + + #[pymethods] + impl UserDict { + #[new] + fn new() -> Self { + Self::default() + } + + fn load<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { + let this = self.dict.clone(); + let path = path.to_owned(); + + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = this.load(&path).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + + fn save<'py>(&self, path: &str, py: Python<'py>) -> PyResult<&'py PyAny> { + let this = self.dict.clone(); + let path = path.to_owned(); + + pyo3_asyncio::tokio::future_into_py(py, async move { + let result = this.save(&path).await; + Python::with_gil(|py| result.into_py_result(py)) + }) + } + + fn add_word( + &mut self, + #[pyo3(from_py_with = "crate::convert::to_rust_user_dict_word")] word: UserDictWord, + py: Python<'_>, + ) -> PyResult { + let uuid = self.dict.add_word(word).into_py_result(py)?; + + crate::convert::to_py_uuid(py, uuid) + } + + fn update_word( + &mut self, + #[pyo3(from_py_with = "crate::convert::to_rust_uuid")] word_uuid: Uuid, + #[pyo3(from_py_with = "crate::convert::to_rust_user_dict_word")] word: UserDictWord, + py: Python<'_>, + ) -> PyResult<()> { + self.dict.update_word(word_uuid, word).into_py_result(py)?; + Ok(()) + } + + fn remove_word( + &mut self, + #[pyo3(from_py_with = "crate::convert::to_rust_uuid")] word_uuid: Uuid, + py: Python<'_>, + ) -> PyResult<()> { + self.dict.remove_word(word_uuid).into_py_result(py)?; + Ok(()) + } + + fn import_dict(&mut self, other: &UserDict, py: Python<'_>) -> PyResult<()> { + self.dict.import(&other.dict).into_py_result(py)?; + Ok(()) + } + + #[getter] + fn words<'py>(&self, py: Python<'py>) -> PyResult<&'py PyDict> { + let words = self.dict.with_words(|words| { + words + .iter() + .map(|(&uuid, word)| { + let uuid = crate::convert::to_py_uuid(py, uuid)?; + let word = crate::convert::to_py_user_dict_word(py, word)?; + Ok((uuid, word)) + }) + .collect::>>() + })?; + Ok(words.into_py_dict(py)) + } + } +}