From 9c41398fdbad2244f9e2ea8953ab78233dc11546 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Thu, 4 Jan 2024 11:37:46 +0900 Subject: [PATCH] =?UTF-8?q?`synthesis=5Fmorphing`=E3=81=AE=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/e2e/testcases/morph.rs | 1 + .../voicevoxcore/SynthesizerTest.java | 149 +++++++++++++----- .../python/test/test_asyncio_morph.py | 110 +++++++++++++ .../python/test/test_blocking_morph.py | 108 +++++++++++++ .../python/voicevox_core/__init__.py | 2 + 5 files changed, 332 insertions(+), 38 deletions(-) create mode 100644 crates/voicevox_core_python_api/python/test/test_asyncio_morph.py create mode 100644 crates/voicevox_core_python_api/python/test/test_blocking_morph.py diff --git a/crates/voicevox_core_c_api/tests/e2e/testcases/morph.rs b/crates/voicevox_core_c_api/tests/e2e/testcases/morph.rs index b2622c46c..20b79004d 100644 --- a/crates/voicevox_core_c_api/tests/e2e/testcases/morph.rs +++ b/crates/voicevox_core_c_api/tests/e2e/testcases/morph.rs @@ -132,6 +132,7 @@ impl assert_cdylib::TestCase for TestCase { CStr::from_ptr(morphable_targets).to_bytes(), )?[&self.target_style]; + // TODO: スナップショットテストをやる let result = { const MORPH_RATE: f64 = 0.5; diff --git a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java index 7fc6cb5df..282df0264 100644 --- a/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java +++ b/crates/voicevox_core_java_api/lib/src/test/java/jp/hiroshiba/voicevoxcore/SynthesizerTest.java @@ -9,11 +9,17 @@ import java.util.List; import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.stream.Stream; import jp.hiroshiba.voicevoxcore.Synthesizer.MorphableTargetInfo; import jp.hiroshiba.voicevoxcore.exceptions.InferenceFailedException; import jp.hiroshiba.voicevoxcore.exceptions.InvalidModelDataException; +import jp.hiroshiba.voicevoxcore.exceptions.SpeakerFeatureException; import jp.hiroshiba.voicevoxcore.exceptions.StyleNotFoundException; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; class SynthesizerTest extends TestUtils { @FunctionalInterface @@ -29,44 +35,6 @@ void checkIsGpuMode() { assertFalse(synthesizer.isGpuMode()); } - @Test - void checkMorphableTargets() throws InvalidModelDataException { - OpenJtalk openJtalk = loadOpenJtalk(); - Synthesizer synthesizer = - Synthesizer.builder(openJtalk).accelerationMode(Synthesizer.AccelerationMode.CPU).build(); - - synthesizer.loadVoiceModel(loadModel()); - - Map morphableTargets = synthesizer.morphableTargets(0); - assertFalse(morphableTargets.get(0).isMorphable); - assertFalse(morphableTargets.get(1).isMorphable); - assertFalse(morphableTargets.get(302).isMorphable); - assertFalse(morphableTargets.get(303).isMorphable); - - morphableTargets = synthesizer.morphableTargets(1); - assertFalse(morphableTargets.get(0).isMorphable); - assertTrue(morphableTargets.get(1).isMorphable); - assertFalse(morphableTargets.get(302).isMorphable); - assertFalse(morphableTargets.get(303).isMorphable); - - morphableTargets = synthesizer.morphableTargets(302); - assertFalse(morphableTargets.get(0).isMorphable); - assertFalse(morphableTargets.get(1).isMorphable); - assertTrue(morphableTargets.get(302).isMorphable); - assertTrue(morphableTargets.get(303).isMorphable); - - morphableTargets = synthesizer.morphableTargets(303); - assertFalse(morphableTargets.get(0).isMorphable); - assertFalse(morphableTargets.get(1).isMorphable); - assertTrue(morphableTargets.get(302).isMorphable); - assertTrue(morphableTargets.get(303).isMorphable); - - try { - synthesizer.morphableTargets(2); - } catch (StyleNotFoundException e) { - } - } - boolean checkAllMoras( List accentPhrases, List otherAccentPhrases, @@ -152,4 +120,109 @@ void checkTts() throws InferenceFailedException, InvalidModelDataException { synthesizer.loadVoiceModel(model); synthesizer.tts("こんにちは", model.metas[0].styles[0].id); } + + @ParameterizedTest + @MethodSource("morphParamsProvider") + void checkMorphing(MorphParams params) + throws InvalidModelDataException, InferenceFailedException { + OpenJtalk openJtalk = loadOpenJtalk(); + Synthesizer synthesizer = + Synthesizer.builder(openJtalk).accelerationMode(Synthesizer.AccelerationMode.CPU).build(); + + synthesizer.loadVoiceModel(loadModel()); + + int baseStyleId = params.getBaseStyleId(); + AudioQuery query = synthesizer.createAudioQuery("こんにちは", baseStyleId); + Map morphableTargets = synthesizer.morphableTargets(baseStyleId); + + for (Map.Entry entry : params.getTargets().entrySet()) { + int targetStyleId = entry.getKey(); + boolean shouldSuccess = entry.getValue(); + + assertTrue(morphableTargets.get(targetStyleId).isMorphable == shouldSuccess); + + try { + // TODO: スナップショットテストをやる + synthesizer.synthesisMorphing(query, baseStyleId, targetStyleId, 0.5); + assertTrue(shouldSuccess); + } catch (SpeakerFeatureException e) { + assertFalse(shouldSuccess); + } + } + } + + static Stream morphParamsProvider() { + return Stream.of( + new MorphParams( + 0, + new TreeMap() { + { + put(0, false); + put(1, false); + put(302, false); + put(303, false); + } + }), + new MorphParams( + 1, + new TreeMap() { + { + put(0, false); + put(1, true); + put(302, false); + put(303, false); + } + }), + new MorphParams( + 302, + new TreeMap() { + { + put(0, false); + put(1, false); + put(302, true); + put(303, true); + } + }), + new MorphParams( + 303, + new TreeMap() { + { + put(0, false); + put(1, false); + put(302, true); + put(303, true); + } + })); + } + + // TODO: Lombokを使う + static class MorphParams { + private final int baseStyleId; + private final SortedMap targets; + + MorphParams(int baseStyleId, SortedMap targets) { + this.baseStyleId = baseStyleId; + this.targets = targets; + } + + int getBaseStyleId() { + return baseStyleId; + } + + SortedMap getTargets() { + return targets; + } + } + + @Test + void checkMorphableTargetsDeniesUnknownStyle() { + OpenJtalk openJtalk = loadOpenJtalk(); + Synthesizer synthesizer = + Synthesizer.builder(openJtalk).accelerationMode(Synthesizer.AccelerationMode.CPU).build(); + + try { + synthesizer.morphableTargets(0); + } catch (StyleNotFoundException e) { + } + } } diff --git a/crates/voicevox_core_python_api/python/test/test_asyncio_morph.py b/crates/voicevox_core_python_api/python/test/test_asyncio_morph.py new file mode 100644 index 000000000..4adcfc6f0 --- /dev/null +++ b/crates/voicevox_core_python_api/python/test/test_asyncio_morph.py @@ -0,0 +1,110 @@ +""" +モーフィング機能をテストする。 + +``test_blocking_morph`` と対になる。 +""" + +from typing import Dict + +import conftest +import pytest +import pytest_asyncio +from voicevox_core import SpeakerFeatureError, StyleId, StyleNotFoundError +from voicevox_core.asyncio import OpenJtalk, Synthesizer, VoiceModel + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "base, targets", + [ + ( + 0, + { + 0: False, + 1: False, + 302: False, + 303: False, + }, + ), + ( + 1, + { + 0: False, + 1: True, + 302: False, + 303: False, + }, + ), + ( + 302, + { + 0: False, + 1: False, + 302: True, + 303: True, + }, + ), + ( + 303, + { + 0: False, + 1: False, + 302: True, + 303: True, + }, + ), + ], +) +async def test_morph( + synthesizer: Synthesizer, base: StyleId, targets: Dict[StyleId, bool] +) -> None: + TEXT = "こんにちは" + MORPH_RATE = 0.5 + + query = await synthesizer.audio_query(TEXT, base) + + for target, should_success in targets.items(): + is_morphable = synthesizer.morphable_targets(base)[target].is_morphable + assert is_morphable == should_success + + if should_success: + # TODO: スナップショットテストをやる + await synthesizer.synthesis_morphing(query, base, target, MORPH_RATE) + else: + with pytest.raises( + SpeakerFeatureError, + match=( + r"^`dummy[1-3]` \([0-9a-f-]{36}\)は以下の機能を持ちません: " + r"`dummy[1-3]` \([0-9a-f-]{36}\)に対するモーフィング$" + ), + ): + await synthesizer.synthesis_morphing(query, base, target, MORPH_RATE) + + +def test_morphable_targets_raises_for_unknown_style(synthesizer: Synthesizer) -> None: + STYLE_ID = StyleId(9999) + + # FIXME: `KeyError.__init__`を通しているため、メッセージが`repr`で表示されてしまう + # https://github.com/VOICEVOX/voicevox_core/blob/4e13bca5a55a08d7aea08af4f949462bd284b1c1/crates/voicevox_core_python_api/src/convert.rs#L186-L206 + with pytest.raises( + StyleNotFoundError, + match=f"^'`{STYLE_ID}`に対するスタイルが見つかりませんでした。音声モデルが読み込まれていないか、読み込みが解除されています'$", + ): + synthesizer.morphable_targets(STYLE_ID) + + +@pytest_asyncio.fixture +async def synthesizer(open_jtalk: OpenJtalk, model: VoiceModel) -> Synthesizer: + synthesizer = Synthesizer(open_jtalk) + await synthesizer.load_voice_model(model) + return synthesizer + + +@pytest_asyncio.fixture +async def open_jtalk() -> OpenJtalk: + return await OpenJtalk.new(conftest.open_jtalk_dic_dir) + + +@pytest_asyncio.fixture +async def model() -> VoiceModel: + return await VoiceModel.from_path(conftest.model_dir) diff --git a/crates/voicevox_core_python_api/python/test/test_blocking_morph.py b/crates/voicevox_core_python_api/python/test/test_blocking_morph.py new file mode 100644 index 000000000..6551e9eb8 --- /dev/null +++ b/crates/voicevox_core_python_api/python/test/test_blocking_morph.py @@ -0,0 +1,108 @@ +""" +モーフィング機能をテストする。 + +``test_asyncio_morph`` と対になる。 +""" + +from typing import Dict + +import conftest +import pytest +from voicevox_core import SpeakerFeatureError, StyleId, StyleNotFoundError +from voicevox_core.blocking import OpenJtalk, Synthesizer, VoiceModel + + +@pytest.mark.parametrize( + "base, targets", + [ + ( + 0, + { + 0: False, + 1: False, + 302: False, + 303: False, + }, + ), + ( + 1, + { + 0: False, + 1: True, + 302: False, + 303: False, + }, + ), + ( + 302, + { + 0: False, + 1: False, + 302: True, + 303: True, + }, + ), + ( + 303, + { + 0: False, + 1: False, + 302: True, + 303: True, + }, + ), + ], +) +def test_morph( + synthesizer: Synthesizer, base: StyleId, targets: Dict[StyleId, bool] +) -> None: + TEXT = "こんにちは" + MORPH_RATE = 0.5 + + query = synthesizer.audio_query(TEXT, base) + + for target, should_success in targets.items(): + is_morphable = synthesizer.morphable_targets(base)[target].is_morphable + assert is_morphable == should_success + + if should_success: + # TODO: スナップショットテストをやる + synthesizer.synthesis_morphing(query, base, target, MORPH_RATE) + else: + with pytest.raises( + SpeakerFeatureError, + match=( + r"^`dummy[1-3]` \([0-9a-f-]{36}\)は以下の機能を持ちません: " + r"`dummy[1-3]` \([0-9a-f-]{36}\)に対するモーフィング$" + ), + ): + synthesizer.synthesis_morphing(query, base, target, MORPH_RATE) + + +def test_morphable_targets_raises_for_unknown_style(synthesizer: Synthesizer) -> None: + STYLE_ID = StyleId(9999) + + # FIXME: `KeyError.__init__`を通しているため、メッセージが`repr`で表示されてしまう + # https://github.com/VOICEVOX/voicevox_core/blob/4e13bca5a55a08d7aea08af4f949462bd284b1c1/crates/voicevox_core_python_api/src/convert.rs#L186-L206 + with pytest.raises( + StyleNotFoundError, + match=f"^'`{STYLE_ID}`に対するスタイルが見つかりませんでした。音声モデルが読み込まれていないか、読み込みが解除されています'$", + ): + synthesizer.morphable_targets(STYLE_ID) + + +@pytest.fixture +def synthesizer(open_jtalk: OpenJtalk, model: VoiceModel) -> Synthesizer: + synthesizer = Synthesizer(open_jtalk) + synthesizer.load_voice_model(model) + return synthesizer + + +@pytest.fixture(scope="session") +def open_jtalk() -> OpenJtalk: + return OpenJtalk(conftest.open_jtalk_dic_dir) + + +@pytest.fixture(scope="session") +def model() -> VoiceModel: + return VoiceModel.from_path(conftest.model_dir) diff --git a/crates/voicevox_core_python_api/python/voicevox_core/__init__.py b/crates/voicevox_core_python_api/python/voicevox_core/__init__.py index 9b93407fd..2dfca2b43 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/__init__.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/__init__.py @@ -32,6 +32,7 @@ ParseKanaError, ReadZipEntryError, SaveUserDictError, + SpeakerFeatureError, StyleAlreadyLoadedError, StyleNotFoundError, UseUserDictError, @@ -66,6 +67,7 @@ "PermittedSynthesisMorphing", "ReadZipEntryError", "SaveUserDictError", + "SpeakerFeatureError", "SpeakerMeta", "SpeakerSupportedFeatures", "StyleAlreadyLoadedError",