diff --git a/test/e2e/__snapshots__/test_tts.ambr b/test/e2e/__snapshots__/test_tts.ambr index a68d0d808..492e474aa 100644 --- a/test/e2e/__snapshots__/test_tts.ambr +++ b/test/e2e/__snapshots__/test_tts.ambr @@ -1,4 +1,4 @@ # serializer version: 1 # name: test_テキストと話者IDから音声を合成できる - 'MD5:9cb1070db2510240ff63a16fd42907c9' + 'MD5:8f7ddc461c68542d4d8ef4cd5c54ca82' # --- diff --git "a/test/e2e/single_api/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" b/test/e2e/single_api/__snapshots__/test_audio_query/test_post_audio_query_200.json similarity index 100% rename from "test/e2e/single_api/__snapshots__/test_audio_query/test_speaker\343\202\222\346\214\207\345\256\232\343\201\227\343\201\246\351\237\263\345\243\260\345\220\210\346\210\220\343\202\257\343\202\250\343\203\252\343\201\214\345\217\226\345\276\227\343\201\247\343\201\215\343\202\213.json" rename to test/e2e/single_api/__snapshots__/test_audio_query/test_post_audio_query_200.json diff --git a/test/e2e/single_api/__snapshots__/test_frame_synthesis.ambr b/test/e2e/single_api/__snapshots__/test_frame_synthesis.ambr new file mode 100644 index 000000000..ec1568011 --- /dev/null +++ b/test/e2e/single_api/__snapshots__/test_frame_synthesis.ambr @@ -0,0 +1,4 @@ +# serializer version: 1 +# name: test_post_frame_synthesis_200 + 'MD5:1c385210acba238994604a8cee96aee3' +# --- diff --git a/test/e2e/single_api/__snapshots__/test_synthesis.ambr b/test/e2e/single_api/__snapshots__/test_synthesis.ambr new file mode 100644 index 000000000..27c4fb9de --- /dev/null +++ b/test/e2e/single_api/__snapshots__/test_synthesis.ambr @@ -0,0 +1,4 @@ +# serializer version: 1 +# name: test_post_synthesis_200 + 'MD5:f7d42ce5787856549abc3d2d7561c06f' +# --- diff --git a/test/e2e/single_api/test_audio_query.py b/test/e2e/single_api/test_audio_query.py index a77db614e..e57ee66a1 100644 --- a/test/e2e/single_api/test_audio_query.py +++ b/test/e2e/single_api/test_audio_query.py @@ -1,5 +1,5 @@ """ -AudioQuery APIのテスト +/audio_query API のテスト """ from test.utility import round_floats @@ -8,7 +8,7 @@ from syrupy.assertion import SnapshotAssertion -def test_speakerを指定して音声合成クエリが取得できる( +def test_post_audio_query_200( client: TestClient, snapshot_json: SnapshotAssertion ) -> None: response = client.post("/audio_query", params={"text": "テストです", "speaker": 0}) diff --git a/test/e2e/single_api/test_frame_synthesis.py b/test/e2e/single_api/test_frame_synthesis.py index 8ffec0fbb..836b52ac7 100644 --- a/test/e2e/single_api/test_frame_synthesis.py +++ b/test/e2e/single_api/test_frame_synthesis.py @@ -2,10 +2,15 @@ /frame_synthesis API のテスト """ +from test.utility import hash_wave_floats_from_wav_bytes + from fastapi.testclient import TestClient +from syrupy.assertion import SnapshotAssertion -def test_post_frame_synthesis_200(client: TestClient) -> None: +def test_post_frame_synthesis_200( + client: TestClient, snapshot: SnapshotAssertion +) -> None: query = { "f0": [ 0.0, @@ -81,3 +86,7 @@ def test_post_frame_synthesis_200(client: TestClient) -> None: } response = client.post("/frame_synthesis", params={"speaker": 0}, json=query) assert response.status_code == 200 + + # FileResponse 内の .wav から抽出された音声波形が一致する + assert response.headers["content-type"] == "audio/wav" + assert snapshot == hash_wave_floats_from_wav_bytes(response.read()) diff --git a/test/e2e/single_api/test_multi_synthesis.py b/test/e2e/single_api/test_multi_synthesis.py index f2fef2b40..dafaafc35 100644 --- a/test/e2e/single_api/test_multi_synthesis.py +++ b/test/e2e/single_api/test_multi_synthesis.py @@ -59,3 +59,14 @@ def test_post_multi_synthesis_200(client: TestClient) -> None: ] response = client.post("/multi_synthesis", params={"speaker": 0}, json=queries) assert response.status_code == 200 + + # FileResponse 内の zip ファイルに圧縮された .wav から抽出された音声波形が一致する + # FIXME: スナップショットテストを足す + # NOTE: ZIP ファイル内の .wav に Linux-Windows 数値精度問題があるため解凍が必要 + assert response.headers["content-type"] == "application/zip" + # from test.utility import summarize_wav_bytes + # from syrupy.assertion import SnapshotAssertion + # # zip 解凍 + # wav_summarys = map(lambda wav_byte: summarize_wav_bytes(wav_byte), wav_bytes) + # wavs_summary = concatenate_func(wav_summarys) + # assert snapshot == wavs_summary diff --git a/test/e2e/single_api/test_synthesis.py b/test/e2e/single_api/test_synthesis.py index 257c7216a..739cf9d62 100644 --- a/test/e2e/single_api/test_synthesis.py +++ b/test/e2e/single_api/test_synthesis.py @@ -3,11 +3,13 @@ """ from test.e2e.single_api.utils import gen_mora +from test.utility import hash_wave_floats_from_wav_bytes from fastapi.testclient import TestClient +from syrupy.assertion import SnapshotAssertion -def test_post_synthesis_200(client: TestClient) -> None: +def test_post_synthesis_200(client: TestClient, snapshot: SnapshotAssertion) -> None: query = { "accent_phrases": [ { @@ -33,3 +35,7 @@ def test_post_synthesis_200(client: TestClient) -> None: } response = client.post("/synthesis", params={"speaker": 0}, json=query) assert response.status_code == 200 + + # 音声波形が一致する + assert response.headers["content-type"] == "audio/wav" + assert snapshot == hash_wave_floats_from_wav_bytes(response.read()) diff --git a/test/e2e/single_api/test_synthesis_morphing.py b/test/e2e/single_api/test_synthesis_morphing.py index e017df039..f1a6a9114 100644 --- a/test/e2e/single_api/test_synthesis_morphing.py +++ b/test/e2e/single_api/test_synthesis_morphing.py @@ -37,3 +37,10 @@ def test_post_synthesis_morphing_200(client: TestClient) -> None: json=queries, ) assert response.status_code == 200 + + # FIXME: LinuxとMacOSで計算結果が一致しないためスナップショットテストがコケる(原因不明) + # from test.utility import summarize_wav_bytes + # from syrupy.assertion import SnapshotAssertion + # # FileResponse 内の .wav から抽出された音声波形が一致する + # assert response.headers["content-type"] == "audio/wav" + # assert snapshot == summarize_wav_bytes(response.read()) diff --git a/test/e2e/test_tts.py b/test/e2e/test_tts.py index b0f7bbf8b..c8eb5daf0 100644 --- a/test/e2e/test_tts.py +++ b/test/e2e/test_tts.py @@ -2,10 +2,8 @@ TTSのテスト """ -import io -from test.utility import hash_long_string, round_floats +from test.utility import hash_wave_floats_from_wav_bytes -import soundfile as sf from fastapi.testclient import TestClient from syrupy.assertion import SnapshotAssertion @@ -22,17 +20,9 @@ def test_テキストと話者IDから音声を合成できる( # AudioQuery から音声波形を生成する synthesis_res = client.post("/synthesis", params={"speaker": 0}, json=audio_query) - # wav ファイルを含む FileResponse から音声波形を抽出する - wav_bytes = io.BytesIO(synthesis_res.read()) - wave = sf.read(wav_bytes)[0].tolist() - - # NOTE: Linux-Windows 数値精度問題に対するワークアラウンド - wave = round_floats(wave, 2) - # リクエストが成功している assert synthesis_res.status_code == 200 - # レスポンスが音声ファイルである + + # FileResponse 内の .wav から抽出された音声波形が一致する assert synthesis_res.headers["content-type"] == "audio/wav" - # 音声波形が commit 間で不変である - wave_str = " ".join(map(lambda point: str(point), wave)) - assert snapshot == hash_long_string(wave_str) + assert snapshot == hash_wave_floats_from_wav_bytes(synthesis_res.read()) diff --git a/test/utility.py b/test/utility.py index b11e8ded4..ec24edba9 100644 --- a/test/utility.py +++ b/test/utility.py @@ -1,7 +1,10 @@ import hashlib +import io import json from typing import Any +import numpy as np +import soundfile as sf from pydantic.json import pydantic_encoder @@ -36,3 +39,11 @@ def to_hash(value: str) -> str: return {k: hash_long_string(v) for k, v in value.items()} else: return value + + +def hash_wave_floats_from_wav_bytes(wav_bytes: bytes) -> str: + """.wavファイルバイト列から音声波形を抽出しハッシュ化する""" + wave = sf.read(io.BytesIO(wav_bytes))[0].tolist() + # NOTE: Linux-Windows 数値精度問題に対するワークアラウンド + wave = round_floats(wave, 2) + return "MD5:" + hashlib.md5(np.array(wave).tobytes()).hexdigest()