From 3483887d920ae9988ed02073d91be8678a40b1cf Mon Sep 17 00:00:00 2001
From: Gray Suitcase <41382894+PickledChair@users.noreply.github.com>
Date: Fri, 24 Feb 2023 01:36:55 +0900
Subject: [PATCH] =?UTF-8?q?python=20(FFI)=20example=20=E3=82=92=E5=89=8A?=
 =?UTF-8?q?=E9=99=A4=20(#432)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                 |   3 +-
 crates/voicevox_core_python_api/README.md |   2 +-
 example/pyo3/.gitignore                   |   8 -
 example/pyo3/README.md                    |  49 -----
 example/pyo3/run.py                       |  78 --------
 example/python/.gitignore                 | 141 +-------------
 example/python/README.md                  |  60 ++++--
 example/python/core.py                    | 227 ----------------------
 example/python/requirements.txt           |   1 -
 example/python/run.py                     | 105 ++++++----
 10 files changed, 111 insertions(+), 563 deletions(-)
 delete mode 100644 example/pyo3/.gitignore
 delete mode 100644 example/pyo3/README.md
 delete mode 100644 example/pyo3/run.py
 delete mode 100644 example/python/core.py
 delete mode 100644 example/python/requirements.txt
diff --git a/README.md b/README.md
index 46f266e4a..d4b90e00b 100644
--- a/README.md
+++ b/README.md
@@ -89,8 +89,7 @@ sudo apt install libgomp1
 
 現在このリポジトリでは次のサンプルが提供されています。実行方法についてはそれぞれのディレクトリ内にある README を参照してください
 
-- [Python](./example/python)
-- [Python(pip)](./example/pyo3)
+- [Python(pip)](./example/python)
 - [C++(UNIX CMake)](./example/cpp/unix)
 - [C++(Windows Visual Studio)](./example/cpp/windows)
 
diff --git a/crates/voicevox_core_python_api/README.md b/crates/voicevox_core_python_api/README.md
index 10df981c4..b5d71d819 100644
--- a/crates/voicevox_core_python_api/README.md
+++ b/crates/voicevox_core_python_api/README.md
@@ -70,4 +70,4 @@ venv を作ったらその venv 上で Maturin をインストールします。
 
 ## サンプル実行
 
-`maturin develop` で editable な状態でインストールした後、[example/pyo3](../../example/pyo3) にてサンプルを実行できます。
+`maturin develop` で editable な状態でインストールした後、[example/python](../../example/python) にてサンプルを実行できます。
diff --git a/example/pyo3/.gitignore b/example/pyo3/.gitignore
deleted file mode 100644
index c4d7f114d..000000000
--- a/example/pyo3/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# OpenJTalk-dictionary's dir
-open_jtalk_dic_utf_8-*
-
-# shared library
-*.so
-*.so.*
-*.dylib
-*.dll
diff --git a/example/pyo3/README.md b/example/pyo3/README.md
deleted file mode 100644
index 05ff6e7e8..000000000
--- a/example/pyo3/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# Python サンプルコード (PyO3 によるバインディング経由)
-
-voicevox_core ライブラリ の Python バインディングを使った音声合成のサンプルコードです。  
-`pip install`で導入することができます。
-
-## 準備
-
-TODO
-
-- Python インタプリタ ≧3.8 + venv
-- voicevox_core_python_api の whl (`pip install`)
-- onnxruntime の DLL (/README.md と同様)
-- open_jtalk_dic_utf_8-1.11 (/README.md と同様)
-
-## 実行
-
-Open JTalk 辞書ディレクトリ、読み上げさせたい文章、出力 wav ファイルのパスの 3 つを指定して run.py を実行します。
-
-```console
-❯ python ./run.py -h
-usage: run.py [-h] [--mode MODE] open_jtalk_dict_dir text out
-
-positional arguments:
-  open_jtalk_dict_dir  Open JTalkの辞書ディレクトリ
-  text                 読み上げさせたい文章
-  out                  出力wavファイルのパス
-
-optional arguments:
-  -h, --help           show this help message and exit
-  --mode MODE          モード ("AUTO", "CPU", "GPU")
-```
-
-```console
-❯ # python ./run.py <Open JTalk辞書ディレクトリ> <読み上げさせたい文章> <出力wavファイルのパス>
-❯ python ./run.py ./open_jtalk_dic_utf_8-1.11 これはテストです ./audio.wav
-[DEBUG] run.py: voicevox_core.METAS=[Meta(name='四国めたん', styles=[Style(name='あまあま', id=0)], speaker_uuid='7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff', version='0.0.1'), Meta(name='ずんだもん', styles=[Style(name='あまあま', id=1)], speaker_uuid='388f246b-8c41-4ac1-8e2d-5d79f3ff56d9', version='0.0.1')]
-[DEBUG] run.py: voicevox_core.SUPPORTED_DEVICES=SupportedDevices(cpu=True, cuda=True, dml=False)
-[INFO] run.py: Initializing (acceleration_mode=<AccelerationMode.AUTO: 'AUTO'>, open_jtalk_dict_dir=PosixPath('open_jtalk_dic_utf_8-1.11'))
-[DEBUG] run.py: core.is_gpu_mode=True
-[INFO] run.py: Loading model 0
-[DEBUG] run.py: core.is_model_loaded(0)=True
-[INFO] run.py: Creating an AudioQuery from 'これはテストです'
-[INFO] run.py: Synthesizing with {"accent_phrases": [{"moras": [{"text": "コ", "consonant": "k", "consonant_length": 0.063058704, "vowel": "o", "vowel_length": 0.08937682, "pitch": 5.5699596}, {"text": "レ", "consonant": "r", "consonant_length": 0.047547057, "vowel": "e", "vowel_length": 0.07596417, "pitch": 5.6643105}, {"text": "ワ", "consonant": "w", "consonant_length": 0.053706698, "vowel": "a", "vowel_length": 0.10348523, "pitch": 5.7773285}], "accent": 3, "pause_mora": null, "is_interrogative": false}, {"moras": [{"text": "テ", "consonant": "t", "consonant_length": 0.06311223, "vowel": "e", "vowel_length": 0.07596652, "pitch": 5.881741}, {"text": "ス", "consonant": "s", "consonant_length": 0.038565055, "vowel": "U", "vowel_length": 0.050694168, "pitch": 0.0}, {"text": "ト", "consonant": "t", "consonant_length": 0.06685759, "vowel": "o", "vowel_length": 0.0753997, "pitch": 5.737323}, {"text": "デ", "consonant": "d", "consonant_length": 0.058399618, "vowel": "e", "vowel_length": 0.09201351, "pitch": 5.4747167}, {"text": "ス", "consonant": "s", "consonant_length": 0.08852549, "vowel": "U", "vowel_length": 0.1281984, "pitch": 0.0}], "accent": 1, "pause_mora": null, "is_interrogative": false}], "speed_scale": 1.0, "pitch_scale": 0.0, "intonation_scale": 1.0, "volume_scale": 1.0, "pre_phoneme_length": 0.1, "post_phoneme_length": 0.1, "output_sampling_rate": 24000, "output_stereo": false, "kana": "コレワ'/テ'_ストデ_ス"}
-[INFO] run.py: Wrote `audio.wav`
-[DEBUG] lib.rs: Destructing a VoicevoxCore
-```
-
-正常に実行されれば音声合成の結果である wav ファイルが生成されます。
-この例の場合、`"これはテストです"`という読み上げの wav ファイルが audio.wav という名前で生成されます。
diff --git a/example/pyo3/run.py b/example/pyo3/run.py
deleted file mode 100644
index f60f4e0fe..000000000
--- a/example/pyo3/run.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import dataclasses
-import json
-import logging
-from argparse import ArgumentParser
-from pathlib import Path
-from typing import Tuple
-
-import voicevox_core
-from voicevox_core import AccelerationMode, AudioQuery, VoicevoxCore
-
-SPEAKER_ID = 0
-
-
-def main() -> None:
-    logging.basicConfig(
-        format="[%(levelname)s] %(filename)s: %(message)s", level="DEBUG"
-    )
-    logger = logging.getLogger(__name__)
-
-    (acceleration_mode, open_jtalk_dict_dir, text, out) = parse_args()
-
-    logger.debug("%s", f"{voicevox_core.METAS=}")
-    logger.debug("%s", f"{voicevox_core.SUPPORTED_DEVICES=}")
-
-    logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})")
-    core = VoicevoxCore(
-        acceleration_mode=acceleration_mode, open_jtalk_dict_dir=open_jtalk_dict_dir
-    )
-
-    logger.debug("%s", f"{core.is_gpu_mode=}")
-
-    logger.info("%s", f"Loading model {SPEAKER_ID}")
-    core.load_model(SPEAKER_ID)
-
-    logger.debug("%s", f"{core.is_model_loaded(0)=}")
-
-    logger.info("%s", f"Creating an AudioQuery from {text!r}")
-    audio_query = core.audio_query(text, SPEAKER_ID)
-
-    logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
-    wav = core.synthesis(audio_query, SPEAKER_ID)
-
-    out.write_bytes(wav)
-    logger.info("%s", f"Wrote `{out}`")
-
-
-def parse_args() -> Tuple[AccelerationMode, Path, str, Path]:
-    argparser = ArgumentParser()
-    argparser.add_argument(
-        "--mode",
-        default="AUTO",
-        type=AccelerationMode,
-        help='モード ("AUTO", "CPU", "GPU")',
-    )
-    argparser.add_argument(
-        "open_jtalk_dict_dir",
-        type=Path,
-        help="Open JTalkの辞書ディレクトリ",
-    )
-    argparser.add_argument(
-        "text",
-        help="読み上げさせたい文章",
-    )
-    argparser.add_argument(
-        "out",
-        type=Path,
-        help="出力wavファイルのパス",
-    )
-    args = argparser.parse_args()
-    return (args.mode, args.open_jtalk_dict_dir, args.text, args.out)
-
-
-def display_as_json(audio_query: AudioQuery) -> str:
-    return json.dumps(dataclasses.asdict(audio_query), ensure_ascii=False)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/example/python/.gitignore b/example/python/.gitignore
index 117b718ec..c4d7f114d 100644
--- a/example/python/.gitignore
+++ b/example/python/.gitignore
@@ -1,147 +1,8 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-#  Usually these files are written by a python script from a template
-#  before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-#   For a library or package, you might want to ignore these files since the code is
-#   intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-#   However, in case of collaboration, if having platform-specific dependencies or dependencies
-#   having no cross-platform support, pipenv may install dependencies that don't work, or not
-#   install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
 # OpenJTalk-dictionary's dir
 open_jtalk_dic_utf_8-*
 
 # shared library
+*.so
 *.so.*
 *.dylib
 *.dll
-voicevox_core/
diff --git a/example/python/README.md b/example/python/README.md
index 606c6b24f..05ff6e7e8 100644
--- a/example/python/README.md
+++ b/example/python/README.md
@@ -1,23 +1,49 @@
-# Python のサンプルコード
+# Python サンプルコード (PyO3 によるバインディング経由)
 
-python から voicevox_core ライブラリを使用するためのサンプルコードです。
+voicevox_core ライブラリの Python バインディングを使った音声合成のサンプルコードです。  
+`pip install`で導入することができます。
 
-## サンプル実行方法
+## 準備
 
-まず、この README があるディレクトリで、[Downloader を使用して voicevox_core をダウンロードします](../../docs/downloads/download.md#default)。  
-次に下記コマンドを実行して python のサンプルを実行します。
+TODO
 
-```bash
-# サンプルコード実行のための依存モジュールのインストール
-pip install -r requirements.txt
-python run.py \
-    --text "これは本当に実行できているんですか" \
-    --speaker_id 1
+- Python インタプリタ ≧3.8 + venv
+- voicevox_core_python_api の whl (`pip install`)
+- onnxruntime の DLL (/README.md と同様)
+- open_jtalk_dic_utf_8-1.11 (/README.md と同様)
 
-# 引数の紹介
-# --text 読み上げるテキスト
-# --speaker_id 話者ID
-# --use_gpu GPUを使う
-# --f0_speaker_id 音高の話者ID（デフォルト値はspeaker_id）
-# --f0_correct 音高の補正値（デフォルト値は0。+-0.3くらいで結果が大きく変わります）
+## 実行
+
+Open JTalk 辞書ディレクトリ、読み上げさせたい文章、出力 wav ファイルのパスの 3 つを指定して run.py を実行します。
+
+```console
+❯ python ./run.py -h
+usage: run.py [-h] [--mode MODE] open_jtalk_dict_dir text out
+
+positional arguments:
+  open_jtalk_dict_dir  Open JTalkの辞書ディレクトリ
+  text                 読み上げさせたい文章
+  out                  出力wavファイルのパス
+
+optional arguments:
+  -h, --help           show this help message and exit
+  --mode MODE          モード ("AUTO", "CPU", "GPU")
 ```
+
+```console
+❯ # python ./run.py <Open JTalk辞書ディレクトリ> <読み上げさせたい文章> <出力wavファイルのパス>
+❯ python ./run.py ./open_jtalk_dic_utf_8-1.11 これはテストです ./audio.wav
+[DEBUG] run.py: voicevox_core.METAS=[Meta(name='四国めたん', styles=[Style(name='あまあま', id=0)], speaker_uuid='7ffcb7ce-00ec-4bdc-82cd-45a8889e43ff', version='0.0.1'), Meta(name='ずんだもん', styles=[Style(name='あまあま', id=1)], speaker_uuid='388f246b-8c41-4ac1-8e2d-5d79f3ff56d9', version='0.0.1')]
+[DEBUG] run.py: voicevox_core.SUPPORTED_DEVICES=SupportedDevices(cpu=True, cuda=True, dml=False)
+[INFO] run.py: Initializing (acceleration_mode=<AccelerationMode.AUTO: 'AUTO'>, open_jtalk_dict_dir=PosixPath('open_jtalk_dic_utf_8-1.11'))
+[DEBUG] run.py: core.is_gpu_mode=True
+[INFO] run.py: Loading model 0
+[DEBUG] run.py: core.is_model_loaded(0)=True
+[INFO] run.py: Creating an AudioQuery from 'これはテストです'
+[INFO] run.py: Synthesizing with {"accent_phrases": [{"moras": [{"text": "コ", "consonant": "k", "consonant_length": 0.063058704, "vowel": "o", "vowel_length": 0.08937682, "pitch": 5.5699596}, {"text": "レ", "consonant": "r", "consonant_length": 0.047547057, "vowel": "e", "vowel_length": 0.07596417, "pitch": 5.6643105}, {"text": "ワ", "consonant": "w", "consonant_length": 0.053706698, "vowel": "a", "vowel_length": 0.10348523, "pitch": 5.7773285}], "accent": 3, "pause_mora": null, "is_interrogative": false}, {"moras": [{"text": "テ", "consonant": "t", "consonant_length": 0.06311223, "vowel": "e", "vowel_length": 0.07596652, "pitch": 5.881741}, {"text": "ス", "consonant": "s", "consonant_length": 0.038565055, "vowel": "U", "vowel_length": 0.050694168, "pitch": 0.0}, {"text": "ト", "consonant": "t", "consonant_length": 0.06685759, "vowel": "o", "vowel_length": 0.0753997, "pitch": 5.737323}, {"text": "デ", "consonant": "d", "consonant_length": 0.058399618, "vowel": "e", "vowel_length": 0.09201351, "pitch": 5.4747167}, {"text": "ス", "consonant": "s", "consonant_length": 0.08852549, "vowel": "U", "vowel_length": 0.1281984, "pitch": 0.0}], "accent": 1, "pause_mora": null, "is_interrogative": false}], "speed_scale": 1.0, "pitch_scale": 0.0, "intonation_scale": 1.0, "volume_scale": 1.0, "pre_phoneme_length": 0.1, "post_phoneme_length": 0.1, "output_sampling_rate": 24000, "output_stereo": false, "kana": "コレワ'/テ'_ストデ_ス"}
+[INFO] run.py: Wrote `audio.wav`
+[DEBUG] lib.rs: Destructing a VoicevoxCore
+```
+
+正常に実行されれば音声合成の結果である wav ファイルが生成されます。
+この例の場合、`"これはテストです"`という読み上げの wav ファイルが audio.wav という名前で生成されます。
diff --git a/example/python/core.py b/example/python/core.py
deleted file mode 100644
index 4df8bb6cf..000000000
--- a/example/python/core.py
+++ /dev/null
@@ -1,227 +0,0 @@
-from ctypes import *
-import platform
-import os
-from pathlib import Path
-import json
-from typing import List, Optional, TypedDict, Union
-import numpy
-
-# numpy ndarray types
-int64_dim1_type = numpy.ctypeslib.ndpointer(dtype=numpy.int64, ndim=1)
-float32_dim1_type = numpy.ctypeslib.ndpointer(dtype=numpy.float32, ndim=1)
-int64_dim2_type = numpy.ctypeslib.ndpointer(dtype=numpy.int64, ndim=2)
-float32_dim2_type = numpy.ctypeslib.ndpointer(dtype=numpy.float32, ndim=2)
-
-get_os = platform.system()
-
-lib_file = ""
-if get_os == "Windows":
-    lib_file = "core.dll"
-elif get_os == "Darwin":
-    lib_file = "libcore.dylib"
-elif get_os == "Linux":
-    lib_file = "libcore.so"
-
-# ライブラリ読み込み
-core_dll_path = Path(os.path.dirname(__file__) + f"/voicevox_core/{lib_file}")
-if not os.path.exists(core_dll_path):
-    raise Exception(f"coreライブラリファイルが{core_dll_path}に存在しません")
-lib = cdll.LoadLibrary(str(core_dll_path))
-
-# 関数型定義
-lib.initialize.argtypes = (c_bool, c_int, c_bool)
-lib.initialize.restype = c_bool
-
-lib.load_model.argtypes = (c_int64,)
-lib.load_model.restype = c_bool
-
-lib.is_model_loaded.argtypes = (c_int64,)
-lib.is_model_loaded.restype = c_bool
-
-lib.finalize.argtypes = ()
-
-lib.metas.restype = c_char_p
-
-lib.supported_devices.restype = c_char_p
-
-lib.yukarin_s_forward.argtypes = (
-    c_int64, int64_dim1_type, int64_dim1_type, float32_dim1_type)
-lib.yukarin_s_forward.restype = c_bool
-
-lib.yukarin_sa_forward.argtypes = (c_int64, int64_dim2_type, int64_dim2_type, int64_dim2_type,
-                                   int64_dim2_type, int64_dim2_type, int64_dim2_type, int64_dim1_type, float32_dim2_type)
-lib.yukarin_sa_forward.restype = c_bool
-
-lib.decode_forward.argtypes = (
-    c_int64, c_int64, float32_dim2_type, float32_dim2_type, int64_dim1_type, float32_dim1_type)
-lib.decode_forward.restype = c_bool
-
-lib.last_error_message.restype = c_char_p
-
-lib.voicevox_load_openjtalk_dict.argtypes = (c_char_p,)
-lib.voicevox_load_openjtalk_dict.restype = c_int
-
-lib.voicevox_audio_query.argtypes = (c_char_p, c_int64, POINTER(c_char_p))
-lib.voicevox_audio_query.restype = c_int
-
-lib.voicevox_audio_query_from_kana.argtypes = (c_char_p, c_int64, POINTER(c_char_p))
-lib.voicevox_audio_query_from_kana.restype = c_int
-
-lib.voicevox_synthesis.argtypes = (c_char_p, c_int64, POINTER(c_int), POINTER(POINTER(c_uint8)))
-lib.voicevox_synthesis.restype = c_int
-
-lib.voicevox_tts.argtypes = (c_char_p, c_int64, POINTER(c_int), POINTER(POINTER(c_uint8)))
-lib.voicevox_tts.restype = c_int
-
-lib.voicevox_tts_from_kana.argtypes = (c_char_p, c_int64, POINTER(c_int), POINTER(POINTER(c_uint8)))
-lib.voicevox_tts_from_kana.restype = c_int
-
-lib.voicevox_audio_query_json_free.argtypes = (c_char_p,)
-
-lib.voicevox_wav_free.argtypes = (POINTER(c_uint8),)
-
-lib.voicevox_error_result_to_message.argtypes = (c_int,)
-lib.voicevox_load_openjtalk_dict.argtypes = (c_char_p,)
-
-# ラッパー関数
-def initialize(use_gpu: bool, cpu_num_threads=0, load_all_models=True):
-    success = lib.initialize(use_gpu, cpu_num_threads, load_all_models)
-    if not success:
-        raise Exception(lib.last_error_message().decode())
-
-def load_model(speaker_id: int):
-    success = lib.load_model(speaker_id)
-    if not success:
-        raise Exception(lib.last_error_message().decode())
-
-def is_model_loaded(speaker_id: int) -> bool:
-    return lib.is_model_loaded(speaker_id)
-
-def metas() -> str:
-    return lib.metas().decode()
-
-
-def supported_devices() -> str:
-    return lib.supported_devices().decode()
-
-
-def yukarin_s_forward(length: int, phoneme_list: numpy.ndarray, speaker_id: numpy.ndarray) -> numpy.ndarray:
-    output = numpy.zeros((length, ), dtype=numpy.float32)
-    success = lib.yukarin_s_forward(length, phoneme_list, speaker_id, output)
-    if not success:
-        raise Exception(lib.last_error_message().decode())
-    return output
-
-
-def yukarin_sa_forward(
-    length: int,
-    vowel_phoneme_list,
-    consonant_phoneme_list,
-    start_accent_list,
-    end_accent_list,
-    start_accent_phrase_list,
-    end_accent_phrase_list,
-    speaker_id
-):
-    output = numpy.empty((len(speaker_id), length,), dtype=numpy.float32)
-    success = lib.yukarin_sa_forward(
-        length, vowel_phoneme_list, consonant_phoneme_list, start_accent_list, end_accent_list, start_accent_phrase_list, end_accent_phrase_list, speaker_id, output
-    )
-    if not success:
-        raise Exception(lib.last_error_message().decode())
-    return output
-
-
-def decode_forward(length: int, phoneme_size: int, f0, phoneme, speaker_id):
-    output = numpy.empty((length*256,), dtype=numpy.float32)
-    success = lib.decode_forward(
-        length, phoneme_size, f0, phoneme, speaker_id, output
-    )
-    if not success:
-        raise Exception(lib.last_error_message().decode())
-    return output
-
-def voicevox_load_openjtalk_dict(dict_path: str):
-    errno = lib.voicevox_load_openjtalk_dict(dict_path.encode())
-    if errno != 0:
-        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
-
-def voicevox_audio_query(text: str, speaker_id: int) -> "AudioQuery":
-    output_json = c_char_p()
-    errno = lib.voicevox_audio_query(text.encode(), speaker_id, byref(output_json))
-    if errno != 0:
-        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
-    audio_query = json.loads(output_json.value)
-    lib.voicevox_audio_query_json_free(output_json)
-    return audio_query
-
-def voicevox_audio_query_from_kana(text: str, speaker_id: int) -> "AudioQuery":
-    output_json = c_char_p()
-    errno = lib.voicevox_audio_query_from_kana(text.encode(), speaker_id, byref(output_json))
-    if errno != 0:
-        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
-    audio_query = json.loads(output_json.value)
-    lib.voicevox_audio_query_json_free(output_json)
-    return audio_query
-
-def voicevox_synthesis(audio_query: "AudioQuery", speaker_id: int) -> bytes:
-    output_binary_size = c_int()
-    output_wav = POINTER(c_uint8)()
-    errno = lib.voicevox_synthesis(json.dumps(audio_query).encode(), speaker_id, byref(output_binary_size), byref(output_wav))
-    if errno != 0:
-        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
-    output = create_string_buffer(output_binary_size.value * sizeof(c_uint8))
-    memmove(output, output_wav, output_binary_size.value * sizeof(c_uint8))
-    lib.voicevox_wav_free(output_wav)
-    return output
-
-def voicevox_tts(text: str, speaker_id: int) -> bytes:
-    output_binary_size = c_int()
-    output_wav = POINTER(c_uint8)()
-    errno = lib.voicevox_tts(text.encode(), speaker_id, byref(output_binary_size), byref(output_wav))
-    if errno != 0:
-        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
-    output = create_string_buffer(output_binary_size.value * sizeof(c_uint8))
-    memmove(output, output_wav, output_binary_size.value * sizeof(c_uint8))
-    lib.voicevox_wav_free(output_wav)
-    return output
-
-def voicevox_tts_from_kana(text: str, speaker_id: int) -> bytes:
-    output_binary_size = c_int()
-    output_wav = POINTER(c_uint8)()
-    errno = lib.voicevox_tts_from_kana(text.encode(), speaker_id, byref(output_binary_size), byref(output_wav))
-    if errno != 0:
-        raise Exception(lib.voicevox_error_result_to_message(errno).decode())
-    output = create_string_buffer(output_binary_size.value * sizeof(c_uint8))
-    memmove(output, output_wav, output_binary_size.value * sizeof(c_uint8))
-    lib.voicevox_wav_free(output_wav)
-    return output
-
-def finalize():
-    lib.finalize()
-
-class AudioQuery(TypedDict):
-    accent_phrases: List["AccentPhrase"]
-    speedScale: float
-    pitchScale: float
-    intonationScale: float
-    volumeScale: float
-    prePhonemeLength: float
-    postPhonemeLength: float
-    outputSamplingRate: int
-    outputStereo: bool
-    kana: Optional[str]
-
-class AccentPhrase(TypedDict):
-    moras: List["Mora"]
-    accent: int
-    pause_mora: Optional["Mora"]
-    is_interrogative: bool
-
-class Mora(TypedDict):
-    text: str
-    consonant: Optional[str]
-    consonant_length: Optional[float]
-    vowel: str
-    vowel_length: float
-    pitch: float
diff --git a/example/python/requirements.txt b/example/python/requirements.txt
deleted file mode 100644
index 24ce15ab7..000000000
--- a/example/python/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-numpy
diff --git a/example/python/run.py b/example/python/run.py
index 638b1f029..f60f4e0fe 100644
--- a/example/python/run.py
+++ b/example/python/run.py
@@ -1,53 +1,78 @@
-import argparse
+import dataclasses
+import json
+import logging
+from argparse import ArgumentParser
+from pathlib import Path
+from typing import Tuple
 
-import core
+import voicevox_core
+from voicevox_core import AccelerationMode, AudioQuery, VoicevoxCore
 
+SPEAKER_ID = 0
 
-def run(
-    use_gpu: bool,
-    text: str,
-    speaker_id: int,
-    cpu_num_threads: int,
-    openjtalk_dict: str,
-    output: str,
-) -> None:
-    # コアの初期化
-    core.initialize(use_gpu, cpu_num_threads, load_all_models=False)
 
-    # openjtalk辞書のロード
-    core.voicevox_load_openjtalk_dict(openjtalk_dict)
+def main() -> None:
+    logging.basicConfig(
+        format="[%(levelname)s] %(filename)s: %(message)s", level="DEBUG"
+    )
+    logger = logging.getLogger(__name__)
+
+    (acceleration_mode, open_jtalk_dict_dir, text, out) = parse_args()
 
-    # 話者のロード
-    core.load_model(speaker_id)
+    logger.debug("%s", f"{voicevox_core.METAS=}")
+    logger.debug("%s", f"{voicevox_core.SUPPORTED_DEVICES=}")
 
-    # AudioQueryの生成
-    audio_query = core.voicevox_audio_query(text, speaker_id)
+    logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})")
+    core = VoicevoxCore(
+        acceleration_mode=acceleration_mode, open_jtalk_dict_dir=open_jtalk_dict_dir
+    )
 
-    # 音声合成
-    wavefmt = core.voicevox_synthesis(audio_query, speaker_id)
+    logger.debug("%s", f"{core.is_gpu_mode=}")
 
-    # 保存
-    with open(output, "wb") as f:
-        f.write(wavefmt)
+    logger.info("%s", f"Loading model {SPEAKER_ID}")
+    core.load_model(SPEAKER_ID)
 
-    core.finalize()
+    logger.debug("%s", f"{core.is_model_loaded(0)=}")
 
+    logger.info("%s", f"Creating an AudioQuery from {text!r}")
+    audio_query = core.audio_query(text, SPEAKER_ID)
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--use_gpu", action="store_true")
-    parser.add_argument("--text", required=True)
-    parser.add_argument("--speaker_id", type=int, required=True)
-    parser.add_argument("--cpu_num_threads", type=int, default=0)
-    parser.add_argument(
-        "--openjtalk_dict",
-        type=str,
-        default="voicevox_core/open_jtalk_dic_utf_8-1.11"
+    logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
+    wav = core.synthesis(audio_query, SPEAKER_ID)
+
+    out.write_bytes(wav)
+    logger.info("%s", f"Wrote `{out}`")
+
+
+def parse_args() -> Tuple[AccelerationMode, Path, str, Path]:
+    argparser = ArgumentParser()
+    argparser.add_argument(
+        "--mode",
+        default="AUTO",
+        type=AccelerationMode,
+        help='モード ("AUTO", "CPU", "GPU")',
     )
-    parser.add_argument("--output", type=str)
-    
-    args = parser.parse_args()
-    if args.output is None:
-        args.output = f"{args.text}-{args.speaker_id}.wav"
+    argparser.add_argument(
+        "open_jtalk_dict_dir",
+        type=Path,
+        help="Open JTalkの辞書ディレクトリ",
+    )
+    argparser.add_argument(
+        "text",
+        help="読み上げさせたい文章",
+    )
+    argparser.add_argument(
+        "out",
+        type=Path,
+        help="出力wavファイルのパス",
+    )
+    args = argparser.parse_args()
+    return (args.mode, args.open_jtalk_dict_dir, args.text, args.out)
+
 
-    run(**vars(args))
+def display_as_json(audio_query: AudioQuery) -> str:
+    return json.dumps(dataclasses.asdict(audio_query), ensure_ascii=False)
+
+
+if __name__ == "__main__":
+    main()