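"""Example script (forked from VOICEVOX/voicevox_core): load a VVM voice model,
build an AudioQuery for the given text, and write the synthesized audio to a
WAV file.

Illustrative invocation (``sample.vvm`` is a placeholder; substitute your own
VVM file and Open JTalk dictionary directory):

    python run.py sample.vvm --dict-dir ./open_jtalk_dic_utf_8-1.11 --out ./output.wav
"""
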
import asyncio
import dataclasses
import json
import logging
from argparse import ArgumentParser
from pathlib import Path
from typing import Tuple

import voicevox_core
from voicevox_core import (
    AccelerationMode,
    AudioQuery,
    OpenJtalk,
    Synthesizer,
    VoiceModel,
)


async def main() -> None:
    logging.basicConfig(format="[%(levelname)s] %(name)s: %(message)s")

    logger = logging.getLogger(__name__)
    logger.setLevel("DEBUG")

    logging.getLogger("voicevox_core_python_api").setLevel("DEBUG")
    logging.getLogger("voicevox_core").setLevel("DEBUG")

    (
        acceleration_mode,
        vvm_path,
        open_jtalk_dict_dir,
        text,
        out,
        style_id,
    ) = parse_args()

    logger.debug("%s", f"{voicevox_core.supported_devices()=}")

    # Initialize the synthesizer with the Open JTalk dictionary.
    logger.info("%s", f"Initializing ({acceleration_mode=}, {open_jtalk_dict_dir=})")
    synthesizer = await Synthesizer.new_with_initialize(
        OpenJtalk(open_jtalk_dict_dir), acceleration_mode=acceleration_mode
    )
    logger.debug("%s", f"{synthesizer.metas=}")
    logger.debug("%s", f"{synthesizer.is_gpu_mode=}")

    # Load the VVM voice model before querying or synthesizing.
    logger.info("%s", f"Loading `{vvm_path}`")
    model = await VoiceModel.from_path(vvm_path)
    await synthesizer.load_voice_model(model)

    # Build an AudioQuery from the text, synthesize it, and write the WAV bytes.
    logger.info("%s", f"Creating an AudioQuery from {text!r}")
    audio_query = await synthesizer.audio_query(text, style_id)

    logger.info("%s", f"Synthesizing with {display_as_json(audio_query)}")
    wav = await synthesizer.synthesis(audio_query, style_id)
    out.write_bytes(wav)
    logger.info("%s", f"Wrote `{out}`")


def parse_args() -> Tuple[AccelerationMode, Path, Path, str, Path, int]:
    argparser = ArgumentParser()
    argparser.add_argument(
        "--mode",
        default="AUTO",
        type=AccelerationMode,
        help='acceleration mode ("AUTO", "CPU", "GPU")',
    )
    argparser.add_argument(
        "vvm",
        type=Path,
        help="path to the VVM voice model file",
    )
    argparser.add_argument(
        "--dict-dir",
        default="./open_jtalk_dic_utf_8-1.11",
        type=Path,
        help="Open JTalk dictionary directory",
    )
    argparser.add_argument(
        "--text",
        # Japanese default, roughly: "This audio is output using VOICEVOX."
        default="この音声は、ボイスボックスを使用して、出力されています。",
        help="text to read aloud",
    )
    argparser.add_argument(
        "--out",
        default="./output.wav",
        type=Path,
        help="path of the output WAV file",
    )
    argparser.add_argument(
        "--style-id",
        default=0,
        type=int,
        help="style (speaker) ID to use",
    )
    args = argparser.parse_args()
    return (args.mode, args.vvm, args.dict_dir, args.text, args.out, args.style_id)


def display_as_json(audio_query: AudioQuery) -> str:
    return json.dumps(dataclasses.asdict(audio_query), ensure_ascii=False)


if __name__ == "__main__":
    asyncio.run(main())