diff --git a/voicevox_engine/user_dict/user_dict_manager.py b/voicevox_engine/user_dict/user_dict_manager.py index be12a3371..06db43e44 100644 --- a/voicevox_engine/user_dict/user_dict_manager.py +++ b/voicevox_engine/user_dict/user_dict_manager.py @@ -9,13 +9,16 @@ from uuid import UUID, uuid4 import pyopenjtalk +from pydantic import TypeAdapter from ..utility.path_utility import get_save_dir, resource_root -from .model import UserDictWord, WordTypes +from .model import UserDictWord from .user_dict_word import ( + SaveFormatUserDictWord, UserDictInputError, WordProperty, - cost2priority, + convert_from_save_format, + convert_to_save_format, create_word, part_of_speech_data, priority2cost, @@ -55,6 +58,9 @@ def func(*args: Any, **kw: Any) -> Any: mutex_openjtalk_dict = threading.Lock() +_save_format_dict_adapter = TypeAdapter(dict[str, SaveFormatUserDictWord]) + + class UserDictionary: """ユーザー辞書""" @@ -82,21 +88,12 @@ def __init__( @mutex_wrapper(mutex_user_dict) def _write_to_json(self, user_dict: dict[str, UserDictWord]) -> None: """ユーザー辞書データをファイルへ書き込む。""" - user_dict_path = self._user_dict_path - - converted_user_dict = {} + save_format_user_dict: dict[str, SaveFormatUserDictWord] = {} for word_uuid, word in user_dict.items(): - word_dict = word.model_dump() - word_dict["cost"] = priority2cost( - word_dict["context_id"], word_dict["priority"] - ) - del word_dict["priority"] - converted_user_dict[word_uuid] = word_dict - # 予めjsonに変換できることを確かめる - user_dict_json = json.dumps(converted_user_dict, ensure_ascii=False) - - # ユーザー辞書ファイルへの書き込み - user_dict_path.write_text(user_dict_json, encoding="utf-8") + save_format_word = convert_to_save_format(word) + save_format_user_dict[word_uuid] = save_format_word + user_dict_json = _save_format_dict_adapter.dump_json(save_format_user_dict) + self._user_dict_path.write_bytes(user_dict_json) @mutex_wrapper(mutex_openjtalk_dict) def update_dict(self) -> None: @@ -180,26 +177,15 @@ def update_dict(self) -> None: @mutex_wrapper(mutex_user_dict) def read_dict(self) -> dict[str, UserDictWord]: """ユーザー辞書を読み出す。""" - user_dict_path = self._user_dict_path - # 指定ユーザー辞書が存在しない場合、空辞書を返す - if not user_dict_path.is_file(): + if not self._user_dict_path.is_file(): return {} - with user_dict_path.open(encoding="utf-8") as f: + with self._user_dict_path.open(encoding="utf-8") as f: + save_format_dict = _save_format_dict_adapter.validate_python(json.load(f)) result: dict[str, UserDictWord] = {} - for word_uuid, word in json.load(f).items(): - # cost2priorityで変換を行う際にcontext_idが必要となるが、 - # 0.12以前の辞書は、context_idがハードコーディングされていたためにユーザー辞書内に保管されていない - # ハードコーディングされていたcontext_idは固有名詞を意味するものなので、固有名詞のcontext_idを補完する - if word.get("context_id") is None: - word["context_id"] = part_of_speech_data[ - WordTypes.PROPER_NOUN - ].context_id - word["priority"] = cost2priority(word["context_id"], word["cost"]) - del word["cost"] - result[str(UUID(word_uuid))] = UserDictWord(**word) - + for word_uuid, word in save_format_dict.items(): + result[str(UUID(word_uuid))] = convert_from_save_format(word) return result def import_user_dict( diff --git a/voicevox_engine/user_dict/user_dict_word.py b/voicevox_engine/user_dict/user_dict_word.py index 378e951b8..301cd946c 100644 --- a/voicevox_engine/user_dict/user_dict_word.py +++ b/voicevox_engine/user_dict/user_dict_word.py @@ -159,3 +159,73 @@ def priority2cost(context_id: int, priority: int) -> int: assert USER_DICT_MIN_PRIORITY <= priority <= USER_DICT_MAX_PRIORITY cost_candidates = _search_cost_candidates(context_id) return cost_candidates[USER_DICT_MAX_PRIORITY - priority] + + +@dataclass +class SaveFormatUserDictWord: + """保存用の単語の型""" + + surface: str + cost: int # `UserDictWord.priority` と対応 + part_of_speech: str + part_of_speech_detail_1: str + part_of_speech_detail_2: str + part_of_speech_detail_3: str + inflectional_type: str + inflectional_form: str + stem: str + yomi: str + pronunciation: str + accent_type: int + accent_associative_rule: str + context_id: int | None = None # v0.12 以前の辞書でのみ `None` + mora_count: int | None = None + + +def convert_to_save_format(word: UserDictWord) -> SaveFormatUserDictWord: + """単語を保存用に変換する。""" + cost = priority2cost(word.context_id, word.priority) + return SaveFormatUserDictWord( + surface=word.surface, + cost=cost, + context_id=word.context_id, + part_of_speech=word.part_of_speech, + part_of_speech_detail_1=word.part_of_speech_detail_1, + part_of_speech_detail_2=word.part_of_speech_detail_2, + part_of_speech_detail_3=word.part_of_speech_detail_3, + inflectional_type=word.inflectional_type, + inflectional_form=word.inflectional_form, + stem=word.stem, + yomi=word.yomi, + pronunciation=word.pronunciation, + accent_type=word.accent_type, + mora_count=word.mora_count, + accent_associative_rule=word.accent_associative_rule, + ) + + +def convert_from_save_format(word: SaveFormatUserDictWord) -> UserDictWord: + """単語を保存用から変換する。""" + context_id_p_noun = part_of_speech_data[WordTypes.PROPER_NOUN].context_id + # cost2priorityで変換を行う際にcontext_idが必要となるが、 + # 0.12以前の辞書は、context_idがハードコーディングされていたためにユーザー辞書内に保管されていない + # ハードコーディングされていたcontext_idは固有名詞を意味するものなので、固有名詞のcontext_idを補完する + context_id = context_id_p_noun if word.context_id is None else word.context_id + priority = cost2priority(context_id, word.cost) + return UserDictWord( + surface=word.surface, + priority=priority, + context_id=context_id, + part_of_speech=word.part_of_speech, + part_of_speech_detail_1=word.part_of_speech_detail_1, + part_of_speech_detail_2=word.part_of_speech_detail_2, + part_of_speech_detail_3=word.part_of_speech_detail_3, + inflectional_type=word.inflectional_type, + inflectional_form=word.inflectional_form, + stem=word.stem, + yomi=word.yomi, + pronunciation=word.pronunciation, + accent_type=word.accent_type, + mora_count=word.mora_count, + accent_associative_rule=word.accent_associative_rule, + )