From 0c0cc7648354f66ba8ebe3f672eceea06d84760c Mon Sep 17 00:00:00 2001 From: TeoColuccio Date: Thu, 20 Oct 2022 01:15:19 +0200 Subject: [PATCH 1/2] add argument to force the language --- auto_subtitle/cli.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py index 3edc26b..346ba86 100644 --- a/auto_subtitle/cli.py +++ b/auto_subtitle/cli.py @@ -4,6 +4,7 @@ import argparse import warnings import tempfile +from whisper.tokenizer import LANGUAGES from .utils import filename, str2bool, write_srt @@ -22,6 +23,8 @@ def main(): help="only generate the .srt file and not create overlayed video") parser.add_argument("--verbose", type=str2bool, default=False, help="whether to print out the progress and debug messages") + parser.add_argument("--language", type=str, + help=f"force the use of a chosen language: {list(LANGUAGES.keys())} {list(LANGUAGES.values())})") parser.add_argument("--task", type=str, default="transcribe", choices=[ "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") @@ -31,13 +34,22 @@ def main(): output_dir: str = args.pop("output_dir") output_srt: bool = args.pop("output_srt") srt_only: bool = args.pop("srt_only") + language: str = args.pop("language") os.makedirs(output_dir, exist_ok=True) + if language not in LANGUAGES: + raise Exception(f'whisper: error: argument --language: invalid choice: {language} (choose from {list(LANGUAGES.keys())}) {list(LANGUAGES.values())}') + if model_name.endswith(".en"): warnings.warn( f"{model_name} is an English-only model, forcing English detection.") args["language"] = "en" + elif language is not None: + warnings.warn( + f"You have forced the use of the {language} language.") + args["language"] = language + model = whisper.load_model(model_name) audios = get_audio(args.pop("video")) subtitles = get_subtitles( From 88038ab37c73bfba578ae22e9b0ccec7ed67de1f Mon Sep 17 00:00:00 2001 From: TeoColuccio Date: Thu, 20 Oct 2022 01:41:54 +0200 Subject: [PATCH 2/2] fix checking language --- auto_subtitle/cli.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py index 346ba86..1e37649 100644 --- a/auto_subtitle/cli.py +++ b/auto_subtitle/cli.py @@ -37,23 +37,25 @@ def main(): language: str = args.pop("language") os.makedirs(output_dir, exist_ok=True) - if language not in LANGUAGES: - raise Exception(f'whisper: error: argument --language: invalid choice: {language} (choose from {list(LANGUAGES.keys())}) {list(LANGUAGES.values())}') + if language is not None: + if language not in LANGUAGES: + raise Exception( + f'whisper: error: argument --language: invalid choice: {language} (choose from {list(LANGUAGES.keys())}) {list(LANGUAGES.values())}') + else: + warnings.warn( + f"You have forced the use of the {language} language.") + args["language"] = language if model_name.endswith(".en"): warnings.warn( f"{model_name} is an English-only model, forcing English detection.") args["language"] = "en" - elif language is not None: - warnings.warn( - f"You have forced the use of the {language} language.") - args["language"] = language - model = whisper.load_model(model_name) audios = get_audio(args.pop("video")) subtitles = get_subtitles( - audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(audio_path, **args) + audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe( + audio_path, **args) ) if srt_only: