diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py index 3edc26b..1e37649 100644 --- a/auto_subtitle/cli.py +++ b/auto_subtitle/cli.py @@ -4,6 +4,7 @@ import argparse import warnings import tempfile +from whisper.tokenizer import LANGUAGES from .utils import filename, str2bool, write_srt @@ -22,6 +23,8 @@ def main(): help="only generate the .srt file and not create overlayed video") parser.add_argument("--verbose", type=str2bool, default=False, help="whether to print out the progress and debug messages") + parser.add_argument("--language", type=str, + help=f"force the use of a chosen language: {list(LANGUAGES.keys())} {list(LANGUAGES.values())})") parser.add_argument("--task", type=str, default="transcribe", choices=[ "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") @@ -31,8 +34,18 @@ def main(): output_dir: str = args.pop("output_dir") output_srt: bool = args.pop("output_srt") srt_only: bool = args.pop("srt_only") + language: str = args.pop("language") os.makedirs(output_dir, exist_ok=True) + if language is not None: + if language not in LANGUAGES: + raise Exception( + f'whisper: error: argument --language: invalid choice: {language} (choose from {list(LANGUAGES.keys())}) {list(LANGUAGES.values())}') + else: + warnings.warn( + f"You have forced the use of the {language} language.") + args["language"] = language + if model_name.endswith(".en"): warnings.warn( f"{model_name} is an English-only model, forcing English detection.") @@ -41,7 +54,8 @@ def main(): model = whisper.load_model(model_name) audios = get_audio(args.pop("video")) subtitles = get_subtitles( - audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(audio_path, **args) + audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe( + audio_path, **args) ) if srt_only: