m1guelpf · TeoColuccio · Oct 19, 2022 · Oct 19, 2022
diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py
@@ -4,6 +4,7 @@
 import argparse
 import warnings
 import tempfile
+from whisper.tokenizer import LANGUAGES
 from .utils import filename, str2bool, write_srt
 
 
@@ -22,6 +23,8 @@ def main():
                         help="only generate the .srt file and not create overlayed video")
     parser.add_argument("--verbose", type=str2bool, default=False,
                         help="whether to print out the progress and debug messages")
+    parser.add_argument("--language", type=str,
+                        help=f"force the use of a chosen language: {list(LANGUAGES.keys())} {list(LANGUAGES.values())})")
 
     parser.add_argument("--task", type=str, default="transcribe", choices=[
                         "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
@@ -31,8 +34,18 @@ def main():
     output_dir: str = args.pop("output_dir")
     output_srt: bool = args.pop("output_srt")
     srt_only: bool = args.pop("srt_only")
+    language: str = args.pop("language")
     os.makedirs(output_dir, exist_ok=True)
 
+    if language is not None:
+        if language not in LANGUAGES:
+            raise Exception(
+                f'whisper: error: argument --language: invalid choice: {language} (choose from {list(LANGUAGES.keys())}) {list(LANGUAGES.values())}')
+        else:
+            warnings.warn(
+                f"You have forced the use of the {language} language.")
+            args["language"] = language
+
     if model_name.endswith(".en"):
         warnings.warn(
             f"{model_name} is an English-only model, forcing English detection.")
@@ -41,7 +54,8 @@ def main():
     model = whisper.load_model(model_name)
     audios = get_audio(args.pop("video"))
     subtitles = get_subtitles(
-        audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(audio_path, **args)
+        audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(
+            audio_path, **args)
     )
 
     if srt_only: