diff --git a/README.md b/README.md index 1d21530..92b45bc 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,10 @@ Adding `--task translate` will translate the subtitles into English: auto_subtitle /path/to/video.mp4 --task translate +Adding `--language_out es` will translate the subtitles into Spanish. Replace `es` with the code of any other target language. + + auto_subtitle /path/to/video.mp4 --language_out es + Run the following to view all available options: auto_subtitle --help diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py index 21cdc16..62519b2 100644 --- a/auto_subtitle/cli.py +++ b/auto_subtitle/cli.py @@ -5,6 +5,7 @@ import warnings import tempfile from .utils import filename, str2bool, write_srt +from deep_translator import GoogleTranslator # We'll use for translation def main(): @@ -27,6 +28,8 @@ def main(): "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") parser.add_argument("--language", type=str, default="auto", choices=["auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"], help="What is the origin language of the video? If unset, it is detected automatically.") + parser.add_argument("--language_out", type=str, default=None, + help="The target language for translation. 
def translate_subtitles(subtitles_path: dict, target_language: str):
    """Translate the cue text of each SRT file into ``target_language``.

    Every SRT file is read as UTF-8, its cue text lines are passed through
    ``GoogleTranslator`` (source language set to ``'auto'``), and the result
    is written to a sibling file whose name carries a
    ``_<target_language>`` suffix in front of the extension. Cue numbers,
    timing lines and blank separator lines are copied through unchanged.

    Args:
        subtitles_path: Mapping whose values are paths of SRT files. The keys
            are only used for the progress message (via ``filename``) and as
            keys of the returned mapping.
        target_language: Language code handed to ``GoogleTranslator`` as
            ``target`` and embedded in the output file name.

    Returns:
        A dict with the same keys as ``subtitles_path`` whose values are the
        paths of the translated SRT files.
    """
    translator = GoogleTranslator(source='auto', target=target_language)
    translated_subtitles = {}

    for path, srt_path in subtitles_path.items():
        print(f"Translating subtitles for {filename(path)} to {target_language}...")

        with open(srt_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()

        translated_lines = []
        # Track position inside a cue instead of guessing from the first
        # character: everything between a timing line and the next blank line
        # is cue text, even when it starts with (or consists of) digits.
        in_cue_text = False
        for line in lines:
            text = line.strip()
            if not text:
                # A blank line terminates the current cue.
                in_cue_text = False
                translated_lines.append(line)
            elif in_cue_text:
                # Keep the original text if the translator yields nothing, so
                # a cue is never dropped and no None is concatenated.
                translated_line = translator.translate(text) or text
                translated_lines.append(translated_line + '\n')
            else:
                # Header part of a cue: the numeric index, then the timing
                # line that switches us into cue-text mode.
                if '-->' in line:
                    in_cue_text = True
                translated_lines.append(line)

        # Only touch the real extension; str.replace would also rewrite
        # '.srt' inside directory names and do nothing if the suffix is
        # missing (which would overwrite the source file).
        root, ext = os.path.splitext(srt_path)
        translated_srt_path = f"{root}_{target_language}{ext}"
        with open(translated_srt_path, 'w', encoding='utf-8') as file:
            file.writelines(translated_lines)

        translated_subtitles[path] = translated_srt_path

    return translated_subtitles
'deep_translator', ], description="Automatically generate and embed subtitles into your videos", entry_points={