From f2d5f5ba6ad4ea98f1bcc6b608e0bcff7afb5239 Mon Sep 17 00:00:00 2001 From: Andy <43553208+adills@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:12:04 -0600 Subject: [PATCH 1/7] Update cli.py to include subtitle translation --- auto_subtitle/cli.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py index 21cdc16..e88ea6f 100644 --- a/auto_subtitle/cli.py +++ b/auto_subtitle/cli.py @@ -5,6 +5,7 @@ import warnings import tempfile from .utils import filename, str2bool, write_srt +from googletrans import Translator # We'll use googletrans for translation def main(): @@ -27,6 +28,8 @@ def main(): "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')") parser.add_argument("--language", type=str, default="auto", choices=["auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"], help="What is the origin language of the video? If unset, it is detected automatically.") + parser.add_argument("--language_out", type=str, default=None, + help="The target language for translation. 
If not set, no translation will be performed.") args = parser.parse_args().__dict__ model_name: str = args.pop("model") @@ -34,6 +37,7 @@ def main(): output_srt: bool = args.pop("output_srt") srt_only: bool = args.pop("srt_only") language: str = args.pop("language") + language_out: str = args.pop("language_out") os.makedirs(output_dir, exist_ok=True) @@ -51,6 +55,9 @@ def main(): audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(audio_path, **args) ) + if language_out: + subtitles = translate_subtitles(subtitles, language_out) + if srt_only: return @@ -110,6 +117,31 @@ def get_subtitles(audio_paths: list, output_srt: bool, output_dir: str, transcri return subtitles_path +def translate_subtitles(subtitles_path: dict, target_language: str): + translator = Translator() + translated_subtitles = {} + + for path, srt_path in subtitles_path.items(): + print(f"Translating subtitles for {filename(path)} to {target_language}...") + + with open(srt_path, 'r', encoding='utf-8') as file: + lines = file.readlines() + + translated_lines = [] + for line in lines: + if line.strip() and not line[0].isdigit() and '-->' not in line: + translated_line = translator.translate(line.strip(), dest=target_language).text + translated_lines.append(translated_line + '\n') + else: + translated_lines.append(line) + + translated_srt_path = srt_path.replace('.srt', f'_{target_language}.srt') + with open(translated_srt_path, 'w', encoding='utf-8') as file: + file.writelines(translated_lines) + + translated_subtitles[path] = translated_srt_path + + return translated_subtitles if __name__ == '__main__': main() From ec52bcf42ebaf1744881afeadf45919113579491 Mon Sep 17 00:00:00 2001 From: Andy <43553208+adills@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:15:19 -0600 Subject: [PATCH 2/7] Update requirements.txt with googletrans --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 73bca28..77177fc 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ openai-whisper +googletrans From b3ff084097dd9f9935754604864c83da4ebf78df Mon Sep 17 00:00:00 2001 From: Andy <43553208+adills@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:15:56 -0600 Subject: [PATCH 3/7] Update setup.py added googletrans --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index ca2ed5b..9d8f534 100644 --- a/setup.py +++ b/setup.py @@ -8,6 +8,7 @@ author="Miguel Piedrafita", install_requires=[ 'openai-whisper', + 'googletrans', ], description="Automatically generate and embed subtitles into your videos", entry_points={ From ecb3b9588a7ad94d166188cc50ef7691521db379 Mon Sep 17 00:00:00 2001 From: Andy <43553208+adills@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:20:03 -0600 Subject: [PATCH 4/7] Update README.md added example with language_out --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 1d21530..92b45bc 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,10 @@ Adding `--task translate` will translate the subtitles into English: auto_subtitle /path/to/video.mp4 --task translate +Adding `--language_out es` will translate the subtitles into Spanish. Replace `es` with the code of any other target language: 
+ + auto_subtitle /path/to/video.mp4 --language_out es + Run the following to view all available options: auto_subtitle --help From 66e22302641017ed8c4f9b8bb2c4d249b3887760 Mon Sep 17 00:00:00 2001 From: Andy <43553208+adills@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:30:49 -0600 Subject: [PATCH 5/7] Update setup.py changed to deep_translator --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9d8f534..33fa266 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ author="Miguel Piedrafita", install_requires=[ 'openai-whisper', - 'googletrans', + 'deep_translator', ], description="Automatically generate and embed subtitles into your videos", entry_points={ From e58577bbcbccb4268e54be81b0a8db6eb11bc192 Mon Sep 17 00:00:00 2001 From: Andy <43553208+adills@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:31:12 -0600 Subject: [PATCH 6/7] Update requirements.txt changed to deep_translator --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 77177fc..1693669 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ openai-whisper -googletrans +deep_translator From ef444f97902a20bcc08cb4e3e86a95f14c4705fa Mon Sep 17 00:00:00 2001 From: Andy <43553208+adills@users.noreply.github.com> Date: Sun, 13 Oct 2024 16:34:48 -0600 Subject: [PATCH 7/7] Update cli.py switched to deep_translator --- auto_subtitle/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/auto_subtitle/cli.py b/auto_subtitle/cli.py index e88ea6f..62519b2 100644 --- a/auto_subtitle/cli.py +++ b/auto_subtitle/cli.py @@ -5,7 +5,7 @@ import warnings import tempfile from .utils import filename, str2bool, write_srt -from googletrans import Translator # We'll use googletrans for translation +from deep_translator import GoogleTranslator # We'll use deep_translator for translation def main(): @@ -118,7 +118,7 @@ def get_subtitles(audio_paths: list, output_srt: 
bool, output_dir: str, transcri return subtitles_path def translate_subtitles(subtitles_path: dict, target_language: str): - translator = Translator() + translator = GoogleTranslator(source='auto', target=target_language) translated_subtitles = {} for path, srt_path in subtitles_path.items(): @@ -130,7 +130,7 @@ def translate_subtitles(subtitles_path: dict, target_language: str): translated_lines = [] for line in lines: if line.strip() and not line[0].isdigit() and '-->' not in line: - translated_line = translator.translate(line.strip(), dest=target_language).text + translated_line = translator.translate(line.strip()) translated_lines.append(translated_line + '\n') else: translated_lines.append(line)