Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added subtitle translation into other languages #103

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ Adding `--task translate` will translate the subtitles into English:

auto_subtitle /path/to/video.mp4 --task translate

Adding `--language_out es` will translate the subtitles into Spanish. Replace `es` with the code of any other supported language:

auto_subtitle /path/to/video.mp4 --language_out es

Run the following to view all available options:

auto_subtitle --help
Expand Down
32 changes: 32 additions & 0 deletions auto_subtitle/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import warnings
import tempfile
from .utils import filename, str2bool, write_srt
from deep_translator import GoogleTranslator # Used to translate the generated subtitles into the target language


def main():
Expand All @@ -27,13 +28,16 @@ def main():
"transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
parser.add_argument("--language", type=str, default="auto", choices=["auto","af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","zh"],
help="What is the origin language of the video? If unset, it is detected automatically.")
parser.add_argument("--language_out", type=str, default=None,
help="The target language for translation. If not set, no translation will be performed.")

args = parser.parse_args().__dict__
model_name: str = args.pop("model")
output_dir: str = args.pop("output_dir")
output_srt: bool = args.pop("output_srt")
srt_only: bool = args.pop("srt_only")
language: str = args.pop("language")
language_out: str = args.pop("language_out")

os.makedirs(output_dir, exist_ok=True)

Expand All @@ -51,6 +55,9 @@ def main():
audios, output_srt or srt_only, output_dir, lambda audio_path: model.transcribe(audio_path, **args)
)

if language_out:
subtitles = translate_subtitles(subtitles, language_out)

if srt_only:
return

Expand Down Expand Up @@ -110,6 +117,31 @@ def get_subtitles(audio_paths: list, output_srt: bool, output_dir: str, transcri

return subtitles_path

def translate_subtitles(subtitles_path: dict, target_language: str):
    """Translate the cue text of each SRT file into ``target_language``.

    Every SRT file referenced by ``subtitles_path`` is read, its text lines
    are sent through ``GoogleTranslator`` (source language auto-detected),
    and the result is written next to the original file as
    ``<name>_<target_language><ext>``. The original SRT file is left
    untouched.

    Args:
        subtitles_path: Mapping of source path to the path of its SRT file.
        target_language: Language code passed to ``GoogleTranslator`` as the
            translation target.

    Returns:
        A dict mapping each source path to the path of the translated SRT
        file that was written for it.
    """
    translator = GoogleTranslator(source='auto', target=target_language)
    translated_subtitles = {}

    for path, srt_path in subtitles_path.items():
        print(f"Translating subtitles for {filename(path)} to {target_language}...")

        with open(srt_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()

        translated_lines = []
        for line in lines:
            text = line.strip()
            # Blank separators, cue index lines (digits only) and timing
            # lines are SRT structure and must be copied through verbatim.
            # Checking the whole line with isdigit() -- rather than only its
            # first character -- keeps cue text such as "3 people left"
            # eligible for translation.
            if not text or text.isdigit() or '-->' in text:
                translated_lines.append(line)
                continue

            # NOTE(review): fall back to the original text if the translator
            # hands back None/empty, so one odd line cannot abort the file.
            translated_text = translator.translate(text) or text
            translated_lines.append(translated_text + '\n')

        # Insert the language suffix before the extension only; a plain
        # str.replace('.srt', ...) would also rewrite ".srt" occurring in a
        # directory name, and would overwrite the source file when the path
        # does not contain ".srt" at all.
        root, ext = os.path.splitext(srt_path)
        translated_srt_path = f"{root}_{target_language}{ext}"
        with open(translated_srt_path, 'w', encoding='utf-8') as file:
            file.writelines(translated_lines)

        translated_subtitles[path] = translated_srt_path

    return translated_subtitles

# Run the command-line entry point only when this module is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
openai-whisper
deep_translator
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
author="Miguel Piedrafita",
install_requires=[
'openai-whisper',
'deep_translator',
],
description="Automatically generate and embed subtitles into your videos",
entry_points={
Expand Down