Skip to content

Commit

Permalink
Add subtitles translation using EasyNMT and OpusMT libraries
Browse files Browse the repository at this point in the history
  • Loading branch information
Sirozha1337 committed Jan 30, 2024
1 parent 1c0cdb6 commit 8f9d069
Show file tree
Hide file tree
Showing 15 changed files with 324 additions and 32 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install pylint
pip install wheel
pip install -r requirements.txt
- name: Analysing the code with pylint
run: |
Expand Down
23 changes: 23 additions & 0 deletions .github/workflows/setup.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
name: Setup

on: [push]

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install application
run: |
pip install wheel
pip install -e .
- name: Check that package was installed successfully
run: |
faster_auto_subtitle -h
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ This repository uses `ffmpeg` and [OpenAI's Whisper](https://openai.com/blog/whi

## Installation

To get started, you'll need Python 3.7 or newer. Install the binary by running the following command:
To get started, you'll need Python 3.9 or newer. Install the binary by running the following commands:

pip install wheel

pip install git+https://github.com/Sirozha1337/faster-auto-subtitle.git@dev

Expand Down Expand Up @@ -37,6 +39,12 @@ Adding `--task translate` will translate the subtitles into English:

faster_auto_subtitle /path/to/video.mp4 --task translate

Adding `--target_language {2-letter-language-code}` will translate the subtitles into the specified language using [Opus-MT](https://github.com/Helsinki-NLP/Opus-MT):

faster_auto_subtitle /path/to/video.mp4 --target_language fr

This will require downloading the appropriate model. If direct translation is not available, it will attempt translation from the source language to English and then from English to the target language.

Run the following to view all available options:

faster_auto_subtitle --help
Expand All @@ -49,7 +57,7 @@ Higher `beam_size` usually leads to greater accuracy, but slows down the process

Setting higher `no_speech_threshold` could be useful for videos with a lot of background noise to stop Whisper from "hallucinating" subtitles for it.

In my experience settings option `condition_on_previous_text` to `False` dramatically increases accurracy for videos like TV Shows with an intro song at the start.
In my experience, setting the option `condition_on_previous_text` to `False` dramatically increases accuracy for videos like TV shows with an intro song at the start.

You can use `sample_interval` parameter to generate subtitles for a portion of the video to play around with those parameters:

Expand Down
7 changes: 6 additions & 1 deletion auto_subtitle/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,16 @@ def main():
parser.add_argument("--task", type=str, default="transcribe",
choices=["transcribe", "translate"],
help="whether to perform X->X speech recognition ('transcribe') \
or X->English translation ('translate')")
or X->Language translation ('translate')")
parser.add_argument("--language", type=str, default="auto",
choices=LANGUAGE_CODES,
help="What is the origin language of the video? \
If unset, it is detected automatically.")
parser.add_argument("--target_language", type=str, default="en",
choices=LANGUAGE_CODES,
help="Desired language to translate subtitles to. \
If language is not en, Opus-MT will be used. \
See https://github.com/Helsinki-NLP/Opus-MT.")

args = parser.parse_args().__dict__

Expand Down
78 changes: 60 additions & 18 deletions auto_subtitle/main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import os
import warnings
import tempfile
from .utils.files import filename, write_srt
from .utils.ffmpeg import get_audio, overlay_subtitles
from .utils.whisper import WhisperAI
from .translation.easynmt_utils import EasyNMTWrapper


def process(args: dict):
Expand All @@ -12,52 +12,94 @@ def process(args: dict):
output_srt: bool = args.pop("output_srt")
srt_only: bool = args.pop("srt_only")
language: str = args.pop("language")
sample_interval: str = args.pop("sample_interval")
sample_interval: list = args.pop("sample_interval")
target_language: str = args.pop("target_language")

os.makedirs(output_dir, exist_ok=True)

if model_name.endswith(".en"):
warnings.warn(
f"{model_name} is an English-only model, forcing English detection.")
args["language"] = "en"
language = "en"
# if translate task used and language argument is set, then use it
elif language != "auto":
args["language"] = language

if target_language != 'en':
warnings.warn(
f"{target_language} is not English, Opus-MT will be used to perform translation.")
args['task'] = 'transcribe'

audios = get_audio(args.pop("video"), args.pop(
'audio_channel'), sample_interval)

model_args = {}
model_args["model_size_or_path"] = model_name
model_args["device"] = args.pop("device")
model_args["compute_type"] = args.pop("compute_type")
model_args = {
"model_size_or_path": model_name,
"device": args.pop("device"),
"compute_type": args.pop("compute_type")
}

subtitles = get_subtitles(audios, model_args, args)
print('Subtitles generated.')

if target_language != 'en':
print('Translating subtitles... This might take a while.')
subtitles = translate_subtitles(
subtitles, language, target_language, model_args)

srt_output_dir = output_dir if output_srt or srt_only else tempfile.gettempdir()
subtitles = get_subtitles(audios, srt_output_dir, model_args, args)
if output_srt or srt_only:
print('Saving subtitle files...')
save_subtitles(subtitles, output_dir)

if srt_only:
return

overlay_subtitles(subtitles, output_dir, sample_interval)


def get_subtitles(audio_paths: list, output_dir: str,
model_args: dict, transcribe_args: dict):
def translate_subtitles(subtitles: dict, source_lang: str, target_lang: str, model_args: dict):
    """Translate the segments of every subtitle entry into ``target_lang``.

    Args:
        subtitles: Mapping of an arbitrary key to a dict that holds at least
            ``'segments'`` (the items to translate) and ``'language'`` (the
            language recorded for that entry).
        source_lang: Language code of the source text. When it is empty,
            ``None`` or ``"auto"``, the entry's own ``'language'`` value is
            used instead.
        target_lang: Language code to translate into.
        model_args: Model options; only ``model_args['device']`` is read.

    Returns:
        A new dict with the same keys. Each value is a shallow copy of the
        input entry whose ``'segments'`` is replaced by the translated
        segments; the input entries are not modified.
    """
    model = EasyNMTWrapper(device=model_args['device'])

    translated_subtitles = {}
    for key, subtitle in subtitles.items():
        # "auto" is the default of the --language CLI option and means
        # "not specified"; it is not a real language code, so fall back to
        # the language stored on the entry, same as for ''/None.
        src_lang = source_lang
        if not src_lang or src_lang == 'auto':
            src_lang = subtitle['language']

        translated_segments = model.translate(
            subtitle['segments'], src_lang, target_lang)

        # Shallow copy so the caller's entry keeps its original segments.
        translated_subtitle = subtitle.copy()
        translated_subtitle['segments'] = translated_segments
        translated_subtitles[key] = translated_subtitle

    return translated_subtitles


def save_subtitles(subtitles: dict, output_dir: str):
    """Write each subtitle entry to an ``.srt`` file inside ``output_dir``.

    For every ``path -> entry`` pair the target file is named after
    ``filename(path)`` with an ``.srt`` suffix. The resulting path is stored
    on the entry under ``"output_path"`` before the segments are written.
    """
    for source_path, entry in subtitles.items():
        srt_name = f"{filename(source_path)}.srt"
        entry["output_path"] = os.path.join(output_dir, srt_name)

        print(f'Saving to path {entry["output_path"]}')
        with open(entry["output_path"], "w", encoding="utf-8") as srt_file:
            write_srt(entry['segments'], file=srt_file)


def get_subtitles(audio_paths: dict, model_args: dict, transcribe_args: dict):
model = WhisperAI(model_args, transcribe_args)

subtitles_path = {}
subtitles = {}

for path, audio_path in audio_paths.items():
print(
f"Generating subtitles for {filename(path)}... This might take a while."
)
srt_path = os.path.join(output_dir, f"{filename(path)}.srt")

segments = model.transcribe(audio_path)

with open(srt_path, "w", encoding="utf-8") as srt:
write_srt(segments, file=srt)
segments, info = model.transcribe(audio_path)

subtitles_path[path] = srt_path
subtitles[path] = {'segments': list(
segments), 'language': info.language}

return subtitles_path
return subtitles
Empty file.
24 changes: 24 additions & 0 deletions auto_subtitle/translation/easynmt_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from easynmt import EasyNMT
from faster_whisper.transcribe import Segment
from .opusmt_utils import OpusMT


class EasyNMTWrapper:
    """Thin wrapper that translates segment text through an EasyNMT model.

    The EasyNMT model is created under the name ``'opus-mt'`` and is handed
    an ``OpusMT`` instance as its translator.
    """

    def __init__(self, device):
        """Create the translator and the EasyNMT model.

        Args:
            device: Device name forwarded to EasyNMT. The value ``'auto'``
                is forwarded as ``None``.
        """
        resolved_device = None if device == 'auto' else device
        self.translator = OpusMT()
        self.model = EasyNMT('opus-mt',
                             translator=self.translator,
                             device=resolved_device)

    def translate(self, segments: list[Segment], source_lang: str, target_lang: str):
        """Return copies of ``segments`` whose text is translated.

        Args:
            segments: Items exposing ``.text`` and ``._replace(text=...)``.
            source_lang: Language code of the input text.
            target_lang: Language code to translate into.

        Returns:
            A list with one entry per input segment, in the same order,
            each produced by ``segment._replace(text=<translated text>)``.
        """
        texts = [item.text for item in segments]
        # Called before every translation, as in the original flow.
        self.translator.load_available_models()

        translations = self.model.translate(texts, target_lang,
                                            source_lang, show_progress_bar=True)

        # Index-based pairing: the i-th translation belongs to the i-th segment.
        return [
            item._replace(text=translations[position])
            for position, item in enumerate(segments)
        ]
20 changes: 20 additions & 0 deletions auto_subtitle/translation/languages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import langcodes
from transformers.models.marian.convert_marian_tatoeba_to_pytorch import GROUP_MEMBERS


def to_alpha2_languages(languages):
    """Return the set of all codes produced by converting each language.

    Every item of ``languages`` is passed to ``__to_alpha2_language`` and
    the resulting collections are flattened into one set.
    """
    per_language = [__to_alpha2_language(entry) for entry in languages]
    return {code for codes in per_language for code in codes}


def __to_alpha2_language(language):
    """Convert one language identifier into a collection of language codes.

    - A two-character identifier is returned unchanged in a one-item list.
    - An identifier present in ``GROUP_MEMBERS`` yields a set with the
      ``.language`` of each member listed at index 1 of its entry.
    - Anything else yields a one-item list with the ``.language`` that
      ``langcodes`` resolves for it.
    """
    if len(language) == 2:
        return [language]

    if language not in GROUP_MEMBERS:
        return [langcodes.Language.get(language).language]

    members = GROUP_MEMBERS[language][1]
    return {langcodes.Language.get(member).language for member in members}


def to_alpha3_language(language):
    """Return the three-letter code ``langcodes`` gives for ``language``."""
    parsed = langcodes.Language.get(language)
    return parsed.to_alpha3()
Loading

0 comments on commit 8f9d069

Please sign in to comment.