-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add subtitles translation using EasyNMT and OpusMT libraries
- Loading branch information
1 parent
1c0cdb6
commit 8f9d069
Showing
15 changed files
with
324 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Smoke-test workflow: installs the package in editable mode and checks that
# the command-line entry point starts.
name: Setup

# Triggered by every push.
on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9"]
    steps:
      # Current major versions of the official actions; the v3 releases run
      # on the deprecated Node 16 runtime.
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install application
        run: |
          pip install wheel
          pip install -e .
      - name: Check that package was installed successfully
        run: |
          faster_auto_subtitle -h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from easynmt import EasyNMT | ||
from faster_whisper.transcribe import Segment | ||
from .opusmt_utils import OpusMT | ||
|
||
|
||
class EasyNMTWrapper:
    """Translate transcription segments with an EasyNMT ``opus-mt`` model.

    The wrapper owns one ``OpusMT`` translator and one ``EasyNMT`` model built
    around it, and exposes a single ``translate`` method that maps a list of
    segments to a list of segments with translated text.
    """

    def __init__(self, device):
        """Create the translator and the EasyNMT model that uses it.

        Args:
            device: Device identifier forwarded to ``EasyNMT``. The value
                ``'auto'`` is forwarded as ``None``.
        """
        self.translator = OpusMT()
        self.model = EasyNMT('opus-mt',
                             translator=self.translator,
                             device=device if device != 'auto' else None)

    def translate(self, segments: "list[Segment]", source_lang: str, target_lang: str):
        """Return copies of *segments* whose ``text`` is translated.

        Args:
            segments: Segments to translate. Each must expose ``text`` and
                ``_replace(text=...)``.
            source_lang: Language code of the segment text.
            target_lang: Language code to translate into.

        Returns:
            A new list with one segment per input segment, in the same order.
            The input segments are not modified. An empty input yields an
            empty list without touching the translator or the model.

        Raises:
            IndexError: If the model returns fewer translations than there
                are segments.
        """
        # Nothing to translate: skip refreshing the model list and the model
        # call entirely.
        if not segments:
            return []

        source_text = [segment.text for segment in segments]
        self.translator.load_available_models()

        # EasyNMT takes the target language before the source language.
        translated_text = self.model.translate(source_text, target_lang,
                                               source_lang, show_progress_bar=True)

        # Index explicitly (rather than zip) so a short result raises instead
        # of silently dropping trailing segments.
        return [segment._replace(text=translated_text[index])
                for index, segment in enumerate(segments)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import langcodes | ||
from transformers.models.marian.convert_marian_tatoeba_to_pytorch import GROUP_MEMBERS | ||
|
||
|
||
def to_alpha2_languages(languages):
    """Return the set of language codes that *languages* expands to.

    Every entry is expanded with ``__to_alpha2_language`` and the results are
    merged into one set, so duplicates collapse.

    Args:
        languages: Iterable of language codes. Entries may be two-character
            codes, keys of ``GROUP_MEMBERS``, or any other tag accepted by
            ``langcodes.Language.get``.

    Returns:
        A ``set`` of codes; empty when *languages* is empty.
    """
    return {
        code
        for language in languages
        for code in __to_alpha2_language(language)
    }


def __to_alpha2_language(language):
    """Expand one language code into a set of codes.

    Two-character codes are returned unchanged. A key of ``GROUP_MEMBERS`` is
    expanded to the ``language`` attribute of each of its members as parsed by
    ``langcodes.Language.get``. Anything else is parsed the same way and
    yields a single code.

    Returns:
        A ``set`` in every branch, so callers get one consistent type.
    """
    if len(language) == 2:
        return {language}

    if language in GROUP_MEMBERS:
        # Element 1 of a GROUP_MEMBERS entry is iterated as the member codes.
        return {
            langcodes.Language.get(member).language
            for member in GROUP_MEMBERS[language][1]
        }

    return {langcodes.Language.get(language).language}
|
||
|
||
def to_alpha3_language(language):
    """Return the three-letter code for *language*.

    The tag is parsed with ``langcodes.Language.get`` and the value of its
    ``to_alpha3()`` method is returned.
    """
    parsed = langcodes.Language.get(language)
    return parsed.to_alpha3()
Oops, something went wrong.