Skip to content

Commit

Permalink
Updated triton patch, added retry and backup mechanism for genius lyrics fetch
Browse files Browse the repository at this point in the history
  • Loading branch information
beveradb committed Jul 18, 2024
1 parent 99ce129 commit 4c64e58
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 54 deletions.
22 changes: 11 additions & 11 deletions .github/removetriton.patch
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
1164d1163
1160d1159
< triton = ">=2.0.0,<3"
2067d2065
< triton = {version = "2.3.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""}
2163,2185d2160
2081d2079
< triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""}
2177,2199d2174
< name = "triton"
< version = "2.3.0"
< version = "2.3.1"
< description = "A language and compiler for custom Deep Learning operations"
< optional = false
< python-versions = "*"
< files = [
< {file = "triton-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ce4b8ff70c48e47274c66f269cce8861cf1dc347ceeb7a67414ca151b1822d8"},
< {file = "triton-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c3d9607f85103afdb279938fc1dd2a66e4f5999a58eb48a346bd42738f986dd"},
< {file = "triton-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218d742e67480d9581bafb73ed598416cc8a56f6316152e5562ee65e33de01c0"},
< {file = "triton-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381ec6b3dac06922d3e4099cfc943ef032893b25415de295e82b1a82b0359d2c"},
< {file = "triton-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:038e06a09c06a164fef9c48de3af1e13a63dc1ba3c792871e61a8e79720ea440"},
< {file = "triton-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d8f636e0341ac348899a47a057c3daea99ea7db31528a225a3ba4ded28ccc65"},
< {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"},
< {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"},
< {file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"},
< {file = "triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"},
< {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"},
< {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"},
< ]
<
< [package.dependencies]
Expand Down
36 changes: 26 additions & 10 deletions lyrics_transcriber/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from .utils import subtitles
from typing import List, Optional
from openai import OpenAI
from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type
import requests


class LyricsTranscriber:
Expand Down Expand Up @@ -536,6 +538,16 @@ def write_spotify_lyrics_plain_text(self):
self.outputs["spotify_lyrics_text"] += line["words"].strip() + "\n"
f.write(line["words"].strip() + "\n")

@retry(
    # Only exceptions from the requests library's RequestException family trigger a retry.
    retry=retry_if_exception_type(requests.exceptions.RequestException),
    # Exponential backoff between attempts, bounded to the 4-60 second range.
    wait=wait_exponential(multiplier=1, min=4, max=60),
    # Stop retrying once 120 seconds (2 minutes) have elapsed.
    stop=stop_after_delay(120),
    # If every attempt fails, re-raise the last exception to the caller.
    reraise=True,
)
def fetch_genius_lyrics(self, genius, title, artist):
    """Look up a song on Genius, retrying on request failures.

    Logs the attempt at debug level, then delegates to
    ``genius.search_song(title, artist)`` and returns its result unchanged.
    The retry policy is defined entirely by the decorator above.
    """
    self.logger.debug(f"fetch_genius_lyrics attempting to fetch lyrics from Genius for {title} by {artist}")
    song = genius.search_song(title, artist)
    return song

def write_genius_lyrics_file(self):
if self.genius_api_token and self.song_known:
self.logger.debug(f"attempting genius fetch as genius_api_token and song name was set")
Expand All @@ -556,18 +568,22 @@ def write_genius_lyrics_file(self):
self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius")
genius = lyricsgenius.Genius(self.genius_api_token, verbose=(self.log_level == logging.DEBUG))

song = genius.search_song(self.title, self.artist)
if song is None:
self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}')
return
lyrics = self.clean_genius_lyrics(song.lyrics)
try:
song = self.fetch_genius_lyrics(genius, self.title, self.artist)
if song is None:
self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}')
return
lyrics = self.clean_genius_lyrics(song.lyrics)

self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f:
f.write(lyrics)
self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}")
with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f:
f.write(lyrics)

self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
self.outputs["genius_lyrics_text"] = lyrics
self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath
self.outputs["genius_lyrics_text"] = lyrics
except requests.exceptions.RequestException as e:
self.logger.error(f"Failed to fetch lyrics from Genius after multiple retries: {e}")
raise

def clean_genius_lyrics(self, lyrics):
lyrics = lyrics.replace("\\n", "\n")
Expand Down
53 changes: 22 additions & 31 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lyrics-transcriber"
version = "0.16.3"
version = "0.16.4"
description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
Expand Down Expand Up @@ -30,6 +30,7 @@ openai-whisper = ">=20231117"
transformers = ">=4"
auditok = ">=0.2"
whisper-timestamped = ">=1"
tenacity = ">=8"
# Note: after adding openai-whisper and whisper-timestamped with poetry lock, I then removed all traces of triton
# from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper.
# This was the only way I was able to get a working cross-platform build published to PyPI.
Expand All @@ -47,4 +48,4 @@ lyrics-transcriber = 'lyrics_transcriber.utils.cli:main'

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
build-backend = "poetry.core.masonry.api"

0 comments on commit 4c64e58

Please sign in to comment.