diff --git a/beetsplug/lyrics.py b/beetsplug/lyrics.py index 64657db7b7..147569c627 100644 --- a/beetsplug/lyrics.py +++ b/beetsplug/lyrics.py @@ -26,7 +26,11 @@ import unicodedata import urllib import warnings -from typing import TYPE_CHECKING +from contextlib import suppress +from dataclasses import dataclass +from functools import cached_property, partial, total_ordering +from http import HTTPStatus +from typing import TYPE_CHECKING, Iterable, Iterator import requests from typing_extensions import TypedDict @@ -112,6 +116,10 @@ """ +class NotFoundError(requests.exceptions.HTTPError): + pass + + # Utilities. @@ -310,8 +318,74 @@ class LRCLibItem(TypedDict): syncedLyrics: str | None +@dataclass +@total_ordering +class LRCLyrics: + #: Percentage tolerance for max duration difference between lyrics and item. + DURATION_DIFF_TOLERANCE = 0.05 + + target_duration: float + duration: float + instrumental: bool + plain: str + synced: str | None + + def __le__(self, other: LRCLyrics) -> bool: + """Compare two lyrics items by their score.""" + return self.dist < other.dist + + @classmethod + def make(cls, candidate: LRCLibItem, target_duration: float) -> LRCLyrics: + return cls( + target_duration, + candidate["duration"], + candidate["instrumental"], + candidate["plainLyrics"], + candidate["syncedLyrics"], + ) + + @cached_property + def duration_dist(self) -> float: + """Return the absolute difference between lyrics and target duration.""" + return abs(self.duration - self.target_duration) + + @cached_property + def is_valid(self) -> bool: + """Return whether the lyrics item is valid. + Lyrics duration must be within the tolerance defined by + :attr:`DURATION_DIFF_TOLERANCE`. + """ + return ( + self.duration_dist + <= self.target_duration * self.DURATION_DIFF_TOLERANCE + ) + + @cached_property + def dist(self) -> tuple[float, bool]: + """Distance/score of the given lyrics item. + + Return a tuple with the following values: + 1. Absolute difference between lyrics and target duration + 2. Boolean telling whether synced lyrics are available. + + Best lyrics match is the one that has the closest duration to + ``target_duration`` and has synced lyrics available. + """ + return self.duration_dist, not self.synced + + def get_text(self, want_synced: bool) -> str: + if self.instrumental: + return INSTRUMENTAL_LYRICS + + return self.synced if want_synced and self.synced else self.plain + + class LRCLib(Backend): - base_url = "https://lrclib.net/api/search" + """Fetch lyrics from the LRCLib API.""" + + BASE_URL = "https://lrclib.net/api" + GET_URL = f"{BASE_URL}/get" + SEARCH_URL = f"{BASE_URL}/search" def warn(self, message: str, *args) -> None: """Log a warning message with the class name.""" @@ -322,65 +396,51 @@ def fetch_json(self, *args, **kwargs): kwargs.setdefault("timeout", 10) kwargs.setdefault("headers", {"User-Agent": USER_AGENT}) r = requests.get(*args, **kwargs) + if r.status_code == HTTPStatus.NOT_FOUND: + raise NotFoundError("HTTP Error: Not Found", response=r) r.raise_for_status() return r.json() - @staticmethod - def get_rank( - target_duration: float, item: LRCLibItem - ) -> tuple[float, bool]: - """Rank the given lyrics item. + def fetch_candidates( + self, artist: str, title: str, album: str + ) -> Iterator[list[LRCLibItem]]: + """Yield lyrics candidates for the given song data. - Return a tuple with the following values: - 1. Absolute difference between lyrics and target duration - 2. Boolean telling whether synced lyrics are available. - """ - return ( - abs(item["duration"] - target_duration), - not item["syncedLyrics"], - ) + Firstly, attempt to GET lyrics directly, and then search the API if + lyrics are not found or the duration does not match. - @classmethod - def pick_lyrics( - cls, target_duration: float, data: list[LRCLibItem] - ) -> LRCLibItem: - """Return best matching lyrics item from the given list. + Return an iterator over lists of candidates. + """ + base_params = {"artist_name": artist, "track_name": title} + get_params = {**base_params, "album_name": album} + with suppress(NotFoundError): + yield [self.fetch_json(self.GET_URL, params=get_params)] - Best lyrics match is the one that has the closest duration to - ``target_duration`` and has synced lyrics available. + yield self.fetch_json(self.SEARCH_URL, params=base_params) - Note that the incoming list is guaranteed to be non-empty. - """ - return min(data, key=lambda item: cls.get_rank(target_duration, item)) + @classmethod + def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None: + """Return best matching lyrics item from the given list.""" + return min((li for li in lyrics if li.is_valid), default=None) def fetch( self, artist: str, title: str, album: str, length: float ) -> str | None: - """Fetch lyrics for the given artist, title, and album.""" - params = { - "artist_name": artist, - "track_name": title, - "album_name": album, - } - + """Fetch lyrics text for the given song data.""" + fetch = partial(self.fetch_candidates, artist, title, album) + make = partial(LRCLyrics.make, target_duration=length) + pick = self.pick_best_match try: - data = self.fetch_json(self.base_url, params=params) + return next( + filter(None, map(pick, (map(make, x) for x in fetch()))) + ).get_text(self.config["synced"]) + except StopIteration: + pass except requests.JSONDecodeError: self.warn("Could not decode response JSON data") except requests.RequestException as exc: self.warn("Request error: {}", exc) - else: - if data: - item = self.pick_lyrics(length, data) - - if item["instrumental"]: - return INSTRUMENTAL_LYRICS - - if self.config["synced"] and (synced := item["syncedLyrics"]): - return synced - - return item["plainLyrics"] return None diff --git a/docs/changelog.rst b/docs/changelog.rst index 8327abbfa4..d58151abcf 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -51,11 +51,12 @@ Bug fixes: * :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the artist or title is missing and ignore ``artist_sort`` value if it is empty. :bug:`2635` -* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. Instead of - attempting to fetch lyrics for a specific album, artist, title and duration - combination, the plugin now performs a search which yields many results. - Update the default ``sources`` configuration to prioritize ``lrclib`` over - other sources since it returns reliable results quicker than others. +* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. If we + cannot find lyrics for a specific album, artist, title combination, the + plugin now tries to search for the artist and title and picks the most + relevant result. Update the default ``sources`` configuration to prioritize + ``lrclib`` over other sources since it returns reliable results quicker than + others. :bug:`5102` For packagers: diff --git a/test/plugins/test_lyrics.py b/test/plugins/test_lyrics.py index 9fa455464b..7fe7a60400 100644 --- a/test/plugins/test_lyrics.py +++ b/test/plugins/test_lyrics.py @@ -207,7 +207,7 @@ def test_backend_source(self, backend): """ title = "Lady Madonna" - res = backend.fetch("The Beatles", title, "", 0.0) + res = backend.fetch("The Beatles", title, "", 180.0) assert res assert PHRASE_BY_TITLE[title] in res.lower() @@ -423,7 +423,8 @@ def request_kwargs(self, response_data): @pytest.fixture def fetch_lyrics(self, backend, requests_mock, request_kwargs): - requests_mock.get(backend.base_url, **request_kwargs) + requests_mock.get(backend.GET_URL, status_code=HTTPStatus.NOT_FOUND) + requests_mock.get(backend.SEARCH_URL, **request_kwargs) return partial(backend.fetch, "la", "la", "la", self.ITEM_DURATION) @@ -440,6 +441,7 @@ def test_synced_config_option(self, fetch_lyrics, expected_lyrics): [ _p([], None, id="handle non-matching lyrics"), _p([lyrics_match()], "synced", id="synced when available"), + _p([lyrics_match(duration=1)], None, id="none: duration too short"), _p( [lyrics_match(instrumental=True)], "[Instrumental]",