From b86654fbafc3b58919c065b6db84728f73ee30a5 Mon Sep 17 00:00:00 2001
From: sigma67 <16363825+sigma67@users.noreply.github.com>
Date: Tue, 17 Dec 2024 22:12:06 +0100
Subject: [PATCH] Added Lyrics w. Timestamps #662 (#693)

* Added Lyrics w. Timestamps

Added `get_lyrics_with_timestamps` to get lyrics with timestamps.
The method doesn't try to parse the response as normal lyrics if no lyrics with timestamps are returned (it could be changed to do so, though, since the format is the same).

* Update browsing.py

* Combined both get_lyrics methods into one and fixed some typechecking errors

* Fixed a missing `hasTimestamps` and added the doc comments to the overloads, because VS Code didn't show them otherwise

* Removed the old get_lyrics_with_timestamps method

because idk where it came from...

* fixed remaining issues

* Update uploads.py

reverted some changes that I moved to an extra PR

* removed variable `context` from the Mixin, as its use was replaced by `yt.as_mobile()`

* fix some formatting complaints by ruff and mypy

* please the linter

* fix union syntax

* improve typing in ytmusic.py

* improve typing

* replace docsbuild.yml with RTD

---------

Co-authored-by: Hendrik Horstmann <65970327+heinrich26@users.noreply.github.com>
Co-authored-by: henrich26 <hendrik-horstmann@o2mail.de>
---
 .github/workflows/docsbuild.yml |  33 ----------
 tests/mixins/test_browsing.py   |  20 +++++-
 ytmusicapi/__init__.py          |   2 +-
 ytmusicapi/mixins/_protocol.py  |   9 ++-
 ytmusicapi/mixins/_utils.py     |   5 +-
 ytmusicapi/mixins/browsing.py   | 109 ++++++++++++++++++++++++++------
 ytmusicapi/mixins/library.py    |  14 ++--
 ytmusicapi/mixins/uploads.py    |  12 ++--
 ytmusicapi/models/__init__.py   |   3 +
 ytmusicapi/models/lyrics.py     |  46 ++++++++++++++
 ytmusicapi/navigation.py        |  10 +++
 ytmusicapi/ytmusic.py           |  65 +++++++++++++++----
 12 files changed, 246 insertions(+), 82 deletions(-)
 delete mode 100644 .github/workflows/docsbuild.yml
 create mode 100644 ytmusicapi/models/__init__.py
 create mode 100644 ytmusicapi/models/lyrics.py

diff --git a/.github/workflows/docsbuild.yml b/.github/workflows/docsbuild.yml
deleted file mode 100644
index 371abb0f..00000000
--- a/.github/workflows/docsbuild.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-name: Build Documentation
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - ytmusicapi/**
-      - docs/**
-  pull_request:
-    branches:
-      - main
-    paths:
-      - ytmusicapi/**
-      - docs/**
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Set up Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.x'
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install . sphinx sphinx-rtd-theme
-    - name: Build documentation
-      run: |
-        cd docs
-        make html
diff --git a/tests/mixins/test_browsing.py b/tests/mixins/test_browsing.py
index 83c9909c..22446525 100644
--- a/tests/mixins/test_browsing.py
+++ b/tests/mixins/test_browsing.py
@@ -6,6 +6,7 @@
 import pytest
 
 from tests.test_helpers import is_ci
+from ytmusicapi.models.lyrics import LyricLine
 
 
 class TestBrowsing:
@@ -173,9 +174,24 @@ def test_get_song_related_content(self, yt_oauth, sample_video):
 
     def test_get_lyrics(self, config, yt, sample_video):
         playlist = yt.get_watch_playlist(sample_video)
+        # test normal lyrics
         lyrics_song = yt.get_lyrics(playlist["lyrics"])
-        assert lyrics_song["lyrics"] is not None
-        assert lyrics_song["source"] is not None
+        assert lyrics_song is not None
+        assert isinstance(lyrics_song["lyrics"], str)
+        assert lyrics_song["hasTimestamps"] is False
+
+        # test lyrics with timestamps
+        lyrics_song = yt.get_lyrics(playlist["lyrics"], timestamps=True)
+        assert lyrics_song is not None
+        assert len(lyrics_song["lyrics"]) >= 1
+        assert lyrics_song["hasTimestamps"] is True
+
+        # check the LyricLine object
+        song = lyrics_song["lyrics"][0]
+        assert isinstance(song, LyricLine)
+        assert isinstance(song.text, str)
+        assert song.start_time <= song.end_time
+        assert isinstance(song.id, int)
 
         playlist = yt.get_watch_playlist(config["uploads"]["private_upload_id"])
         assert playlist["lyrics"] is None
diff --git a/ytmusicapi/__init__.py b/ytmusicapi/__init__.py
index 4ed851d0..83bc676f 100644
--- a/ytmusicapi/__init__.py
+++ b/ytmusicapi/__init__.py
@@ -9,7 +9,7 @@
     # package is not installed
     pass
 
-__copyright__ = "Copyright 2023 sigma67"
+__copyright__ = "Copyright 2024 sigma67"
 __license__ = "MIT"
 __title__ = "ytmusicapi"
 __all__ = ["YTMusic", "setup", "setup_oauth"]
diff --git a/ytmusicapi/mixins/_protocol.py b/ytmusicapi/mixins/_protocol.py
index 9b8aeb0f..0ae2b959 100644
--- a/ytmusicapi/mixins/_protocol.py
+++ b/ytmusicapi/mixins/_protocol.py
@@ -1,8 +1,11 @@
 """protocol that defines the functions available to mixins"""
 
+from collections.abc import Iterator
+from contextlib import contextmanager
 from typing import Optional, Protocol
 
 from requests import Response
+from requests.structures import CaseInsensitiveDict
 
 from ytmusicapi.auth.types import AuthType
 from ytmusicapi.parsers.i18n import Parser
@@ -26,6 +29,10 @@ def _send_request(self, endpoint: str, body: dict, additionalParams: str = "") -
     def _send_get_request(self, url: str, params: Optional[dict] = None) -> Response:
         """for sending get requests to YouTube Music"""
 
+    @contextmanager
+    def as_mobile(self) -> Iterator[None]:
+        """context manager under which requests are sent as the YouTube Music mobile app"""
+
     @property
-    def headers(self) -> dict[str, str]:
+    def headers(self) -> CaseInsensitiveDict[str]:
         """property for getting request headers"""
diff --git a/ytmusicapi/mixins/_utils.py b/ytmusicapi/mixins/_utils.py
index 530015fd..9a262db9 100644
--- a/ytmusicapi/mixins/_utils.py
+++ b/ytmusicapi/mixins/_utils.py
@@ -1,8 +1,11 @@
 import re
 from datetime import date
+from typing import Literal
 
 from ytmusicapi.exceptions import YTMusicUserError
 
+LibraryOrderType = Literal["a_to_z", "z_to_a", "recently_added"]
+
 
 def prepare_like_endpoint(rating):
     if rating == "LIKE":
@@ -24,7 +27,7 @@ def validate_order_parameter(order):
         )
 
 
-def prepare_order_params(order):
+def prepare_order_params(order: LibraryOrderType):
     orders = ["a_to_z", "z_to_a", "recently_added"]
     if order is not None:
         # determine order_params via `.contents.singleColumnBrowseResultsRenderer.tabs[0].tabRenderer.content.sectionListRenderer.contents[1].itemSectionRenderer.header.itemSectionTabbedHeaderRenderer.endItems[1].dropdownRenderer.entries[].dropdownItemRenderer.onSelectCommand.browseEndpoint.params` of `/youtubei/v1/browse` response
diff --git a/ytmusicapi/mixins/browsing.py b/ytmusicapi/mixins/browsing.py
index 477c1ec7..b96ce4f5 100644
--- a/ytmusicapi/mixins/browsing.py
+++ b/ytmusicapi/mixins/browsing.py
@@ -1,12 +1,13 @@
 import re
 import warnings
-from typing import Any, Optional
+from typing import Any, Optional, Union, cast, overload
 
 from ytmusicapi.continuations import (
     get_continuations,
     get_reloadable_continuation_params,
 )
 from ytmusicapi.helpers import YTM_DOMAIN, sum_total_duration
+from ytmusicapi.models.lyrics import LyricLine, Lyrics, TimedLyrics
 from ytmusicapi.parsers.albums import parse_album_header_2024
 from ytmusicapi.parsers.browsing import (
     parse_album,
@@ -276,8 +277,10 @@ def get_artist(self, channelId: str) -> dict:
         artist.update(self.parser.parse_channel_contents(results))
         return artist
 
+    ArtistOrderType = Literal["Recency", "Popularity", "Alphabetical order"]
+
     def get_artist_albums(
-        self, channelId: str, params: str, limit: Optional[int] = 100, order: Optional[str] = None
+        self, channelId: str, params: str, limit: Optional[int] = 100, order: Optional[ArtistOrderType] = None
     ) -> list[dict]:
         """
         Get the full list of an artist's albums, singles or shows
@@ -836,36 +839,100 @@ def get_song_related(self, browseId: str):
         sections = nav(response, ["contents", *SECTION_LIST])
         return parse_mixed_content(sections)
 
-    def get_lyrics(self, browseId: str) -> dict:
+    @overload
+    def get_lyrics(self, browseId: str, timestamps: Literal[False] = False) -> Optional[Lyrics]:
+        """overload for mypy only"""
+
+    @overload
+    def get_lyrics(
+        self, browseId: str, timestamps: Literal[True] = True
+    ) -> Optional[Union[Lyrics, TimedLyrics]]:
+        """overload for mypy only"""
+
+    def get_lyrics(
+        self, browseId: str, timestamps: Optional[bool] = False
+    ) -> Optional[Union[Lyrics, TimedLyrics]]:
         """
-        Returns lyrics of a song or video.
+        Returns lyrics of a song or video. When `timestamps` is set, lyrics are returned with
+        timestamps, if available.
 
-        :param browseId: Lyrics browse id obtained from ``get_watch_playlist``
-        :return: Dictionary with song lyrics.
+        :param browseId: Lyrics browseId obtained from :py:func:`get_watch_playlist` (starts with ``MPLYt...``).
+        :param timestamps: Optional. Whether to return bare lyrics or lyrics with timestamps, if available. (Default: `False`)
+        :return: Dictionary with song lyrics or ``None``, if no lyrics are found.
+            The ``hasTimestamps``-key determines the format of the data.
 
-        Example::
 
-            {
-                "lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n",
-                "source": "Source: LyricFind"
-            }
+            Example when `timestamps=False`, or no timestamps are available::
+
+                {
+                    "lyrics": "Today is gonna be the day\\nThat they're gonna throw it back to you\\n",
+                    "source": "Source: LyricFind",
+                    "hasTimestamps": False
+                }
+
+            Example when `timestamps` is set to `True` and timestamps are available::
+
+                {
+                    "lyrics": [
+                        LyricLine(
+                            text="I was a liar",
+                            start_time=9200,
+                            end_time=10630,
+                            id=1
+                        ),
+                        LyricLine(
+                            text="I gave in to the fire",
+                            start_time=10680,
+                            end_time=12540,
+                            id=2
+                        ),
+                    ],
+                    "source": "Source: LyricFind",
+                    "hasTimestamps": True
+                }
 
         """
-        lyrics = {}
         if not browseId:
             raise YTMusicUserError("Invalid browseId provided. This song might not have lyrics.")
 
-        response = self._send_request("browse", {"browseId": browseId})
-        lyrics["lyrics"] = nav(
-            response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, *DESCRIPTION], True
-        )
-        lyrics["source"] = nav(
-            response, ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF, "footer", *RUN_TEXT], True
-        )
+        if timestamps:
+            # changes and restores the client to get lyrics with timestamps (mobile only)
+            with self.as_mobile():
+                response = self._send_request("browse", {"browseId": browseId})
+        else:
+            response = self._send_request("browse", {"browseId": browseId})
+
+        # unpack the response
+        lyrics: Union[Lyrics, TimedLyrics]
+        if timestamps and (data := nav(response, TIMESTAMPED_LYRICS, True)) is not None:
+            # we got lyrics with timestamps
+            assert isinstance(data, dict)
+
+            if "timedLyricsData" not in data:  # pragma: no cover
+                return None
+
+            lyrics = TimedLyrics(
+                lyrics=list(map(LyricLine.from_raw, data["timedLyricsData"])),
+                source=data.get("sourceMessage"),
+                hasTimestamps=True,
+            )
+        else:
+            shelf = ["contents", *SECTION_LIST_ITEM, *DESCRIPTION_SHELF]
+            lyrics_str = nav(response, [*shelf, *DESCRIPTION], True)
+
+            if lyrics_str is None:  # pragma: no cover
+                return None
+
+            lyrics = Lyrics(
+                lyrics=lyrics_str,
+                # the source attribution sits under the shelf's "footer" key, not on the shelf itself
+                source=nav(response, [*shelf, "footer", *RUN_TEXT], True),
+                hasTimestamps=False,
+            )
 
-        return lyrics
+        return cast(Union[Lyrics, TimedLyrics], lyrics)
 
-    def get_basejs_url(self):
+    def get_basejs_url(self) -> str:
         """
         Extract the URL for the `base.js` script from YouTube Music.
 
diff --git a/ytmusicapi/mixins/library.py b/ytmusicapi/mixins/library.py
index c82cde26..0517c96d 100644
--- a/ytmusicapi/mixins/library.py
+++ b/ytmusicapi/mixins/library.py
@@ -46,7 +46,7 @@ def get_library_playlists(self, limit: Optional[int] = 25) -> list[dict]:
         return playlists
 
     def get_library_songs(
-        self, limit: int = 25, validate_responses: bool = False, order: Optional[str] = None
+        self, limit: int = 25, validate_responses: bool = False, order: Optional[LibraryOrderType] = None
     ) -> list[dict]:
         """
         Gets the songs in the user's library (liked videos are not included).
@@ -116,7 +116,7 @@ def get_library_songs(
 
         return songs
 
-    def get_library_albums(self, limit: int = 25, order: Optional[str] = None) -> list[dict]:
+    def get_library_albums(self, limit: int = 25, order: Optional[LibraryOrderType] = None) -> list[dict]:
         """
         Gets the albums in the user's library.
 
@@ -151,7 +151,7 @@ def get_library_albums(self, limit: int = 25, order: Optional[str] = None) -> li
             response, lambda additionalParams: self._send_request(endpoint, body, additionalParams), limit
         )
 
-    def get_library_artists(self, limit: int = 25, order: Optional[str] = None) -> list[dict]:
+    def get_library_artists(self, limit: int = 25, order: Optional[LibraryOrderType] = None) -> list[dict]:
         """
         Gets the artists of the songs in the user's library.
 
@@ -179,7 +179,9 @@ def get_library_artists(self, limit: int = 25, order: Optional[str] = None) -> l
             response, lambda additionalParams: self._send_request(endpoint, body, additionalParams), limit
         )
 
-    def get_library_subscriptions(self, limit: int = 25, order: Optional[str] = None) -> list[dict]:
+    def get_library_subscriptions(
+        self, limit: int = 25, order: Optional[LibraryOrderType] = None
+    ) -> list[dict]:
         """
         Gets the artists the user has subscribed to.
 
@@ -198,7 +200,7 @@ def get_library_subscriptions(self, limit: int = 25, order: Optional[str] = None
             response, lambda additionalParams: self._send_request(endpoint, body, additionalParams), limit
         )
 
-    def get_library_podcasts(self, limit: int = 25, order: Optional[str] = None) -> list[dict]:
+    def get_library_podcasts(self, limit: int = 25, order: Optional[LibraryOrderType] = None) -> list[dict]:
         """
         Get podcasts the user has added to the library
 
@@ -244,7 +246,7 @@ def get_library_podcasts(self, limit: int = 25, order: Optional[str] = None) ->
             response, lambda additionalParams: self._send_request(endpoint, body, additionalParams), limit
         )
 
-    def get_library_channels(self, limit: int = 25, order: Optional[str] = None) -> list[dict]:
+    def get_library_channels(self, limit: int = 25, order: Optional[LibraryOrderType] = None) -> list[dict]:
         """
         Get channels the user has added to the library
 
diff --git a/ytmusicapi/mixins/uploads.py b/ytmusicapi/mixins/uploads.py
index feafbbd8..84afe6d3 100644
--- a/ytmusicapi/mixins/uploads.py
+++ b/ytmusicapi/mixins/uploads.py
@@ -19,11 +19,13 @@
 from ..enums import ResponseStatus
 from ..exceptions import YTMusicUserError
 from ._protocol import MixinProtocol
-from ._utils import prepare_order_params, validate_order_parameter
+from ._utils import LibraryOrderType, prepare_order_params, validate_order_parameter
 
 
 class UploadsMixin(MixinProtocol):
-    def get_library_upload_songs(self, limit: Optional[int] = 25, order: Optional[str] = None) -> list[dict]:
+    def get_library_upload_songs(
+        self, limit: Optional[int] = 25, order: Optional[LibraryOrderType] = None
+    ) -> list[dict]:
         """
         Returns a list of uploaded songs
 
@@ -70,7 +72,9 @@ def get_library_upload_songs(self, limit: Optional[int] = 25, order: Optional[st
 
         return songs
 
-    def get_library_upload_albums(self, limit: Optional[int] = 25, order: Optional[str] = None) -> list[dict]:
+    def get_library_upload_albums(
+        self, limit: Optional[int] = 25, order: Optional[LibraryOrderType] = None
+    ) -> list[dict]:
         """
         Gets the albums of uploaded songs in the user's library.
 
@@ -90,7 +94,7 @@ def get_library_upload_albums(self, limit: Optional[int] = 25, order: Optional[s
         )
 
     def get_library_upload_artists(
-        self, limit: Optional[int] = 25, order: Optional[str] = None
+        self, limit: Optional[int] = 25, order: Optional[LibraryOrderType] = None
     ) -> list[dict]:
         """
         Gets the artists of uploaded songs in the user's library.
diff --git a/ytmusicapi/models/__init__.py b/ytmusicapi/models/__init__.py
new file mode 100644
index 00000000..3cc09e58
--- /dev/null
+++ b/ytmusicapi/models/__init__.py
@@ -0,0 +1,3 @@
+from .lyrics import LyricLine, Lyrics, TimedLyrics
+
+__all__ = ["LyricLine", "Lyrics", "TimedLyrics"]
diff --git a/ytmusicapi/models/lyrics.py b/ytmusicapi/models/lyrics.py
new file mode 100644
index 00000000..534c6916
--- /dev/null
+++ b/ytmusicapi/models/lyrics.py
@@ -0,0 +1,46 @@
+from dataclasses import dataclass
+from typing import Literal, Optional, TypedDict
+
+
+@dataclass
+class LyricLine:
+    """Represents a single line of lyrics together with its timing (in milliseconds).
+
+    Args:
+        text (str): The text of the lyric line.
+        start_time (int): Start of the lyric line in milliseconds.
+        end_time (int): End of the lyric line in milliseconds.
+        id (int): A metadata id that probably uniquely identifies each lyric line.
+    """
+
+    text: str
+    start_time: int
+    end_time: int
+    id: int
+
+    @classmethod
+    def from_raw(cls, raw_lyric: dict) -> "LyricLine":
+        """
+        Builds a `LyricLine` from one raw timed-lyrics entry; times and id are cast to ``int``.
+
+        :param raw_lyric: Raw lyric-data from the mobile api (``lyricLine`` and ``cueRange`` keys).
+        :return: The parsed `LyricLine`.
+        """
+        text = raw_lyric["lyricLine"]
+        cue_range = raw_lyric["cueRange"]
+        start_time = int(cue_range["startTimeMilliseconds"])
+        end_time = int(cue_range["endTimeMilliseconds"])
+        id = int(cue_range["metadata"]["id"])
+        return cls(text, start_time, end_time, id)
+
+
+class Lyrics(TypedDict):
+    lyrics: str
+    source: Optional[str]
+    hasTimestamps: Literal[False]
+
+
+class TimedLyrics(TypedDict):
+    lyrics: list[LyricLine]
+    source: Optional[str]
+    hasTimestamps: Literal[True]
diff --git a/ytmusicapi/navigation.py b/ytmusicapi/navigation.py
index 7bf89fa3..5ab6d7c8 100644
--- a/ytmusicapi/navigation.py
+++ b/ytmusicapi/navigation.py
@@ -88,6 +88,16 @@
 CAROUSEL_TITLE = [*HEADER, "musicCarouselShelfBasicHeaderRenderer", *TITLE]
 CARD_SHELF_TITLE = [*HEADER, "musicCardShelfHeaderBasicRenderer", *TITLE_TEXT]
 FRAMEWORK_MUTATIONS = ["frameworkUpdates", "entityBatchUpdate", "mutations"]
+TIMESTAMPED_LYRICS = [
+    "contents",
+    "elementRenderer",
+    "newElement",
+    "type",
+    "componentType",
+    "model",
+    "timedLyricsModel",
+    "lyricsData",
+]
 
 
 @overload
diff --git a/ytmusicapi/ytmusic.py b/ytmusicapi/ytmusic.py
index 9847da68..91339812 100644
--- a/ytmusicapi/ytmusic.py
+++ b/ytmusicapi/ytmusic.py
@@ -2,7 +2,8 @@
 import json
 import locale
 import time
-from contextlib import suppress
+from collections.abc import Iterator
+from contextlib import contextmanager, suppress
 from functools import partial
 from pathlib import Path
 from typing import Optional, Union
@@ -89,8 +90,10 @@ def __init__(
             used for authentication flow.
         """
 
-        self._base_headers = None  #: for authless initializing requests during OAuth flow
-        self._headers = None  #: cache formed headers including auth
+        self._base_headers: Optional[CaseInsensitiveDict] = (
+            None  #: for authless initializing requests during OAuth flow
+        )
+        self._headers: Optional[CaseInsensitiveDict] = None  #: cache formed headers including auth
 
         self.auth = auth  #: raw auth
         self._input_dict: CaseInsensitiveDict = (
@@ -184,24 +187,26 @@ def __init__(
                 raise YTMusicUserError("Your cookie is missing the required value __Secure-3PAPISID")
 
     @property
-    def base_headers(self):
+    def base_headers(self) -> CaseInsensitiveDict:
         if not self._base_headers:
             if self.auth_type == AuthType.BROWSER or self.auth_type == AuthType.OAUTH_CUSTOM_FULL:
                 self._base_headers = self._input_dict
             else:
-                self._base_headers = {
-                    "user-agent": USER_AGENT,
-                    "accept": "*/*",
-                    "accept-encoding": "gzip, deflate",
-                    "content-type": "application/json",
-                    "content-encoding": "gzip",
-                    "origin": YTM_DOMAIN,
-                }
+                self._base_headers = CaseInsensitiveDict(
+                    {
+                        "user-agent": USER_AGENT,
+                        "accept": "*/*",
+                        "accept-encoding": "gzip, deflate",
+                        "content-type": "application/json",
+                        "content-encoding": "gzip",
+                        "origin": YTM_DOMAIN,
+                    }
+                )
 
         return self._base_headers
 
     @property
-    def headers(self):
+    def headers(self) -> CaseInsensitiveDict:
         # set on first use
         if not self._headers:
             self._headers = self.base_headers
@@ -218,6 +223,40 @@ def headers(self):
 
         return self._headers
 
+    @contextmanager
+    def as_mobile(self) -> Iterator[None]:
+        """
+        Not thread-safe!
+        ----------------
+
+        Temporarily changes the client part of the request `context` so that
+        the API returns the results meant for the Android mobile app.
+        All calls inside the `with`-statement will emulate mobile behavior.
+
+        This context manager yields nothing (`None`), as it operates in-place
+        and only temporarily alters the underlying `YTMusic`-object.
+
+
+        Example::
+
+            with yt.as_mobile():
+                yt._send_request(...)  # results as mobile-app
+
+            yt._send_request(...)  # back to normal, like web-app
+
+        """
+
+        # keep a copy of the current client, then switch it to the Android mobile app
+        copied_context_client = self.context["context"]["client"].copy()
+        self.context["context"]["client"].update({"clientName": "ANDROID_MUSIC", "clientVersion": "7.21.50"})
+
+        # exceptions raised inside the `with`-block are not caught here and propagate to the caller
+        try:
+            yield None
+        finally:
+            # always restore the saved client, even if the block raised
+            self.context["context"]["client"] = copied_context_client
+
     def _send_request(self, endpoint: str, body: dict, additionalParams: str = "") -> dict:
         body.update(self.context)