Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Various search improvements #375

Merged
merged 6 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, 3.10, 3.11, 3.12]
SathyaBhat marked this conversation as resolved.
Show resolved Hide resolved

steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ yt-dlp>=2023.3.4
spotipy~=2.21
mutagen~=1.45
rich~=12.0
urllib3~=1.26
urllib3~=1.26
ytmusicapi~=1.6.0
Levenshtein~=0.25.1
5 changes: 3 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
setup(
name="spotify_dl",
version=VERSION,
python_requires=">=3.7",
python_requires=">=3.8",
install_requires=requirements,
author="Sathya Bhat",
author_email="[email protected]",
Expand All @@ -36,10 +36,11 @@
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Internet",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
Expand Down
17 changes: 17 additions & 0 deletions spotify_dl/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
import Levenshtein


def get_closest_match(results, expected) -> str:
    """
    Pick the candidate whose Levenshtein edit distance to ``expected`` is
    the smallest.

    :param results: iterable of candidate strings to compare
    :param expected: the string every candidate is measured against
    :return: the candidate with the lowest edit distance; when several
        candidates share that distance the earliest one is returned, and an
        empty string is returned when there are no candidates at all
    """
    def edit_distance(candidate):
        return Levenshtein.distance(candidate, expected)

    # min() keeps the first of several equally small items, which matches a
    # strict "less than" scan; ``default`` covers the no-candidates case.
    return min(results, key=edit_distance, default="")


def sanitize(name, replace_with=""):
"""
Removes some of the reserved characters from the name so it can be saved
Expand Down
34 changes: 27 additions & 7 deletions spotify_dl/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
import mutagen
import csv
import yt_dlp
import ytmusicapi
from mutagen.easyid3 import EasyID3
from mutagen.id3 import APIC, ID3
from mutagen.mp3 import MP3
from spotify_dl.scaffold import log
from spotify_dl.utils import sanitize
from spotify_dl.utils import sanitize, get_closest_match
from spotify_dl.constants import DOWNLOAD_LIST


Expand All @@ -33,16 +34,26 @@ def dump_json(songs):
:param songs: the songs for which the JSON should be output
"""
for song in songs:
query = f"{song.get('artist')} - {song.get('name')} Lyrics".replace(
query = f"{song.get('artist')} - {song.get('name')}".replace(
":", ""
).replace('"', "")

ydl_opts = {"quiet": True}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
ytJson = ydl.extract_info("ytsearch:" + query, False)
print(json.dumps(ytJson.get("entries")))
ytJson = {}
with ytmusicapi.YTMusic() as ym:
# Reduce results to array of titles and video IDs
result_titles, result_ids = zip(*map(
lambda d: (f"{d['artists'][0]['name']} - {d['title']}".replace(":", "").replace('"', ""), d["videoId"]),
ym.search(query, filter="songs")
))
# Get ID of closest matching result by finding index in titles list
videoId = result_ids[result_titles.index(get_closest_match(result_titles, query))]

ytJson = ydl.extract_info(f"https://music.youtube.com/watch?v={videoId}", False)
print(json.dumps([ytJson])) # insert into array so that the format stays the same
except Exception as e: # skipcq: PYL-W0703
log.debug(e)
print(
Expand Down Expand Up @@ -144,7 +155,7 @@ def set_tags(temp, filename, kwargs):
def find_and_download_songs(kwargs):
"""
function handles actual download of the songs
the youtube_search lib is used to search for songs and get best url
the ytmusicapi lib is used to search for songs and get best url via YT Music
:param kwargs: dictionary of key value arguments to be used in download
"""
sponsorblock_postprocessor = []
Expand All @@ -160,7 +171,7 @@ def find_and_download_songs(kwargs):
int(temp[-1].replace("\n", "")),
)

query = f"{artist} - {name} Lyrics".replace(":", "").replace('"', "")
query = f"{artist} - {name}".replace(":", "").replace('"', "")
print(f"Initiating download for {query}.")

file_name = kwargs["file_name_f"](
Expand Down Expand Up @@ -200,6 +211,15 @@ def find_and_download_songs(kwargs):
print(f"File {mp3file_path} already exists, we do not overwrite it ")
continue

with ytmusicapi.YTMusic() as ym:
# Reduce search results to array of titles and video IDs
result_titles, result_ids = zip(*map(
lambda d: (f"{d['artists'][0]['name']} - {d['title']}".replace(":", "").replace('"', ""), d["videoId"]),
ym.search(query, filter="songs")
))
# Get ID of closest matching result by finding index in titles list
video_id = result_ids[result_titles.index(get_closest_match(result_titles, query))]

outtmpl = f"{file_path}.%(ext)s"
ydl_opts = {
"proxy": kwargs.get("proxy"),
Expand Down Expand Up @@ -227,7 +247,7 @@ def find_and_download_songs(kwargs):
ydl_opts["postprocessors"].append(mp3_postprocess_opts.copy())
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
try:
ydl.download([query])
ydl.download([f"https://music.youtube.com/watch?v={video_id}"])
except Exception as e: # skipcq: PYL-W0703
log.debug(e)
print(f"Failed to download {name}, make sure yt_dlp is up to date")
Expand Down
Loading