From 60e8b04e62eee84b64282586d8d48dcd47d22715 Mon Sep 17 00:00:00 2001 From: cyberboysumanjay Date: Tue, 9 Jun 2020 21:45:13 +0530 Subject: [PATCH 1/5] New Schema Test --- app.py | 82 +++++++++++++-------------- saavn.py | 165 ++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 175 insertions(+), 72 deletions(-) diff --git a/app.py b/app.py index f8a3d50..7b3ced8 100644 --- a/app.py +++ b/app.py @@ -17,74 +17,76 @@ def home(): @app.route('/result/', methods=['GET', 'POST']) def result(): - data = '' lyrics = False + false = False + true = True query = request.args.get('query') lyrics_ = request.args.get('lyrics') if lyrics_ and lyrics_.lower()!='false': lyrics = True - if not query.startswith('https://www.jiosaavn.com'): - query = "https://www.jiosaavn.com/search/"+query - + if 'saavn' not in query: + return jsonify(saavn.search_from_query(query)) + print("Checking Lyrics Tag:",lyrics) try: - print("Query received: ", query) if '/song/' in query: print("Song") - song = saavn.get_songs(query)[0] - song['image_url'] = saavn.fix_image_url(song['image_url']) - song['title'] = saavn.fix_title(song['title']) - song['url'] = saavn.decrypt_url(song['url']) - song['album'] = saavn.fix_title(song['album']) + song = saavn.get_song_id(query) + song = (saavn.search_from_song_id(song)) if lyrics: - song['lyrics'] = saavn.get_lyrics(query) + if song['has_lyrics']: + song['lyrics'] = saavn.get_lyrics(song['perma_url']) + else: + song['lyrics'] = None + song['status'] = True + song['media_url'] = saavn.check_media_url(song['media_url']) return jsonify(song) - elif '/search/' in query: - print("Text Query Detected") - songs = saavn.get_songs(query) - for song in songs: - song['image_url'] = saavn.fix_image_url(song['image_url']) - song['title'] = saavn.fix_title(song['title']) - song['url'] = saavn.decrypt_url(song['url']) - song['album'] = saavn.fix_title(song['album']) - if lyrics: - song['lyrics'] = saavn.get_lyrics(song['tiny_url']) - return jsonify(songs) + ''' + elif '/search/' in query: + songs = saavn.get_songs(query) + for song in songs: + song['image_url'] = saavn.fix_image_url(song['image_url']) + song['title'] = saavn.fix_title(song['title']) + song['url'] = saavn.decrypt_url(song['url']) + song['album'] = saavn.fix_title(song['album']) + if lyrics: + song['lyrics'] = saavn.get_lyrics(song['tiny_url']) + return jsonify(songs) + ''' elif '/album/' in query: print("Album") id = saavn.AlbumId(query) songs = saavn.getAlbum(id) - for song in songs["songs"]: - song['image'] = saavn.fix_image_url(song['image']) - song['song'] = saavn.fix_title(song['song']) - song['album'] = saavn.fix_title(song['album']) + for song in songs['songs']: + song['media_url'] = saavn.check_media_url(song['media_url']) if lyrics: - song['lyrics'] = saavn.get_lyrics(song['perma_url']) - song['encrypted_media_path'] = saavn.decrypt_url( - song['encrypted_media_path']) + if song['has_lyrics']: + song['lyrics'] = saavn.get_lyrics(song['perma_url']) + else: + song['lyrics'] = None + songs['status'] = True return jsonify(songs) elif '/playlist/' or '/featured/' in query: print("Playlist") id = saavn.getListId(query) songs = saavn.getPlayList(id) for song in songs['songs']: - song['image'] = saavn.fix_image_url(song['image']) - song['song'] = saavn.fix_title(song['song']) + song['media_url'] = saavn.check_media_url(song['media_url']) if lyrics: - song['lyrics'] = saavn.get_lyrics(song['perma_url']) - song['encrypted_media_path'] = saavn.decrypt_url( - song['encrypted_media_path']) + if song['has_lyrics']: + song['lyrics'] = saavn.get_lyrics(song['perma_url']) + else: + song['lyrics'] = None + songs['status'] = True return jsonify(songs) - raise AssertionError except Exception as e: - errors = [] print_exc() error = { - "status": str(e) + "status": True, + "error":str(e) } - errors.append(error) - return jsonify(errors) - return data + return jsonify(error) + return None if __name__ == '__main__': diff --git a/saavn.py b/saavn.py index c091af1..21cb481 100644 --- a/saavn.py +++ b/saavn.py @@ -14,23 +14,93 @@ des_cipher = des(b"38346591", ECB, b"\0\0\0\0\0\0\0\0", pad=None, padmode=PAD_PKCS5) -base_url = 'http://h.saavncdn.com' +base_url = 'https://h.saavncdn.com' json_decoder = json.JSONDecoder() headers = { 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0' } -def get_songs(query): - if not query.startswith('https://www.jiosaavn.com'): - url = "https://www.jiosaavn.com/search/"+query - else: - url = query +def search_from_song_id(song_id): + song_base_url = "https://www.jiosaavn.com/api.php?cc=in&_marker=0%3F_marker%3D0&_format=json&model=Redmi_5A&__call=song.getDetails&pids=" + \ + str(song_id) + song_response = requests.get(song_base_url) + songs_json = list(filter(lambda x: x.startswith( + "{"), song_response.text.splitlines()))[0] + songs_json = json.loads(songs_json) + try: + songs_json[song_id]['media_url'] = generate_media_url( + songs_json[song_id]['media_preview_url']) + except KeyError: + songs_json[song_id]['media_url'] = decrypt_url( + songs_json[song_id]['encrypted_media_url']) + songs_json[song_id]['image'] = fix_image_url(songs_json[song_id]['image']) + songs_json[song_id]['song'] = fix_title(songs_json[song_id]['song']) + songs_json[song_id]['album'] = fix_title(songs_json[song_id]['album']) + return songs_json[song_id] + + +def search_from_query(query): + base_url = f"https://www.saavn.com/api.php?__call=autocomplete.get&_marker=0&query={query}&ctx=android&_format=json&_marker=0" + response = requests.get(base_url) + songs_json = list(filter(lambda x: x.startswith( + "{"), response.text.splitlines()))[0] + songs_json = json.loads(songs_json) + songs_data = songs_json['songs']['data'] + songs = [] + for song in songs_data: + song_id = song['id'] + song_base_url = "https://www.jiosaavn.com/api.php?cc=in&_marker=0%3F_marker%3D0&_format=json&model=Redmi_5A&__call=song.getDetails&pids="+song_id + song_response = requests.get(song_base_url) + songs_json = list(filter(lambda x: x.startswith( + "{"), song_response.text.splitlines()))[0] + songs_json = json.loads(songs_json) + try: + songs_json[song_id]['media_url'] = generate_media_url( + songs_json[song_id]['media_preview_url']) + except KeyError: + songs_json[song_id]['media_url'] = decrypt_url( + songs_json[song_id]['media_url']) + songs_json[song_id]['image'] = fix_image_url( + songs_json[song_id]['image']) + songs_json[song_id]['song'] = fix_title(songs_json[song_id]['song']) + + songs_json[song_id]['album'] = fix_title(songs_json[song_id]['album']) + songs.append(songs_json[song_id]) + return songs + + +def generate_media_url(url): + url = url.replace("preview", "h") + url = url.replace("_96_p.mp4", "_320.mp3") + return url + + +def get_song_id(query): + url = query songs = [] try: res = requests.get(url, headers=headers, data=[('bitrate', '320')]) soup = BeautifulSoup(res.text, "lxml") all_song_divs = soup.find_all('div', {"class": "hide song-json"}) + song_divs = all_song_divs[0] + try: + song_info = json.loads(song_divs.text) + return song_info['songid'] + except: + esc_text = re.sub(r'.\(\bFrom .*?"\)', "", str(song_divs.text)) + try: + song_info = json_decoder.decode(esc_text) + return song_info['songid'] + except: + try: + song_info = json.loads(esc_text) + return song_info['songid'] + except: + print(esc_text) + return None + + ''' for i in all_song_divs: try: try: @@ -47,15 +117,15 @@ def get_songs(query): songs.append(song_info) except: print(esc_text) - except Exception as e: print_exc() continue if len(songs) > 0: return songs + ''' except Exception as e: print_exc() - return songs + return None def getAlbum(albumId): @@ -67,6 +137,18 @@ def getAlbum(albumId): songs_json = list(filter(lambda x: x.startswith( "{"), response.text.splitlines()))[0] songs_json = json.loads(songs_json) + songs_json['name'] = fix_title(songs_json['name']) + songs_json['image'] = fix_image_url(songs_json['image']) + for songs in songs_json['songs']: + try: + songs['media_url'] = generate_media_url( + songs['media_preview_url']) + except KeyError: + songs['media_url'] = decrypt_url( + songs['encrypted_media_url']) + songs['image'] = fix_image_url(songs['image']) + songs['song'] = fix_title(songs['song']) + songs['album'] = fix_title(songs['album']) return songs_json except Exception as e: print_exc() @@ -91,7 +173,7 @@ def AlbumId(input_url): return(getAlbumID) except Exception: print_exc() - pass + return None def setProxy(): @@ -111,10 +193,24 @@ def getPlayList(listId): songs_json = list( filter(lambda x: x.endswith("}"), response_text))[0] songs_json = json.loads(songs_json) + songs_json['firstname'] = fix_title(songs_json['firstname']) + songs_json['listname'] = fix_title(songs_json['listname']) + songs_json['image'] = fix_image_url(songs_json['image']) + for songs in songs_json['songs']: + songs['image'] = fix_image_url(songs['image']) + songs['song'] = fix_title(songs['song']) + songs['album'] = fix_title(songs['album']) + try: + songs['media_url'] = generate_media_url( + songs['media_preview_url']) + except KeyError: + songs['media_url'] = decrypt_url( + songs['encrypted_media_url']) return songs_json return None except Exception: print_exc() + return None def getListId(input_url): @@ -124,11 +220,11 @@ def getListId(input_url): try: getPlayListID = soup.select(".flip-layout")[0]["data-listid"] - if getPlayListID is not None: - return(getPlayListID) - except Exception as e: - print('Unable to scrape Playlist ID', e) - return None + return getPlayListID + except Exception: + getPlayListID = res.text.split('data-listid="')[1] + getPlayListID = getPlayListID.split('">')[0] + return getPlayListID def getSongsJSON(listId): @@ -149,23 +245,7 @@ def decrypt_url(url): enc_url = base64.b64decode(url.strip()) dec_url = des_cipher.decrypt(enc_url, padmode=PAD_PKCS5).decode('utf-8') dec_url = dec_url.replace("_96.mp4", "_320.mp3") - dec_url = dec_url.replace("http://aac.saavncdn.com","https://h.saavncdn.com") - try: - r = requests.head(dec_url) - if r.status_code == 200 or r.status_code == 302: - return dec_url - else: - dec_url = dec_url.replace('_320.mp3', '_160.mp3') - r = requests.head(dec_url) - if r.status_code == 200 or r.status_code == 302: - return dec_url - else: - dec_url = dec_url.replace("_160.mp3", "_96.mp3") - if r.status_code == 200 or r.status_code == 302: - return dec_url - except Exception as e: - return None - return None + return dec_url def fix_title(title): @@ -175,6 +255,8 @@ def fix_title(title): def fix_image_url(url): url = str(url) + if 'http://' in url: + url = url.replace("http://", "https://") url = url.replace('150x150', '500x500') return url @@ -192,4 +274,23 @@ def get_lyrics(link): return (lyrics) except Exception: print_exc() - return + return None + + +def expand_url(url): + try: + session = requests.Session() + resp = session.head(url, allow_redirects=True) + return(resp.url) + except Exception as e: + print("URL Redirect Error: ",e) + return url + +def check_media_url(dec_url): + ex_dec_url = expand_url(dec_url) + r = requests.head(ex_dec_url) + if r.status_code!=200: + fixed_dec_url = dec_url.replace(".mp3",'.mp4') + fixed_dec_url = expand_url(fixed_dec_url) + return fixed_dec_url + return ex_dec_url \ No newline at end of file From 79414c35be845ca050fd1bd3e17dd621926ec493 Mon Sep 17 00:00:00 2001 From: cyberboysumanjay Date: Sun, 14 Jun 2020 21:20:22 +0530 Subject: [PATCH 2/5] Fix Typo --- saavn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/saavn.py b/saavn.py index 21cb481..3fe3f64 100644 --- a/saavn.py +++ b/saavn.py @@ -60,7 +60,7 @@ def search_from_query(query): songs_json[song_id]['media_preview_url']) except KeyError: songs_json[song_id]['media_url'] = decrypt_url( - songs_json[song_id]['media_url']) + songs_json[song_id]['encrypted_media_url']) songs_json[song_id]['image'] = fix_image_url( songs_json[song_id]['image']) songs_json[song_id]['song'] = fix_title(songs_json[song_id]['song']) From 0051495acbc0dc3106062dfa40ef0a760ea46ca9 Mon Sep 17 00:00:00 2001 From: cyberboysumanjay Date: Wed, 17 Jun 2020 21:32:21 +0530 Subject: [PATCH 3/5] Fixed Media URL for Query Search --- saavn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/saavn.py b/saavn.py index 3fe3f64..36f0b40 100644 --- a/saavn.py +++ b/saavn.py @@ -66,6 +66,7 @@ def search_from_query(query): songs_json[song_id]['song'] = fix_title(songs_json[song_id]['song']) songs_json[song_id]['album'] = fix_title(songs_json[song_id]['album']) + songs_json[song_id]['media_url'] = check_media_url(songs_json[song_id]['media_url']) songs.append(songs_json[song_id]) return songs From e11d7530dd3870d153cfed6cccf042d7b94aa7e9 Mon Sep 17 00:00:00 2001 From: cyberboysumanjay Date: Thu, 18 Jun 2020 23:11:31 +0530 Subject: [PATCH 4/5] Fixed Lyrics Search --- saavn.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/saavn.py b/saavn.py index 36f0b40..ccf030a 100644 --- a/saavn.py +++ b/saavn.py @@ -66,7 +66,8 @@ def search_from_query(query): songs_json[song_id]['song'] = fix_title(songs_json[song_id]['song']) songs_json[song_id]['album'] = fix_title(songs_json[song_id]['album']) - songs_json[song_id]['media_url'] = check_media_url(songs_json[song_id]['media_url']) + songs_json[song_id]['media_url'] = check_media_url( + songs_json[song_id]['media_url']) songs.append(songs_json[song_id]) return songs @@ -266,12 +267,18 @@ def get_lyrics(link): try: if '/song/' in link: link = link.replace("/song/", '/lyrics/') + link_=link.split('/') + link_[-2]=link_[-2]+'-lyrics' + link='/'.join(link_) source = requests.get(link).text soup = BeautifulSoup(source, 'lxml') - res = soup.find('p', class_='lyrics') - lyrics = str(res).replace("
", "\n") + res = soup.find(class_='u-disable-select') + lyrics = str(res).replace("", "") + lyrics = lyrics.replace("", "") + lyrics = lyrics.replace("
", "\n") lyrics = lyrics.replace('

', '') lyrics = lyrics.replace("

", '') + lyrics = lyrics.split("

")[1] return (lyrics) except Exception: print_exc() @@ -284,14 +291,15 @@ def expand_url(url): resp = session.head(url, allow_redirects=True) return(resp.url) except Exception as e: - print("URL Redirect Error: ",e) + print("URL Redirect Error: ", e) return url + def check_media_url(dec_url): ex_dec_url = expand_url(dec_url) r = requests.head(ex_dec_url) - if r.status_code!=200: - fixed_dec_url = dec_url.replace(".mp3",'.mp4') - fixed_dec_url = expand_url(fixed_dec_url) - return fixed_dec_url - return ex_dec_url \ No newline at end of file + if r.status_code != 200: + fixed_dec_url = dec_url.replace(".mp3", '.mp4') + fixed_dec_url = expand_url(fixed_dec_url) + return fixed_dec_url + return ex_dec_url From 08c982df4001c409fff5eb48cafa41238fccb7dc Mon Sep 17 00:00:00 2001 From: cyberboysumanjay Date: Thu, 18 Jun 2020 23:36:39 +0530 Subject: [PATCH 5/5] Fixed Playlist and Album Search --- saavn.py | 75 ++++++++------------------------------------------------ 1 file changed, 10 insertions(+), 65 deletions(-) diff --git a/saavn.py b/saavn.py index ccf030a..919cb86 100644 --- a/saavn.py +++ b/saavn.py @@ -83,48 +83,8 @@ def get_song_id(query): songs = [] try: res = requests.get(url, headers=headers, data=[('bitrate', '320')]) - soup = BeautifulSoup(res.text, "lxml") - all_song_divs = soup.find_all('div', {"class": "hide song-json"}) - song_divs = all_song_divs[0] - try: - song_info = json.loads(song_divs.text) - return song_info['songid'] - except: - esc_text = re.sub(r'.\(\bFrom .*?"\)', "", str(song_divs.text)) - try: - song_info = json_decoder.decode(esc_text) - return song_info['songid'] - except: - try: - song_info = json.loads(esc_text) - return song_info['songid'] - except: - print(esc_text) - return None - - ''' - for i in all_song_divs: - try: - try: - song_info = json.loads(i.text) - songs.append(song_info) - except: - esc_text = re.sub(r'.\(\bFrom .*?"\)', "", str(i.text)) - try: - song_info = json_decoder.decode(esc_text) - songs.append(song_info) - except: - try: - song_info = json.loads(esc_text) - songs.append(song_info) - except: - print(esc_text) - except Exception as e: - print_exc() - continue - if len(songs) > 0: - return songs - ''' + id = res.text.split('"song":{"type":"')[1].split('","image":')[0].split('"id":"')[-1] + return id except Exception as e: print_exc() return None @@ -163,16 +123,9 @@ def AlbumId(input_url): res = requests.get(input_url, headers=headers) if 'internal error' in res.text: return None - except Exception: - print_exc() - return None - soup = BeautifulSoup(res.text, "html.parser") - try: - getAlbumID = soup.select(".play")[0]["onclick"] - getAlbumID = ast.literal_eval( - re.search("\[(.*?)\]", getAlbumID).group())[1] - if getAlbumID is not None: - return(getAlbumID) + else: + id = res.text.split('"album_id":"')[1].split('"')[0] + return id except Exception: print_exc() return None @@ -217,16 +170,8 @@ def getPlayList(listId): def getListId(input_url): headers = setProxy() - res = requests.get(input_url, headers=headers) - soup = BeautifulSoup(res.text, "html.parser") - - try: - getPlayListID = soup.select(".flip-layout")[0]["data-listid"] - return getPlayListID - except Exception: - getPlayListID = res.text.split('data-listid="')[1] - getPlayListID = getPlayListID.split('">')[0] - return getPlayListID + res = requests.get(input_url, headers=headers).text + return res.split('"type":"playlist","id":"')[1].split('"')[0] def getSongsJSON(listId): @@ -267,9 +212,9 @@ def get_lyrics(link): try: if '/song/' in link: link = link.replace("/song/", '/lyrics/') - link_=link.split('/') - link_[-2]=link_[-2]+'-lyrics' - link='/'.join(link_) + link_ = link.split('/') + link_[-2] = link_[-2]+'-lyrics' + link = '/'.join(link_) source = requests.get(link).text soup = BeautifulSoup(source, 'lxml') res = soup.find(class_='u-disable-select')