-
Notifications
You must be signed in to change notification settings - Fork 305
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2618 from ronie/script.cu.lrclyrics-6.6.4
[script.cu.lrclyrics] 6.6.4
- Loading branch information
Showing
103 changed files
with
3,618 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Dummy file to make this directory a package. |
101 changes: 101 additions & 0 deletions
101
script.cu.lrclyrics/lib/broken-scrapers/gomaudio/lyricsScraper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#-*- coding: UTF-8 -*- | ||
''' | ||
Scraper for http://newlyrics.gomtv.com/ | ||
edge | ||
''' | ||
|
||
import sys | ||
import hashlib | ||
import requests | ||
import urllib.parse | ||
import re | ||
import unicodedata | ||
from lib.utils import * | ||
from lib.audiofile import AudioFile | ||
|
||
__title__ = 'GomAudio' | ||
__priority__ = '110' | ||
__lrc__ = True | ||
|
||
|
||
GOM_URL = 'http://newlyrics.gomtv.com/cgi-bin/lyrics.cgi?cmd=find_get_lyrics&file_key=%s&title=%s&artist=%s&from=gomaudio_local' | ||
|
||
def remove_accents(data):
    '''
    Return data with its accents removed.

    The text is decomposed with NFKD normalization and every character that
    unicodedata.combining() reports as a combining mark is dropped.
    '''
    decomposed = unicodedata.normalize('NFKD', data)
    kept = []
    for char in decomposed:
        if unicodedata.combining(char):
            continue
        kept.append(char)
    return u"".join(kept)
|
||
|
||
class gomClient(object):
    '''
    Provide Gom specific functions, such as the lookup key derived from an mp3
    '''
    @staticmethod
    def GetKeyFromFile(file):
        '''
        Return the MD5 hex digest of the audio data read from file.

        Up to 100KB of the audio stream is hashed. Returns None when no
        audio data could be read (e.g. streaming audio).
        '''
        musf = AudioFile()
        musf.Open(file)
        try:
            buf = musf.ReadAudioStream(100*1024)  # 100KB from audio data
        finally:
            # always release the file, even when reading the stream fails
            musf.Close()
        # buffer will be empty for streaming audio
        if not buf:
            return None
        # calculate hashkey
        return hashlib.md5(buf).hexdigest()

    @staticmethod
    def mSecConv(msec):
        '''
        Split a duration in milliseconds into (minutes, seconds, centiseconds).

        Floor division keeps every component an integer; true division
        would hand back floats (with a fractional centisecond part).
        '''
        s, cs = divmod(msec // 10, 100)
        m, s = divmod(s, 60)
        return m, s, cs
|
||
class LyricsFetcher:
    '''
    Fetch synchronised (LRC) lyrics from the GomAudio lyrics server.
    '''
    # one <sync start="msec">text</sync> element per lyric line
    _SYNC_RE = re.compile(r'<sync start="(\d+)">([^<]*)</sync>')

    def __init__(self, *args, **kwargs):
        '''
        Required keyword arguments: debug (passed on to log()) and settings
        (passed on to Lyrics()).
        '''
        self.DEBUG = kwargs['debug']
        self.settings = kwargs['settings']
        self.base_url = 'http://newlyrics.gomtv.com/'

    @staticmethod
    def _unescape(text):
        '''
        Replace the quote entities found in the server reply with plain quotes.

        Both the numeric and the named apostrophe entity are accepted.
        '''
        for entity, char in (('&#39;', "'"), ('&apos;', "'"), ('&quot;', '"')):
            text = text.replace(entity, char)
        return text

    def get_lyrics(self, song, key=None, ext=None):
        '''
        Look up lyrics for song and return a Lyrics object, or None on failure.

        key: file hash to search with; computed from song.filepath when omitted
             and the file extension is a supported one.
        ext: file extension (with leading dot); derived from song.filepath when omitted.
        '''
        # this module has no import of os of its own, so bring it in here
        import os
        log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
        lyrics = Lyrics(settings=self.settings)
        lyrics.song = song
        lyrics.source = __title__
        lyrics.lrc = __lrc__
        try:
            if not ext:
                ext = os.path.splitext(song.filepath)[1].lower()
            sup_ext = ['.mp3', '.ogg', '.wma', '.flac', '.ape', '.wav']
            if ext in sup_ext and key is None:
                key = gomClient.GetKeyFromFile(song.filepath)
            if not key:
                return None
            # the server expects euc-kr encoded, percent-quoted title and artist
            quoted_title = urllib.parse.quote(remove_accents(song.title).encode('euc-kr'))
            quoted_artist = urllib.parse.quote(remove_accents(song.artist).encode('euc-kr'))
            url = GOM_URL % (key, quoted_title, quoted_artist)
            response = requests.get(url, timeout=10)
            response.encoding = 'euc-kr'
            Page = response.text
        except Exception:
            # best effort: report where it failed and give up on this scraper
            exc_value, trace = sys.exc_info()[1:]
            log('%s: %s::%s (%d) [%s]' % (
                __title__, self.__class__.__name__,
                trace.tb_frame.f_code.co_name,
                trace.tb_lineno,
                exc_value
            ), debug=self.DEBUG)
            return None
        # anything but a result="0" reply is treated as "no lyrics"
        if not Page.startswith('<lyrics_reply result="0">'):
            return None
        lyrline = ['[ti:%s]' % song.title, '[ar:%s]' % song.artist]
        for start, text in self._SYNC_RE.findall(Page):
            # timeformat conversion
            t = '%02d:%02d.%02d' % gomClient.mSecConv(int(start))
            # unescape string
            lyrline.append('[%s]%s' % (t, self._unescape(text)))
        lyrics.lyrics = '\n'.join(lyrline)
        return lyrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Dummy file to make this directory a package. |
161 changes: 161 additions & 0 deletions
161
script.cu.lrclyrics/lib/broken-scrapers/minilyrics/lyricsScraper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
#-*- coding: UTF-8 -*- | ||
''' | ||
Scraper for http://www.viewlyrics.com | ||
PedroHLC | ||
https://github.com/PedroHLC/ViewLyricsOpenSearcher | ||
rikels | ||
https://github.com/rikels/LyricsSearch | ||
''' | ||
|
||
import re | ||
import hashlib | ||
import difflib | ||
import chardet | ||
import requests | ||
from lib.utils import * | ||
|
||
__title__ = 'MiniLyrics' | ||
__priority__ = '100' | ||
__lrc__ = True | ||
|
||
|
||
class MiniLyrics(object):
    '''
    Minilyrics specific functions
    '''
    @staticmethod
    def hexToStr(hexx):
        '''
        Convert a string of hex digit pairs into the characters they encode.

        A trailing unpaired digit is ignored.
        '''
        return ''.join(chr(int(hexx[i:i + 2], 16)) for i in range(0, len(hexx) - 1, 2))

    @staticmethod
    def vl_enc(data, md5_extra):
        '''
        Encode a search query for the server.

        data: the query; md5_extra: watermark appended to data before hashing.
        Returns a str made of a 6 character header (which carries the XOR key
        at index 1), the 16 characters of the MD5 digest and the query XORed
        with the key.
        Raises ZeroDivisionError when data is empty.
        '''
        hasheddata = MiniLyrics.hexToStr(hashlib.md5(data + md5_extra).hexdigest())
        # bytes iterate as ints, text iterates as characters
        codes = [c if isinstance(c, int) else ord(c) for c in data]
        # the key is the rounded mean of all values
        magickey = int(round(float(sum(codes)) / float(len(codes))))
        encddata = bytearray(c ^ magickey for c in codes)
        header = '\x02' + chr(magickey) + '\x04\x00\x00\x00' + str(hasheddata)
        try:
            payload = encddata.decode('utf-8')
        except UnicodeDecodeError:
            payload = None
            detected = chardet.detect(encddata)['encoding']
            if detected:
                try:
                    payload = encddata.decode(detected)
                except (LookupError, UnicodeError):
                    payload = None
            if payload is None:
                # last resort: map every byte to the code point of the same value
                payload = "".join(map(chr, encddata))
        return header + payload

    @staticmethod
    def vl_dec(data):
        '''
        Decode a server reply.

        The XOR key is read from index 1 and applied to everything from
        index 22 onwards. Returns '' when data is too short to hold a key.
        '''
        if len(data) < 2:
            return ''
        magickey = data[1]
        if not isinstance(magickey, int):
            magickey = ord(magickey)
        return ''.join(
            chr((c if isinstance(c, int) else ord(c)) ^ magickey)
            for c in data[22:]
        )
|
||
class LyricsFetcher:
    '''
    Fetch synchronised (LRC) lyrics through the MiniLyrics search server.
    '''
    def __init__(self, *args, **kwargs):
        '''
        Required keyword arguments: debug (passed on to log()) and settings
        (passed on to Lyrics()).
        '''
        self.DEBUG = kwargs['debug']
        self.settings = kwargs['settings']
        self.proxy = None

    def htmlDecode(self, string):
        '''
        Replace the basic XML/HTML entities in string with their characters.
        '''
        # '&amp;' goes last so that '&amp;lt;' becomes '&lt;' and not '<'
        entities = (
            ('&apos;', '\''),
            ('&#39;', '\''),
            ('&quot;', '"'),
            ('&gt;', '>'),
            ('&lt;', '<'),
            ('&amp;', '&'),
        )
        for entity, char in entities:
            string = string.replace(entity, char)
        return string

    def get_lyrics(self, song):
        '''
        Search for song and return a Lyrics object, or None when nothing usable is found.

        When several lyric files match, they are all stored in lyrics.list and
        the first one is downloaded.
        '''
        log('%s: searching lyrics for %s - %s' % (__title__, song.artist, song.title), debug=self.DEBUG)
        lyrics = Lyrics(settings=self.settings)
        lyrics.song = song
        lyrics.source = __title__
        lyrics.lrc = __lrc__
        search_url = 'http://search.crintsoft.com/searchlyrics.htm'
        search_query_base = "<?xml version='1.0' encoding='utf-8' standalone='yes' ?><searchV1 client=\"ViewLyricsOpenSearcher\" artist=\"{artist}\" title=\"{title}\" OnlyMatched=\"1\" />"
        search_useragent = 'MiniLyrics'
        search_md5watermark = b'Mlv1clt4.0'
        query = search_query_base.format(artist=song.artist, title=song.title)
        search_encquery = MiniLyrics.vl_enc(query.encode('utf-8'), search_md5watermark)
        headers = {
            "User-Agent": search_useragent,
            "Content-Length": str(len(search_encquery)),
            "Connection": "Keep-Alive",
            "Expect": "100-continue",
            "Content-Type": "application/x-www-form-urlencoded",
        }
        try:
            request = requests.post(search_url, data=search_encquery, headers=headers, timeout=10)
            search_result = request.text
        except Exception:
            # best effort: any failure while talking to the server means "no lyrics"
            return None
        # the payload starts at offset 22; a shorter reply has nothing to decode
        if len(search_result) <= 22:
            return None
        rawdata = MiniLyrics.vl_dec(search_result)
        # might be a better way to parse the data
        lrcdata = rawdata.replace('\x00', '*')
        artistmatch = re.search(r'artist\*(.*?)\*', lrcdata)
        if not artistmatch:
            return None
        titlematch = re.search(r'title\*(.*?)\*', lrcdata)
        if not titlematch:
            return None
        artist = artistmatch.group(1)
        title = titlematch.group(1)
        links = []
        artist_ratio = difflib.SequenceMatcher(None, song.artist.lower(), artist.lower()).ratio()
        title_ratio = difflib.SequenceMatcher(None, song.title.lower(), title.lower()).ratio()
        if artist_ratio > 0.8 and title_ratio > 0.8:
            for item in re.findall(r'[a-z0-9/_]*?\.lrc', lrcdata):
                links.append((artist + ' - ' + title, item, artist, title))
        if not links:
            return None
        if len(links) > 1:
            lyrics.list = links
        lyr = self.get_lyrics_from_list(links[0])
        if not lyr:
            return None
        lyrics.lyrics = lyr
        return lyrics

    def get_lyrics_from_list(self, link):
        '''
        Download the lyric file of one (label, url, artist, title) entry.

        Returns the decoded text, or None when the download fails.
        '''
        title, url, artist, song = link
        try:
            f = requests.get('http://search.crintsoft.com/l/' + url, timeout=10)
            lyrics = f.content
        except Exception:
            return None
        # the detected encoding can be None; decode() needs a real codec name
        encoding = chardet.detect(lyrics)['encoding'] or 'utf-8'
        try:
            return lyrics.decode(encoding, 'ignore')
        except LookupError:
            # detected name is not a codec Python knows
            return lyrics.decode('utf-8', 'ignore')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Dummy file to make this directory a package. |
66 changes: 66 additions & 0 deletions
66
script.cu.lrclyrics/lib/culrcscrapers/lrclib/lyricsScraper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#-*- coding: UTF-8 -*- | ||
''' | ||
Scraper for https://lrclib.net/ | ||
lrclib | ||
https://github.com/rtcq/syncedlyrics | ||
''' | ||
|
||
import requests | ||
import difflib | ||
from lib.utils import * | ||
|
||
__title__ = "lrclib" | ||
__priority__ = '110' | ||
__lrc__ = True | ||
|
||
|
||
class LyricsFetcher:
    '''
    Fetch synchronised (LRC) lyrics from lrclib.net.
    '''
    def __init__(self, *args, **kwargs):
        '''
        Required keyword arguments: debug (passed on to log()) and settings
        (passed on to Lyrics()).
        '''
        self.DEBUG = kwargs['debug']
        self.settings = kwargs['settings']
        self.SEARCH_URL = 'https://lrclib.net/api/search?q=%s-%s'
        self.LYRIC_URL = 'https://lrclib.net/api/get/%i'

    def _search_url(self, artist, title):
        '''
        Build the search URL with artist and title percent-encoded.

        Without the encoding, characters such as '&' or '#' in a name would
        cut the query short.
        '''
        from urllib.parse import quote
        return self.SEARCH_URL % (quote(str(artist), safe=''), quote(str(title), safe=''))

    def get_lyrics(self, song):
        '''
        Search for song and return a Lyrics object, or None when nothing usable is found.

        All sufficiently similar search hits are tried in order; the first
        one that has synced lyrics wins.
        '''
        log("%s: searching lyrics for %s - %s" % (__title__, song.artist, song.title), debug=self.DEBUG)
        lyrics = Lyrics(settings=self.settings)
        lyrics.song = song
        lyrics.source = __title__
        lyrics.lrc = __lrc__
        try:
            url = self._search_url(song.artist, song.title)
            response = requests.get(url, timeout=10)
            result = response.json()
        except Exception:
            # best effort: network or JSON trouble means "no lyrics"
            return None
        # a search reply that is not a list of hits cannot be iterated as hits
        if not isinstance(result, list):
            return None
        links = []
        for item in result:
            try:
                artistname = item['artistName']
                songtitle = item['name']
                songid = item['id']
            except (KeyError, TypeError):
                # skip entries that lack the fields used below
                continue
            artist_ratio = difflib.SequenceMatcher(None, song.artist.lower(), artistname.lower()).ratio()
            title_ratio = difflib.SequenceMatcher(None, song.title.lower(), songtitle.lower()).ratio()
            if artist_ratio > 0.8 and title_ratio > 0.8:
                links.append((artistname + ' - ' + songtitle, self.LYRIC_URL % songid, artistname, songtitle))
        if not links:
            return None
        if len(links) > 1:
            lyrics.list = links
        for link in links:
            lyr = self.get_lyrics_from_list(link)
            if lyr:
                lyrics.lyrics = lyr
                return lyrics
        return None

    def get_lyrics_from_list(self, link):
        '''
        Fetch one (label, url, artist, title) entry and return its 'syncedLyrics' value.

        Returns None when the request fails or the reply has no such field.
        '''
        title, url, artist, song = link
        try:
            log('%s: search url: %s' % (__title__, url), debug=self.DEBUG)
            response = requests.get(url, timeout=10)
            result = response.json()
        except Exception:
            return None
        if isinstance(result, dict) and 'syncedLyrics' in result:
            return result['syncedLyrics']
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Dummy file to make this directory a package. |
Oops, something went wrong.