# Russia2017.py — Europa Plus Top-40 chart scraper (Python 2).
# (GitHub page residue — fork banner and line-number gutter — removed.)
# -*- coding: utf-8 -*-
import requests
#from bs4 import BeautifulSoup as bs
import re
from scrapy.selector import Selector as Sel
from scrapy.http import HtmlResponse as HR
import csv
import ast
agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0'
headers = {
'User-Agent': agent
}
url = 'http://www.europaplus.ru/index.php?go=Chart40'
def saveHTML(url, name, headers = headers):
    """Download *url* and save its body to the file *name*.

    Each line of the response is re-encoded to windows-1251 (the
    site's native encoding); lines that cannot be encoded are written
    as-is.  Never raises: failures are reported on stdout only.
    """
    try:
        req = requests.get(url, headers = headers)
        print(req.encoding)
        with open(name, 'w') as f:
            for line in req.text.splitlines():
                try:
                    f.write(line.encode('windows-1251'))
                except UnicodeEncodeError:
                    # Only encoding failures fall back to the raw line;
                    # real write errors propagate to the outer handler.
                    f.write(line)
        print("File is saved")
    except Exception:
        # Deliberate best-effort: report and carry on.
        print("Bad request or not saved file. Sorry ;(")
def writeCSV(List, filename = 'test.csv'):
    """Append song rows to a CSV file.

    List     -- iterable of rows [author, title, cover_url, audio_url,
                country, year].
    filename -- target CSV path.  FIX: the original ignored this
                parameter and always appended to 'russia2017.csv'.
    """
    with open(filename, 'a') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        for row in List:
            try:
                writer.writerow([row[0], row[1], row[2], row[3], row[4], row[5]])
            except UnicodeEncodeError:
                # Python 2 fallback: encode every cell.  FIX: the
                # original encoded only the first four, silently
                # dropping the country and year columns.
                writer.writerow([cell.encode('utf-8') for cell in row[:6]])
def getAudio(author, sing):
    """Google for "<author> <sing>", locate an mp3party.net result and
    return the direct audio URL scraped from that page.

    Returns '' (or None from an empty selector match) when no
    mp3party link is found or any step fails.  FIX: the original used
    a bare except, fetched requests.get('') when no link matched, and
    left the first request outside the try so a network error killed
    the whole script.
    """
    query = (author + ' ' + sing).replace('& ', '').replace(' ', '+')
    googurl = 'https://www.google.ru/search?newwindow=1&ei=O41KWrz4J-nX6QTFgbWYDQ&q=%s' % query
    audio = ''
    try:
        req = requests.get(googurl, headers = headers)
        response = HR(url=googurl, body = req.text, encoding='utf-8')
        # Google renders the target URL as the green '._Rm' text.
        links = response.css('.kv ._Rm::text').extract()
        mp3party = ''
        for link in links:
            if 'mp3party.net' in link:
                mp3party = 'http://' + link
                break
        print(mp3party)
        if mp3party:  # skip the second fetch when nothing matched
            req = requests.get(mp3party, headers = headers)
            response = HR(url=mp3party, body = req.text, encoding='utf-8')
            audio = response.css('.jp-play::attr(href)').extract_first()
            print(audio)
    except Exception:
        audio = ''
    return audio
def getCover(author, sing):
    """Google-Images search for "<author> <sing>" and return the URL
    of the first result's full-size image.

    Each result's '.rg_meta' node carries a JSON-like blob whose 'ou'
    key is the original image URL.  Returns '' on any failure.  FIX:
    the original had no error handling -- a missing node made
    unicode(None) -> literal_eval -> None['ou'] crash the script.
    """
    query = (author + ' ' + sing).replace(' ', '+')
    coverurl = 'https://www.google.ru/search?q=%s&newwindow=1&client=opera&hs=1DR&source=lnms&tbm=isch&sa=X&ved=0ahUKEwjRus-zrLDYAhXNJVAKHZTsBPQQ_AUICigB&biw=1277&bih=673' % query
    try:
        req = requests.get(coverurl, headers = headers)
        response = HR(url=coverurl, body = req.text, encoding='utf-8')
        meta = response.css('.rg_bx .rg_meta::text').extract_first()
        if meta is None:
            return ''
        # literal_eval accepts the string directly; the py2-only
        # unicode() wrapper was unnecessary.
        return ast.literal_eval(meta)['ou']
    except Exception:
        return ''
# --- main: scrape the Europa Plus Top-40 chart page ------------------
req = requests.get(url, headers = headers)
response = HR(url=url, body = req.text, encoding='windows-1251')
# One '.jp-title' node per chart entry; the artist sits in
# <strong><a>, the track title in <span>.
songs = response.css('.songs-holder > .jp-title').extract()
authors = response.css('.songs-holder > .jp-title > strong > a::text').extract()
sings = response.css('.songs-holder > .jp-title > span::text').extract()
List = []
# FIX: pair the two lists with zip instead of indexing by
# xrange(len(songs)) -- avoids IndexError if the lists ever differ.
for author, sing in zip(authors, sings):
    img = getCover(author, sing)
    audio = getAudio(author, sing)
    if audio is None:  # extract_first() yields None on no match
        audio = ''
    List.append([author, sing, img, audio, 'Russia', '2017'])
writeCSV(List, filename = 'Russia2017.csv')
#saveHTML(url,'test.html')