-
Notifications
You must be signed in to change notification settings - Fork 6
/
url_finders.py
95 lines (69 loc) · 2.52 KB
/
url_finders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from googlesearch import search as google_web_search
from time import sleep
from time import time
import sys
from urllib.error import HTTPError
import tools
from bs4 import BeautifulSoup
from urllib.parse import quote
# Timestamp (seconds since the epoch, as returned by time()) of the most
# recent Google query issued by google_search(), or None before the first one.
last = None


def google_search(query, limit):
    """Search Google for ``query`` and return YouTube watch URLs from the hits.

    Queries are throttled: each attempt waits until 60 seconds have passed
    since the previous attempt recorded in the module-level ``last``.

    Args:
        query: Search string handed to ``googlesearch.search``.
        limit: Passed as ``stop`` to the search, and used as the maximum
            number of URLs returned.

    Returns:
        A list of at most ``limit`` URLs containing ``youtube.com/watch?v=``,
        each cut at the first ``&`` so extra query parameters are dropped.

    Raises:
        Exception: whatever the search raised, re-raised once the fourth
            attempt has failed. ``KeyboardInterrupt`` always propagates
            immediately.
    """
    global last
    ret_url_list = []

    # A failure is re-raised once ``tries`` exceeds 3, so at most four
    # attempts are ever made even though the range is longer.
    for tries in range(1, 10):
        # Wait out the remainder of the 60 second window explicitly instead of
        # relying on sleep() raising ValueError for a negative duration.
        if last is not None:
            remaining = 60 - (time() - last)
            if remaining > 0:
                sleep(remaining)
        last = time()

        # Collect per attempt so that results gathered before a mid-iteration
        # failure are not duplicated by the retry.
        attempt_urls = []
        try:
            for url in google_web_search(query, stop=limit):
                if 'youtube.com/watch?v=' in url:
                    attempt_urls.append(url.split('&')[0])
        except Exception as e:
            # ``Exception`` lets KeyboardInterrupt/SystemExit pass through
            # untouched instead of being retried.
            if isinstance(e, HTTPError):
                print('google search service unavailable.')
                reason = str(e)
            else:
                # repr() keeps both the exception type and its message.
                reason = repr(e)
            if tries > 3:
                print('Failed to download google search result. Reason: ' + reason)
                raise
            print('Failed to download google search result, retrying. Reason: ' + reason)
            sleep(1)
        else:
            ret_url_list = attempt_urls
            break

    return ret_url_list[:limit]
def youtube_search(query, limit):
    """Search YouTube for ``query`` and return result URLs scraped from the page.

    The results page is fetched through ``tools.retrieve_web_page`` and every
    element with the CSS class ``yt-uix-tile-link`` is turned into an absolute
    ``https://www.youtube.com`` URL.

    Args:
        query: Search string; it is UTF-8 encoded and percent-quoted into the
            ``search_query`` URL parameter.
        limit: Maximum number of URLs returned.

    Returns:
        A list of at most ``limit`` URLs, each cut at the first ``&``. The
        list is empty when the fetched response is falsy or nothing matches.

    Raises:
        Exception: whatever the page fetch raised, re-raised once the fourth
            attempt has failed. ``KeyboardInterrupt`` always propagates
            immediately.
    """
    ret_url_list = []
    search_url = ('https://www.youtube.com/results?search_query=' +
                  quote(query.encode('utf-8')))

    # A failure is re-raised once ``tries`` exceeds 3, so at most four
    # attempts are ever made even though the range is longer.
    for tries in range(1, 10):
        try:
            response = tools.retrieve_web_page(search_url, 'youtube search result')
        except Exception as e:
            # ``Exception`` lets KeyboardInterrupt/SystemExit pass through
            # untouched instead of being retried.
            reason = repr(e)
            if tries > 3:
                print('Failed to download youtube search result. Reason: ' + reason)
                raise
            print('Failed to download youtube search result, retrying. Reason: ' + reason)
            sleep(1)
        else:
            if response:
                soup = BeautifulSoup(response, "html.parser")
                for item in soup.find_all(attrs={'class': 'yt-uix-tile-link'}):
                    href = item.get('href')
                    if not href:
                        # A matched tag without a link target cannot yield a URL.
                        continue
                    url = 'https://www.youtube.com' + href
                    ret_url_list.append(url.split('&')[0])
            # NOTE(review): a falsy response is treated as "no results" and is
            # not retried - confirm against tools.retrieve_web_page.
            break

    return ret_url_list[:limit]
def youtube_channel_search(query, limit):
    """Placeholder for a YouTube channel search; not implemented yet.

    Both ``query`` and ``limit`` are accepted but ignored, and the function
    always returns ``None``.
    """
    # todo (1): implement youtube_channel_search.
    return None