Skip to content

Commit

Permalink
First release
Browse files Browse the repository at this point in the history
  • Loading branch information
chdu-merkle committed Jun 20, 2018
0 parents commit 909e724
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 0 deletions.
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# ---> no_extension_pyc
# Ignore all
*

# Unignore all with extensions
!*.*

# Unignore all dirs
!*/

### Above combination will ignore all files without extension ###

**/__pycache__

**/test.py

*.list

download/
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Usage
1. Make a list of YouTube links and save it as `youtube_video.list`, e.g.
```
https://www.youtube.com/watch?v=bY6m6_IIN94
https://www.youtube.com/watch?v=f4KOjWS_KZs
```
2. Run `generate_download_list.py` to resolve each link to a direct download URL
3. Run `youtube_download.py` to download the videos through the proxy
71 changes: 71 additions & 0 deletions generate_download_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import requests
from requests import Request, Session
import urllib, json
import bs4 as bs
import re
import time

QUERY_URL = 'https://y2mate.com/analyze/ajax'
VIDEO_LIST = 'youtube_video.list'


def read_video_list():
    """Read YouTube links from VIDEO_LIST, one link per line.

    Returns a list of non-empty, whitespace-stripped lines. Blank lines
    (including the empty entry produced by a trailing newline) are
    skipped so downstream code never queries an empty URL.
    """
    with open(VIDEO_LIST, 'r') as f:
        return [line.strip() for line in f if line.strip()]


def query_link_generate(youtube_link):
    '''
    Resolve one YouTube watch URL to a direct MP4 download link.

    POSTs the link to the y2mate.com "analyze" endpoint and scrapes the
    HTML fragment it returns for the first MP4 download anchor.

    Parameters:
        youtube_link: a full YouTube watch URL, e.g.
            'https://www.youtube.com/watch?v=iAzShkKzpJo'

    Returns:
        (videoName, videoDownloadLink): the video title extracted from
        the download URL, and the direct download URL itself.
    '''
    # The endpoint expects a form-encoded body, so percent-encode the link.
    data = 'url={}&ajax=1'.format(urllib.parse.quote_plus(youtube_link))
    # Browser-like headers captured from a real session; presumably the
    # service rejects requests without origin/referer/x-requested-with —
    # TODO confirm which headers are actually required.
    headers = {
        "accept": "*/*",
        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://y2mate.com",
        "referer": "https://y2mate.com/youtube/Xi52tx6phRU",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }

    # Build and send the POST via an explicitly prepared request on a
    # fresh session.
    Sess = requests.session()
    req = Request('POST', QUERY_URL, data=data, headers=headers)
    prepped = Sess.prepare_request(req)
    resp = Sess.send(prepped)
    resp_text = resp.content.decode('utf-8')

    # print(data)
    # print(resp_text)
    # The endpoint answers JSON; its "result" field holds an HTML fragment.
    result = json.loads(resp_text)

    # #mp4 > table > tbody > tr:nth-child(1) > td.txt-center > a
    # Take the first anchor in the MP4 table of the returned fragment.
    soup = bs.BeautifulSoup(result["result"], 'lxml')
    videoDownloadLink = soup.select('#mp4 > table > tbody > tr > td.txt-center > a')[0]['href']
    # print(videoDownloadLink)
    # The title is embedded in the link's '&title=...&' query parameter;
    # everything after ' || ' is dropped (presumably a site suffix —
    # TODO confirm).
    videoName = urllib.parse.unquote_plus(re.findall(r'(?<=&title=).*(?=&)', videoDownloadLink)[0]).split(' || ')[0]
    print(videoName)
    return videoName, videoDownloadLink


if __name__ == '__main__':
    # test_link = 'https://www.youtube.com/watch?v=f4KOjWS_KZs'
    # query_link_generate(test_link)

    # Resolve every link in the video list and write one
    # "<count> <name> |#| <url>" line per video for youtube_download.py
    # to consume. The file is opened once in 'w' mode (truncating any
    # previous run) instead of being reopened in append mode on every
    # iteration.
    with open('youtube_video_download.list', 'w') as f:
        for count, link in enumerate(read_video_list(), start=1):
            videoName, videoDownloadLink = query_link_generate(link)
            f.write('{} {} |#| {}\n'.format(count, videoName, videoDownloadLink))
            # Flush so completed entries survive a mid-run crash, matching
            # the per-line durability of the original reopen-per-write.
            f.flush()
            # Be polite to the resolver service between requests.
            time.sleep(1)



62 changes: 62 additions & 0 deletions youtube_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import requests
import os


VIDEO_LIST = 'youtube_video_download.list'
proxy = '127.0.0.1:1080'
downloadPath = 'download/'

# Characters reserved in (Windows) file names, each mapped to '_'.
_ILLEGAL_CHAR_MAP = str.maketrans({c: '_' for c in '<>:"/\\|?*'})


def remove_illegal_char(fileName):
    '''
    Remove reserved characters from file name.

    Every character that is illegal in a file name ('<', '>', ':', '"',
    '/', '\\', '|', '?', '*') is replaced with an underscore; all other
    characters pass through unchanged.
    '''
    # str.translate performs all replacements in a single pass instead of
    # one .replace() call per reserved character.
    return fileName.translate(_ILLEGAL_CHAR_MAP)

def read_list():
    """Parse VIDEO_LIST into [fileName, downloadUrl] entries.

    Each non-blank line has the form '<count> <name> |#| <url>'. The
    part before ' |#| ' is stripped, sanitised with
    remove_illegal_char, and given a '.mp4' extension.
    """
    with open(VIDEO_LIST, 'r') as f:
        raw_lines = f.read().split('\n')

    entries = []
    for line in raw_lines:
        # Skip blank lines (e.g. the empty entry after a trailing newline).
        if not line:
            continue

        parts = line.split(' |#| ')
        parts[0] = remove_illegal_char(parts[0].strip()) + '.mp4'
        entries.append(parts)

    return entries

def download_file(fileName, url):
    '''
    Download url to fileName through the local proxy.

    The response body is streamed to disk in chunks so large video
    files never have to fit in memory, and a non-2xx response raises
    instead of silently saving an error page as the video file.
    '''
    print('Downloading {}'.format(fileName))
    # requests addresses the proxy itself with the 'http://' scheme for
    # both http and https targets; the previous 'https://{proxy}' form
    # made requests attempt TLS *to* the proxy, which fails for a plain
    # local proxy listener.
    proxy_url = 'http://{}'.format(proxy)
    proxies = {'http': proxy_url, 'https': proxy_url}
    with requests.get(url, proxies=proxies, stream=True) as res:
        # Fail loudly on HTTP errors rather than writing the error body.
        res.raise_for_status()
        with open(fileName, 'wb') as f:
            for chunk in res.iter_content(chunk_size=1 << 16):
                f.write(chunk)
    print('Downloading finished')

def create_download_dir():
    '''
    Ensure the download directory exists.

    Creates downloadPath relative to the current working directory —
    the same place the original os.mkdir call created it. The old
    implementation checked `'download/' in os.listdir(script_dir)`,
    which never matched (listdir returns 'download' without the
    trailing slash, and against a possibly different directory), so it
    always relied on the FileExistsError fallback. makedirs with
    exist_ok=True does the same thing without the racy check.
    '''
    os.makedirs(downloadPath, exist_ok=True)



if __name__ == '__main__':
    # Make sure the target directory exists, then fetch every entry from
    # the download list: i[0] is the sanitised file name (with '.mp4'
    # extension), i[1] the direct video URL.
    create_download_dir()
    for i in read_list():
        download_file(downloadPath + i[0], i[1])

0 comments on commit 909e724

Please sign in to comment.