From 909e7244ca929ce3e709b6ea856f4667275a251b Mon Sep 17 00:00:00 2001
From: Chenny Du
Date: Wed, 20 Jun 2018 10:00:48 +0800
Subject: [PATCH] First release

---
 .gitignore                | 19 +++++++++++
 README.md                 |  8 +++++
 generate_download_list.py | 71 +++++++++++++++++++++++++++++++++++++++
 youtube_download.py       | 62 ++++++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 generate_download_list.py
 create mode 100644 youtube_download.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e6852ba
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,19 @@
+# ---> no_extension_pyc
+# Ignore all
+*
+
+# Unignore all with extensions
+!*.*
+
+# Unignore all dirs
+!*/
+
+### The combination above ignores all files without an extension ###
+
+**/__pycache__
+
+**/test.py
+
+*.list
+
+download/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..56c220a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,8 @@
+# Usage
+1. Make a list of YouTube links and save it as `youtube_video.list`, e.g.
+```
+https://www.youtube.com/watch?v=bY6m6_IIN94
+https://www.youtube.com/watch?v=f4KOjWS_KZs
+```
+2. Run `generate_download_list.py` to resolve each link into a direct download URL, written to `youtube_video_download.list`
+3. Run `youtube_download.py` to download the videos through the local proxy
\ No newline at end of file
diff --git a/generate_download_list.py b/generate_download_list.py
new file mode 100644
index 0000000..2028ed9
--- /dev/null
+++ b/generate_download_list.py
@@ -0,0 +1,71 @@
+import requests
+from requests import Request, Session
+import urllib.parse, json
+import bs4 as bs
+import re
+import time
+
+QUERY_URL = 'https://y2mate.com/analyze/ajax'
+VIDEO_LIST = 'youtube_video.list'
+
+
+def read_video_list():
+    with open(VIDEO_LIST, 'r') as f:
+        videoList = f.read().split('\n')
+    return videoList
+
+
+def query_link_generate(youtube_link):
+    '''
+    youtube_link = 'https://www.youtube.com/watch?v=iAzShkKzpJo'
+    '''
+    data = 'url={}&ajax=1'.format(urllib.parse.quote_plus(youtube_link))
+    headers = {
+        "accept": "*/*",
+        "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
+        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
+        "origin": "https://y2mate.com",
+        "referer": "https://y2mate.com/youtube/Xi52tx6phRU",
+        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
+        "x-requested-with": "XMLHttpRequest",
+    }
+
+    Sess = requests.Session()
+    req = Request('POST', QUERY_URL, data=data, headers=headers)
+    prepped = Sess.prepare_request(req)
+    resp = Sess.send(prepped)
+    resp_text = resp.content.decode('utf-8')
+
+    # print(data)
+    # print(resp_text)
+    result = json.loads(resp_text)
+
+    # #mp4 > table > tbody > tr:nth-child(1) > td.txt-center > a
+    soup = bs.BeautifulSoup(result["result"], 'lxml')
+    videoDownloadLink = soup.select('#mp4 > table > tbody > tr > td.txt-center > a')[0]['href']
+    # print(videoDownloadLink)
+    videoName = urllib.parse.unquote_plus(re.findall(r'(?<=&title=).*(?=&)', videoDownloadLink)[0]).split(' || ')[0]
+    print(videoName)
+    return videoName, videoDownloadLink
+
+
+if __name__ == '__main__':
+    # test_link = 'https://www.youtube.com/watch?v=f4KOjWS_KZs'
+    # query_link_generate(test_link)
+
+    # videoDownloadLinkList = list(map(query_link_generate, read_video_list()))
+
+    count = 1
+
+    with open('youtube_video_download.list', 'w') as f:
+        f.write('')
+
+    for i in read_video_list():
+        videoName, videoDownloadLink = query_link_generate(i)
+        with open('youtube_video_download.list', 'a') as f:
+            f.write('{} {} |#| {}\n'.format(count, videoName, videoDownloadLink))
+        time.sleep(1)
+        count += 1
+
+
+
diff --git a/youtube_download.py b/youtube_download.py
new file mode 100644
index 0000000..45f0bbe
--- /dev/null
+++ b/youtube_download.py
@@ -0,0 +1,62 @@
+import requests
+import os
+
+
+VIDEO_LIST = 'youtube_video_download.list'
+proxy = '127.0.0.1:1080'
+downloadPath = 'download/'
+
+
+def remove_illegal_char(fileName):
+    '''
+    Remove reserved characters from file name
+    '''
+    RESERVED_CHAR = ['<', '>', ':', '"', '/', '\\', '|', '?', '*',]
+
+    for char in RESERVED_CHAR:
+        fileName = fileName.replace(char, '_')
+    return fileName
+
+def read_list():
+    with open(VIDEO_LIST, 'r') as f:
+        downloadList_tmp = f.read().split('\n')
+
+    downloadList = []
+    for i in downloadList_tmp:
+        # get fileName, downloadUrl
+        if not i:
+            continue
+
+        downloadItem = i.split(' |#| ')
+        downloadItem[0] = remove_illegal_char(downloadItem[0].strip()) + '.mp4'
+        downloadList.append(downloadItem)
+
+    return downloadList
+
+def download_file(fileName, url):
+    '''
+    Download file with proxy
+    '''
+    print('Downloading {}'.format(fileName))
+    proxies = {'http': 'http://{}'.format(proxy),
+               'https': 'https://{}'.format(proxy)}
+    res = requests.get(url, proxies=proxies)
+    con = res.content
+    with open(fileName, 'wb') as f:
+        f.write(con)
+    print('Downloading finished')
+
+def create_download_dir():
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+    try:
+        if downloadPath not in os.listdir(script_dir):
+            os.mkdir(downloadPath)
+    except FileExistsError:
+        pass
+
+
+
+if __name__ == '__main__':
+    create_download_dir()
+    for i in read_list():
+        download_file(downloadPath + i[0], i[1])
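
A note on `download_file` above: it reads the entire response into memory via `res.content` before writing it to disk, which can be expensive for long videos. The sketch below streams the file to disk in chunks instead, offered as a minimal suggestion rather than as part of the patch: the name `download_file_streamed`, the 1 MiB chunk size, and the 60-second timeout are illustrative, and the `http://` scheme on both proxy entries assumes the proxy at `127.0.0.1:1080` is a plain HTTP proxy (a SOCKS5 proxy would instead need `socks5://` URLs and the `requests[socks]` extra).
```
import requests

proxy = '127.0.0.1:1080'  # same local proxy as in youtube_download.py; assumed here to be a plain HTTP proxy


def download_file_streamed(fileName, url):
    '''
    Download a file through the proxy, streaming it to disk in chunks
    instead of buffering the whole video in memory.
    '''
    proxies = {'http': 'http://{}'.format(proxy),
               'https': 'http://{}'.format(proxy)}
    with requests.get(url, proxies=proxies, stream=True, timeout=60) as res:
        res.raise_for_status()
        with open(fileName, 'wb') as f:
            # write the video in 1 MiB chunks as they arrive
            for chunk in res.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
```
If adopted, it could replace `download_file` in the `__main__` loop of `youtube_download.py` with no other changes.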