-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathakniga_dl.py
113 lines (99 loc) · 5.13 KB
/
akniga_dl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import argparse
import json
import shutil
import subprocess
import brotli
from pathlib import Path
from pathvalidate import sanitize_filename
from selenium.webdriver.chrome.service import Service as ChromeService
from seleniumwire import webdriver
from webdriver_manager.chrome import ChromeDriverManager
def get_book_requests(book_url: str) -> list:
    """Load the book page in headless Chrome and return the recorded requests.

    A Chrome driver binary is obtained through ``ChromeDriverManager`` and a
    selenium-wire ``webdriver.Chrome`` session is opened with the ``headless``
    argument. The session is closed before the function returns.

    Args:
        book_url: Address of the book page to open.

    Returns:
        The value of the driver's ``requests`` attribute, read after the page
        has been loaded and before the browser session is closed.
    """
    print("Getting book requests. Please wait...")
    chrome_service = ChromeService(executable_path=ChromeDriverManager().install())
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('headless')
    with webdriver.Chrome(service=chrome_service, options=chrome_options) as browser:
        browser.get(book_url)
        # Read the requests while the session is still alive.
        captured_requests = browser.requests
    return captured_requests
def analyse_book_requests(book_requests: list) -> tuple:
    """Pick the book metadata and the m3u8 playlist URL out of captured requests.

    Exactly one ``POST`` request whose path starts with ``/ajax/b/`` and
    exactly one request whose URL contains ``m3u8`` must be present. The body
    of the first one's response is Brotli-decompressed and parsed as JSON.

    Args:
        book_requests: Request objects exposing ``method``, ``path``, ``url``
            and ``response`` (with a ``body`` attribute).

    Returns:
        A ``(book_json, m3u8_url)`` tuple: the parsed JSON document and the
        URL of the m3u8 request.

    Raises:
        SystemExit: With status 1, after printing an error message, when the
            number of matching book-data or m3u8 requests is not exactly one
            (zero or several), or when the book-data request has no response.
    """
    print('Analysing book requests...')

    # Explicit checks instead of ``assert``: assertions are removed when
    # Python runs with -O, which would let bad input slip through.
    book_json_requests = [r for r in book_requests if r.method == 'POST' and r.path.startswith('/ajax/b/')]
    if len(book_json_requests) != 1:
        print('Error: Book data not found. Exiting.')
        raise SystemExit(1)
    print('Book data found')

    m3u8_file_requests = [r for r in book_requests if 'm3u8' in r.url]
    if len(m3u8_file_requests) != 1:
        print('Error: m3u8 file request not found. Exiting.')
        raise SystemExit(1)
    print('m3u8 file found')

    response = book_json_requests[0].response
    if response is None:
        # A request that never received an answer carries no response object.
        print('Error: Book data request has no response. Exiting.')
        raise SystemExit(1)

    # NOTE(review): the body is assumed to be Brotli-compressed; the
    # Content-Encoding header is not inspected here.
    book_json = json.loads(brotli.decompress(response.body))
    return book_json, m3u8_file_requests[0].url
def separate_into_chapters(book_json: dict, full_mp3_filepath: Path, book_folder: Path):
    """Cut the full-book mp3 into one mp3 file per chapter with ffmpeg.

    For every entry of ``book_json['chapters']`` ffmpeg is run with
    ``-acodec copy`` over the span from the chapter's ``time_from_start`` to
    its ``time_finish``.

    Args:
        book_json: Book data; its ``'chapters'`` entries provide ``'title'``,
            ``'time_from_start'`` and ``'time_finish'``.
        full_mp3_filepath: Path of the full-book file WITHOUT the ``.mp3``
            suffix (the suffix is appended here).
        book_folder: Folder that receives the chapter files, each named after
            the sanitized chapter title plus ``.mp3``.
    """
    print('Separating chapters. Please wait...')
    source_mp3 = f'{full_mp3_filepath}.mp3'
    for item in book_json['chapters']:
        target_stem = book_folder / sanitize_filename(item['title'])
        cut_from = str(item['time_from_start'])
        cut_to = str(item['time_finish'])
        subprocess.run(
            ['ffmpeg', '-i', source_mp3, '-acodec', 'copy', '-ss', cut_from, '-to', cut_to, f'{target_stem}.mp3']
        )
def download_book(book_json: dict, target_folder: Path, single_chapter: bool = False):
    """Download the book audio from its m3u8 playlist into one mp3 via ffmpeg.

    Args:
        book_json: Book data; ``'m3u8_url'`` is passed to ffmpeg as input.
            ``'title'`` names the output file, or the first entry of
            ``'chapters'`` does when ``single_chapter`` is true.
        target_folder: Folder the mp3 file is written into.
        single_chapter: When true, the file is named after the first
            chapter's title; otherwise after the book title.
    """
    print('Downloading book. Please wait...')
    if single_chapter:
        # Chapter titles are used verbatim from the book data, so sanitize
        # them before building a path, as separate_into_chapters does;
        # otherwise a title with a path separator or characters that are
        # illegal in file names breaks the output path.
        file_stem = sanitize_filename(book_json['chapters'][0]['title'])
    else:
        file_stem = book_json['title']
    filepath = target_folder / file_stem
    ffmpeg_command = ['ffmpeg', '-i', book_json['m3u8_url'], f'{filepath}.mp3']
    subprocess.run(ffmpeg_command)
if __name__ == '__main__':
    # Command-line entry point: capture the book page's requests, extract the
    # book data and playlist URL, then download the audio and (optionally)
    # split it into chapter files.
    parser = argparse.ArgumentParser(description='Download a book from akniga.org')
    parser.add_argument('url', help='Book\'s url for downloading')
    parser.add_argument('output', help='Absolute or relative path where book will be downloaded')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-d', '--delete', action='store_true',
                       help='Delete full book folder, after chapter separation is done')
    group.add_argument('-f', '--full', action='store_true',
                       help='Do not separate the book into multiple chapters, if any')
    args = parser.parse_args()
    print(args)

    book_requests = get_book_requests(args.url)
    book_json, m3u8_url = analyse_book_requests(book_requests)
    book_json['m3u8_url'] = m3u8_url
    book_json['title'] = sanitize_filename(book_json['title'])
    # 'items' holds the chapter list as a JSON-encoded string.
    book_json['chapters'] = json.loads(book_json['items'])

    # A relative output path is resolved against this script's directory.
    output_path = Path(args.output)
    if not output_path.is_absolute():
        output_path = Path(__file__).parent / output_path

    # Create the book folder; tolerate a folder left over from an earlier run.
    book_folder = output_path / book_json['title']
    book_folder.mkdir(parents=True, exist_ok=True)

    chapters_count = len(book_json['chapters'])
    if chapters_count == 1:
        print('Only one chapter found')
        # download book directly in book folder
        download_book(book_json, book_folder, single_chapter=True)
    elif chapters_count >= 2:
        print('Multiple chapters found')
        # argparse stores the flags under their long names ('full' and
        # 'delete'); 'args.f' / 'args.d' do not exist and raise AttributeError.
        if args.full:
            print("Downloading full book without chapters separation")
            # download book directly in book folder
            # NOTE(review): single_chapter=True names the file after the first
            # chapter's title rather than the book title - confirm intent.
            download_book(book_json, book_folder, single_chapter=True)
        else:
            # The full recording goes into its own sub-folder so the chapter
            # files can be written next to it.
            full_book_folder = book_folder / 'full_book'
            full_book_folder.mkdir(exist_ok=True)
            if args.delete:
                print("Downloading full book with chapters separation, deleting full book folder afterwards")
            else:
                print("Downloading full book with chapters separation and keeping full book folder afterwards")
            download_book(book_json, full_book_folder, single_chapter=False)
            separate_into_chapters(book_json, full_book_folder / book_json['title'], book_folder)
            if args.delete:
                shutil.rmtree(full_book_folder, ignore_errors=True)
    else:
        # Previously this case fell through silently with nothing downloaded.
        print('Error: No chapters found. Nothing to download.')
        raise SystemExit(1)