diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a7988f1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/.idea/
+/venv/
+/__pycache__/
\ No newline at end of file
diff --git a/README.md b/README.md
index 0f9e4ee..a6611da 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,9 @@ ffmpeg is also required
 * Make sure you've added ffmpeg.exe path to PATH environment variable as on the video
 
 # Usage:
-usage: akniga_dl.py [-h] [-d | -f] url output
+usage: main.py [-h] [-f] url output
+To download several books in one run without the command line,
+see the commented-out example at the bottom of main.py
 
 Download a book from akniga.org
 
@@ -21,7 +23,6 @@ positional arguments:
 
 options:
   -h, --help    show this help message and exit
-  -d, --delete  Delete full book folder, after chapter separation is done
   -f, --full    Do not separate the book into multiple chapters, if any
 
 Where:
diff --git a/akniga_dl.py b/akniga_dl.py
deleted file mode 100644
index fa55ff0..0000000
--- a/akniga_dl.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import argparse
-import json
-import shutil
-import subprocess
-import brotli
-from pathlib import Path
-from pathvalidate import sanitize_filename
-from selenium.webdriver.chrome.service import Service as ChromeService
-from seleniumwire import webdriver
-from webdriver_manager.chrome import ChromeDriverManager
-
-
-def get_book_requests(book_url: str) -> list:
-    print("Getting book requests. Please wait...")
-    service = ChromeService(executable_path=ChromeDriverManager().install())
-    options = webdriver.ChromeOptions()
-    options.add_argument('headless')
-    with webdriver.Chrome(service=service, options=options) as driver:
-        driver.get(book_url)
-        return driver.requests
-
-
-def analyse_book_requests(book_requests: list) -> tuple:
-    print('Analysing book requests...')
-    try:
-        # find request with book json data
-        book_json_requests = [r for r in book_requests if r.method == 'POST' and r.path.startswith('/ajax/b/')]
-        # assert that we have only 1 request for book data found
-        assert len(book_json_requests) == 1, 'Error: Book data not found. Exiting.'
-        print('Book data found')
-        # find request with m3u8 file
-        m3u8_file_requests = [r for r in book_requests if 'm3u8' in r.url]
-        # assert that we have only 1 request for m3u8 file found
-        assert len(m3u8_file_requests) == 1, 'Error: m3u8 file request not found. Exiting.'
-        print('m3u8 file found')
-        book_json = json.loads(brotli.decompress(book_json_requests[0].response.body))
-        return book_json, m3u8_file_requests[0].url
-    except AssertionError as message:
-        print(message)
-        exit()
-
-
-def separate_into_chapters(book_json: dict, full_mp3_filepath: Path, book_folder: Path):
-    print('Separating chapters. Please wait...')
-    for chapter in book_json['chapters']:
-        chapter_path = book_folder / sanitize_filename(chapter['title'])
-        ffmpeg_command = ['ffmpeg', '-i', f'{full_mp3_filepath}.mp3', '-acodec', 'copy', '-ss',
-                          str(chapter['time_from_start']), '-to', str(chapter['time_finish']), f'{chapter_path}.mp3']
-        subprocess.run(ffmpeg_command)
-
-
-def download_book(book_json: dict, target_folder: Path, single_chapter: bool = False):
-    print('Downloading book. Please wait...')
-    if single_chapter:
-        filepath = target_folder / book_json['chapters'][0]['title']
-    else:
-        filepath = target_folder / book_json['title']
-    ffmpeg_command = ['ffmpeg', '-i', book_json['m3u8_url'], f'{filepath}.mp3']
-    subprocess.run(ffmpeg_command)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Download a book from akniga.org')
-    parser.add_argument('url', help='Book\'s url for downloading')
-    parser.add_argument('output', help='Absolute or relative path where book will be downloaded')
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument('-d', '--delete', action='store_true',
-                       help='Delete full book folder, after chapter separation is done')
-    group.add_argument('-f', '--full', action='store_true',
-                       help='Do not separate the book into multiple chapters, if any')
-    args = parser.parse_args()
-    print(args)
-
-    book_requests = get_book_requests(args.url)
-    book_json, m3u8_url = analyse_book_requests(book_requests)
-    book_json['m3u8_url'] = m3u8_url
-    book_json['title'] = sanitize_filename(book_json['title'])
-    book_json['chapters'] = json.loads(book_json['items'])
-
-    # check if output folder is an absolute or relative
-    if Path(args.output).is_absolute():
-        output_path = args.output
-    else:
-        output_path = Path(__file__).parent / args.output
-
-    # create book folder
-    book_folder = Path(output_path) / book_json['title']
-    Path(book_folder).mkdir(parents=True)
-
-    if len(book_json['chapters']) == 1:
-        print('Only one chapter found')
-        # download book directly in book folder
-        download_book(book_json, book_folder, single_chapter=True)
-    elif len(book_json['chapters']) >= 2:
-        print('Multiple chapters found')
-        if args.f:
-            print("Downloading full book without chapters separation")
-            # download book directly in book folder
-            download_book(book_json, book_folder, single_chapter=True)
-        else:
-            # create full book folder
-            full_book_folder = book_folder / 'full_book'
-            Path(full_book_folder).mkdir()
-            if args.d:
-                print("Downloading full book with chapters separation, deleting full book folder afterwards")
-                # download book in full book folder, delete it afterward
-                download_book(book_json, full_book_folder, single_chapter=False)
-                separate_into_chapters(book_json, full_book_folder / book_json['title'], book_folder)
-                shutil.rmtree(full_book_folder, ignore_errors=True)
-            else:
-                print("Downloading full book with chapters separation and keeping full book folder afterwards")
-                download_book(book_json, full_book_folder, single_chapter=False)
-                separate_into_chapters(book_json, full_book_folder / book_json['title'], book_folder)
diff --git a/akniga_parser.py b/akniga_parser.py
new file mode 100644
index 0000000..b5a3af2
--- /dev/null
+++ b/akniga_parser.py
@@ -0,0 +1,143 @@
+import subprocess
+import brotli
+from selenium.webdriver.chrome.service import Service as ChromeService
+from seleniumwire import webdriver
+from webdriver_manager.chrome import ChromeDriverManager
+import json
+import shutil
+from pathlib import Path
+from pathvalidate import sanitize_filename
+
+
+class BookData:
+    """Attribute container for the book JSON assembled by AKnigaParser."""
+
+    def __init__(self, items):
+        self.title = items['title']
+        self.res = items['res']
+        self.hres = items['hres']
+        self.srv = items['srv']
+        self.sTextAuthor = items['sTextAuthor']
+        self.sTextPerformer = items['sTextPerformer']
+        self.sTextFav = items['sTextFav']
+        self.items = items['items']
+        self.topic_id = items['topic_id']
+        self.titleonly = items['titleonly']
+        self.slug = items['slug']
+        self.version = items['version']
+        self.bookurl = items['bookurl']
+        self.preview = items['preview']
+        self.author = items['author']
+        self.sMsgTitle = items['sMsgTitle']
+        self.sMsg = items['sMsg']
+        self.bStateError = items['bStateError']
+        self.m3u8_url = items['m3u8_url']
+        self.chapters = items['chapters']
+
+
+class AKnigaParser:
+    """Download a book from akniga.org with ffmpeg, optionally split by chapter."""
+
+    book_url: str
+    book_requests: list
+    book_data: BookData
+    book_folder: Path
+
+    def __init__(self, url, output_folder):
+        """Capture the book page requests, build BookData and create the book folder."""
+        self.book_url = url
+        self.book_requests = self.get_book_requests()
+        book_json, m3u8_url = self.analyse_book_requests()
+        book_json['m3u8_url'] = m3u8_url
+        book_json['title'] = sanitize_filename(book_json['title'])
+        book_json['chapters'] = json.loads(book_json['items'])
+        self.book_data = BookData(book_json)
+        self.create_book_folder(output_folder)
+
+    def get_book_requests(self) -> list:
+        """Open the book page in headless Chrome and return the captured requests."""
+        print("Getting book requests. Please wait...")
+        service = ChromeService(executable_path=ChromeDriverManager().install())
+        options = webdriver.ChromeOptions()
+        options.add_argument('headless')
+        with webdriver.Chrome(service=service, options=options) as driver:
+            driver.get(self.book_url)
+            return driver.requests
+
+    def analyse_book_requests(self) -> tuple:
+        """Return (book_json, m3u8_url) found among the captured requests.
+
+        Prints an error and exits unless exactly one book-data request and
+        exactly one m3u8 request were captured.
+        """
+        print('Analysing book requests...')
+        # find request with book json data
+        book_json_requests = [r for r in self.book_requests if r.method == 'POST' and r.path.startswith('/ajax/b/')]
+        # explicit check instead of assert, which is stripped under "python -O"
+        if len(book_json_requests) != 1:
+            print('Error: Book data not found. Exiting.')
+            exit()
+        print('Book data found')
+        # find request with m3u8 file
+        m3u8_file_requests = [r for r in self.book_requests if 'm3u8' in r.url]
+        if len(m3u8_file_requests) != 1:
+            print('Error: m3u8 file request not found. Exiting.')
+            exit()
+        print('m3u8 file found')
+        book_json = json.loads(brotli.decompress(book_json_requests[0].response.body))
+        return book_json, m3u8_file_requests[0].url
+
+    def create_book_folder(self, output_folder: str):
+        """Create <output_folder>/<book title>; a relative path starts at this file's folder."""
+        output_path = output_folder if Path(output_folder).is_absolute() else Path(__file__).parent / output_folder
+        self.book_folder = Path(output_path) / self.book_data.title
+        Path(self.book_folder).mkdir(parents=True, exist_ok=True)
+
+    def separate_into_chapters(self, full_mp3_filepath: Path):
+        """Cut '<full_mp3_filepath>.mp3' into one mp3 per chapter inside the book folder."""
+        print('Separating chapters. Please wait...')
+        for chapter in self.book_data.chapters:
+            chapter_path = self.book_folder / sanitize_filename(chapter['title'])
+            ffmpeg_command = ['ffmpeg', '-i', f'{full_mp3_filepath}.mp3', '-acodec', 'copy', '-ss',
+                              str(chapter['time_from_start']), '-to', str(chapter['time_finish']),
+                              f'{chapter_path}.mp3']
+            subprocess.run(ffmpeg_command)
+
+    def download_book(self, single_chapter: bool = False):
+        """Download the m3u8 stream into the book folder as a single mp3.
+
+        The file is named after the first chapter when single_chapter is True,
+        otherwise after the book title.
+        """
+        print('Downloading book. Please wait...')
+        if single_chapter:
+            # sanitize as separate_into_chapters does; a raw title could hold characters invalid in a file name
+            filepath = self.book_folder / sanitize_filename(self.book_data.chapters[0]['title'])
+        else:
+            filepath = self.book_folder / self.book_data.title
+
+        ffmpeg_command = ['ffmpeg', '-i', self.book_data.m3u8_url, f'{filepath}.mp3']
+        subprocess.run(ffmpeg_command)
+
+    def run(self, separate_into_chapters: bool = True):
+        """Download the book; split it into chapters when requested and more than one exists."""
+        if len(self.book_data.chapters) < 1:
+            return
+        if len(self.book_data.chapters) == 1 or not separate_into_chapters:
+            if len(self.book_data.chapters) == 1:
+                print("Only 1 chapter found")
+            else:
+                print("Multiple chapters found")
+
+            print("Downloading full book without chapters separation")
+            self.download_book(single_chapter=True)  # download directly in book folder
+            return
+
+        print("Multiple chapters found")
+        full_book_folder = self.book_folder
+        Path(full_book_folder).mkdir(exist_ok=True)
+
+        print("Downloading full book with chapters separation, keeping full book afterwards")
+
+        self.download_book(single_chapter=False)
+        self.separate_into_chapters(full_book_folder / self.book_data.title)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..def16ce
--- /dev/null
+++ b/main.py
@@ -0,0 +1,29 @@
+import argparse
+from akniga_parser import AKnigaParser
+
+
+def parse_from_console():
+    """Parse the command line and download the requested book."""
+    parser = argparse.ArgumentParser(description='Download a book from akniga.org')
+    parser.add_argument('url', help='Book\'s url for downloading')
+    parser.add_argument('output', help='Absolute or relative path where book will be downloaded')
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('-f', '--full', action='store_true',
+                       help='Do not separate the book into multiple chapters, if any')
+    args = parser.parse_args()
+    # argparse stores '-f/--full' under the long option name, so the attribute is args.full
+    AKnigaParser(args.url, args.output).run(not args.full)
+
+
+if __name__ == "__main__":
+    parse_from_console()
+
+
+# OR
+# files = [
+#     'https://akniga.org/url1',
+#     'https://akniga.org/url2',
+# ]
+#
+# for url in files:
+#     AKnigaParser(url, '').run(True)