Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

some code refactoring #2

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/.idea/
/venv/
/__pycache__/
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ ffmpeg is also required
* Make sure you've added ffmpeg.exe path to PATH environment variable as on the video

# Usage:
usage: akniga_dl.py [-h] [-d | -f] url output
usage: akniga_dl.py [-h] [-f] url output
if you know what to do, you can do easier:
watch commented lines in main.py

Download a book from akniga.org

Expand All @@ -21,7 +23,6 @@ positional arguments:

options:
-h, --help show this help message and exit
-d, --delete Delete full book folder, after chapter separation is done
-f, --full Do not separate the book into multiple chapters, if any

Where:
Expand Down
113 changes: 0 additions & 113 deletions akniga_dl.py

This file was deleted.

125 changes: 125 additions & 0 deletions akniga_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import subprocess
import brotli
from selenium.webdriver.chrome.service import Service as ChromeService
from seleniumwire import webdriver
from webdriver_manager.chrome import ChromeDriverManager
import json
import shutil
from pathlib import Path
from pathvalidate import sanitize_filename


class BookData:
def __init__(self, items):
self.title = items['title']
self.res = items['res']
self.hres = items['hres']
self.srv = items['srv']
self.sTextAuthor = items['sTextAuthor']
self.sTextPerformer = items['sTextPerformer']
self.sTextFav = items['sTextFav']
self.items = items['items']
self.topic_id = items['topic_id']
self.titleonly = items['titleonly']
self.slug = items['slug']
self.version = items['version']
self.bookurl = items['bookurl']
self.preview = items['preview']
self.author = items['author']
self.sMsgTitle = items['sMsgTitle']
self.sMsg = items['sMsg']
self.bStateError = items['bStateError']
self.m3u8_url = items['m3u8_url']
self.chapters = items['chapters']


class AKnigaParser:
book_url: str
book_requests: list
book_data: BookData
book_folder: Path

def __init__(self, url, output_folder):
self.book_url = url
self.book_requests = self.get_book_requests()
book_json, m3u8_url = self.analyse_book_requests()
book_json['m3u8_url'] = m3u8_url
book_json['title'] = sanitize_filename(book_json['title'])
book_json['chapters'] = json.loads(book_json['items'])
self.book_data = BookData(book_json)
self.create_book_folder(output_folder)

def get_book_requests(self) -> list:
print("Getting book requests. Please wait...")
service = ChromeService(executable_path=ChromeDriverManager().install())
options = webdriver.ChromeOptions()
options.add_argument('headless')
with webdriver.Chrome(service=service, options=options) as driver:
driver.get(self.book_url)
return driver.requests

def analyse_book_requests(self) -> tuple:
print('Analysing book requests...')
try:
# find request with book json data
book_json_requests = [r for r in self.book_requests if r.method == 'POST' and r.path.startswith('/ajax/b/')]
# assert that we have only 1 request for book data found
assert len(book_json_requests) == 1, 'Error: Book data not found. Exiting.'
print('Book data found')
# find request with m3u8 file
m3u8_file_requests = [r for r in self.book_requests if 'm3u8' in r.url]
# assert that we have only 1 request for m3u8 file found
assert len(m3u8_file_requests) == 1, 'Error: m3u8 file request not found. Exiting.'
print('m3u8 file found')
book_json = json.loads(brotli.decompress(book_json_requests[0].response.body))
return book_json, m3u8_file_requests[0].url
except AssertionError as message:
print(message)
exit()

def create_book_folder(self, output_folder: str):
output_path = output_folder if Path(output_folder).is_absolute() else Path(__file__).parent / output_folder
self.book_folder = Path(output_path) / self.book_data.title
Path(self.book_folder).mkdir(parents=True, exist_ok=True)

def separate_into_chapters(self, full_mp3_filepath: Path):
print('Separating chapters. Please wait...')
for chapter in self.book_data.chapters:
chapter_path = self.book_folder / sanitize_filename(chapter['title'])
ffmpeg_command = ['ffmpeg', '-i', f'{full_mp3_filepath}.mp3', '-acodec', 'copy', '-ss',
str(chapter['time_from_start']), '-to', str(chapter['time_finish']),
f'{chapter_path}.mp3']
subprocess.run(ffmpeg_command)

def download_book(self, single_chapter: bool = False):
print('Downloading book. Please wait...')
if single_chapter:
filepath = self.book_folder / self.book_data.chapters[0]['title']
else:
filepath = self.book_folder / self.book_data.title

ffmpeg_command = ['ffmpeg', '-i', self.book_data.m3u8_url, f'{filepath}.mp3']
subprocess.run(ffmpeg_command)

def run(self, separate_into_chapters: bool = True):
if len(self.book_data.chapters) < 1:
return
if len(self.book_data.chapters) == 1 or not separate_into_chapters:
if len(self.book_data.chapters) == 1:
print("Only 1 chapter found")
else:
print("Multiple chapters found")

print("Downloading full book without chapters separation")
self.download_book(single_chapter=True) # download directly in book folder
return

print("Multiple chapters found")
full_book_folder = self.book_folder
Path(full_book_folder).mkdir(exist_ok=True)

print(
f"Downloading full book with chapters separation, keeping full book afterwards")

self.download_book(single_chapter=False)
self.separate_into_chapters(full_book_folder / self.book_data.title)
27 changes: 27 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import argparse
from akniga_parser import AKnigaParser


def parse_from_console():
parser = argparse.ArgumentParser(description='Download a book from akniga.org')
parser.add_argument('url', help='Book\'s url for downloading')
parser.add_argument('output', help='Absolute or relative path where book will be downloaded')
group = parser.add_mutually_exclusive_group()
group.add_argument('-f', '--full', action='store_true',
help='Do not separate the book into multiple chapters, if any')
args = parser.parse_args()
AKnigaParser(args.url, args.output).run(not args.f)


if __name__ == "__main__":
parse_from_console()


# OR
# files = [
# 'https://akniga.org/url1',
# 'https://akniga.org/url2',
# ]
#
# for url in files:
# AKnigaParser(url, '').run(True)