-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathScribbleHub.py
91 lines (70 loc) · 3.43 KB
/
ScribbleHub.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from bs4 import BeautifulSoup
from Functions import edit_json
from ebooklib import epub
import subprocess
from PIL import Image
from datetime import datetime
import cloudscraper
import re
import os
# Shared HTTP client used for every request to the site.
scraper = cloudscraper.create_scraper()

# Stylesheet text embedded into each generated EPUB by create_epub().
# The path is relative, so it is resolved against the current working
# directory at import time. The context manager guarantees the handle is
# closed even if read() raises.
with open("style.css", "r") as css:
    style = css.read()
def scribble_book(toc, title, author, key, chapter_url, image, tags, description):
    """Scrape a story chapter by chapter and build an EPUB from it.

    Starting at ``chapter_url``, each page's title and raw chapter body are
    collected, then the "previous" navigation link is followed until that
    link's href is ``'#'``. The collected chapters are handed to
    ``create_epub`` in the reverse of the order they were visited.

    Args:
        toc: Stored as a Dublin Core identifier, both as ``'url:' + toc`` and
            verbatim (presumably the story's table-of-contents URL).
        title: Book title.
        author: Book author.
        key: Unique identifier of the book; also passed on to ``create_epub``.
        chapter_url: URL of the first page to fetch (presumably the newest
            chapter, since navigation goes through the "previous" link).
        image: URL of the cover image; downloaded and saved as ``image.png``
            in the current working directory.
        tags: Iterable of strings, each stored as a Dublin Core subject.
        description: Stored as the Dublin Core description.

    Returns:
        Whatever ``create_epub`` returns.

    Raises:
        AttributeError / TypeError: if a fetched page lacks the expected
            ``chapter-title`` element or navigation links.
    """
    book = epub.EpubBook()
    book.set_identifier(key)
    book.set_title(title)
    book.set_language('en')
    book.add_author(author)
    book.add_metadata('DC', 'publisher', 'Scribble Hub')
    book.add_metadata('DC', 'identifier', 'url:' + toc)
    book.add_metadata('DC', 'description', description)
    book.add_metadata('DC', 'date', datetime.now().isoformat())
    for x in tags:
        book.add_metadata('DC', 'subject', x)
    book.add_metadata('DC', 'identifier', toc)

    # Download the cover and re-encode it as PNG on disk; create_epub()
    # removes image.png once the book has been imported.
    Image.open(scraper.get(image, stream=True).raw).save('image.png')
    with open('image.png', 'rb') as cover_file:
        book.set_cover("image.png", cover_file.read())

    page = BeautifulSoup(scraper.get(chapter_url).text, 'html5lib')
    visited = []
    while True:
        #for summary in page.find_all('div', class_=re.compile('^sp-head')):
            #summary.name = 'summary'
        #for details in page.find_all('div', class_=re.compile('sp-wrap')):
            #details.name = 'details'
        chapter = page.find('div', class_="chapter-title").text
        content = page.find('div', id="chp_raw")
        visited.append([chapter, content])
        print(chapter)
        # The "previous" shortcut link points at '#' when there is nothing
        # further back to fetch.
        if page.find('a', title="Shortcut: [Ctrl] + [<-]")['href'] == '#':
            break
        chapter_url = page.find('a', class_="btn-wi btn-prev")['href']
        page = BeautifulSoup(scraper.get(chapter_url).text, 'html5lib')

    # Pages were visited by walking "previous" links, so reverse once to get
    # the same order the original front-insertion produced.
    page_list = visited[::-1]
    return create_epub(page_list, title, author, key, book, tags)
def create_epub(page_list, title, author, key, books, tags):
    """Write the chapters into an EPUB, import it into Calibre, and clean up.

    Args:
        page_list: Sequence of ``[chapter_title, chapter_content]`` pairs in
            the order they should appear; ``chapter_content`` is converted
            with ``str()``.
        title: Book title, used to build the output file name.
        author: Author name, used to build the output file name.
        key: Prefix for chapter file names and table-of-contents link ids.
        books: The ``epub.EpubBook`` instance to populate and write.
        tags: Not used by this function; kept so existing callers still work.

    Returns:
        The chapter titles joined with ``', '``.

    Raises:
        subprocess.CalledProcessError: if ``calibredb add`` exits non-zero.
        FileNotFoundError: if the ``ebook-meta`` / ``calibredb`` executables
            or one of the temporary files to delete cannot be found.
        IndexError: if the output of ``calibredb add`` contains no number.
    """
    toc_page = epub.EpubHtml(title='Table of Content', file_name='toc.xhtml', lang='en')
    stylesheet = epub.EpubItem(uid="style", file_name="style/style.css", media_type="text/css", content=style)
    books.add_item(stylesheet)
    books.add_item(toc_page)

    # enumerate() gives each chapter its own position; looking the entry up
    # with list.index() was quadratic and returned the first equal entry,
    # so two identical chapters would have shared one file name.
    for position, (chapter_title, chapter_body) in enumerate(page_list):
        item_id = key + str(position)
        chapter = epub.EpubHtml(title=chapter_title, file_name=item_id + '.xhtml', lang='en')
        chapter.add_item(stylesheet)
        chapter.content = '<h1>' + chapter_title + '</h1>' + str(chapter_body)
        books.add_item(chapter)
        books.spine.append(chapter)
        books.toc.append(epub.Link(item_id + '.xhtml', chapter_title, item_id))

    books.spine.insert(0, 'nav')
    books.spine.insert(0, 'cover')
    books.add_item(epub.EpubNcx())

    file_name = title.replace('?', '').replace(':', '') + " - " + author.replace('?', '').replace(':', '') + '.epub'
    epub.write_epub(file_name, books)

    # NOTE(review): the Calibre server URL and credentials are hard-coded;
    # consider moving them to configuration outside the source tree.
    library_args = [
        '--library-path', 'http://localhost:8083/#Calibre_Library',
        '--username', 'TAL-05',
        '--password', '2CooL123',
    ]

    # Commands are passed as argument lists (no shell) because file_name is
    # derived from scraped text and may contain quotes or shell metacharacters.
    # As before, the exit status of ebook-meta is not checked.
    subprocess.run(['ebook-meta', file_name, '--to-opf', 'metadata.opf'])
    add_output = subprocess.check_output(
        ['calibredb', 'add', '-m', 'overwrite', file_name] + library_args
    ).decode("utf-8")

    # The first number printed by "calibredb add" is taken as the book id.
    book_id = re.findall(r"\d+", add_output)[0]
    print(book_id)
    # Exit status intentionally ignored, matching the previous behaviour.
    subprocess.run(['calibredb', 'set_metadata', book_id, 'metadata.opf'] + library_args)

    os.remove(file_name)
    os.remove("metadata.opf")
    os.remove("image.png")
    return ', '.join([item[0] for item in page_list])