
Fix linting issues
dipu-bd committed Jan 4, 2025
1 parent 4c6ec89 commit 827ebdd
Showing 4 changed files with 43 additions and 40 deletions.
24 changes: 10 additions & 14 deletions scripts/index_gen.py
@@ -50,11 +50,13 @@
 DATE_FORMAT = "%d %B %Y %I:%M:%S %p"
 
 REPO_BRANCH = "master"
-REPO_OWNER = 'dipu-bd'
-REPO_NAME = 'lightnovel-crawler'
+REPO_OWNER = "dipu-bd"
+REPO_NAME = "lightnovel-crawler"
 REPO_URL = f"https://github.com/{REPO_OWNER}/{REPO_NAME}"
 FILE_DOWNLOAD_URL = f"https://raw.githubusercontent.com/{REPO_OWNER}/{REPO_NAME}"
-WHEEL_RELEASE_URL = f"{REPO_URL}/releases/download/v%s/lightnovel_crawler-%s-py3-none-any.whl"
+WHEEL_RELEASE_URL = (
+    f"{REPO_URL}/releases/download/v%s/lightnovel_crawler-%s-py3-none-any.whl"
+)
 
 # Current git branch
 try:
@@ -164,15 +166,9 @@ def git_history(file_path):
     cmd = f'git log --follow --diff-filter=ACMT --pretty="%at||%aN||%aE||%s" "{file_path}"'
     logs = subprocess.check_output(cmd, shell=True).decode("utf-8").strip()
     logs = [
-        {
-            "time": int(x[0]),
-            "author": x[1],
-            "email": x[2],
-            "subject": x[3]
-        }
+        {"time": int(x[0]), "author": x[1], "email": x[2], "subject": x[3]}
         for x in [
-            line.strip().split("||", maxsplit=4)
-            for line in logs.splitlines(False)
+            line.strip().split("||", maxsplit=4) for line in logs.splitlines(False)
         ]
     ]
     return logs
@@ -197,7 +193,7 @@ def process_contributors(history):
             username_cache[email] = author
             contribs.add(author)
             continue
-        if session.head(f'https://github.com/{author}/{REPO_NAME}').status_code == 200:
+        if session.head(f"https://github.com/{author}/{REPO_NAME}").status_code == 200:
             username_cache[author] = author
             username_cache[email] = author
             contribs.add(author)
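
The reformatted line above doubles as documentation of the contributor check: an author string counts as a GitHub username if github.com/<author>/lightnovel-crawler answers a HEAD request with 200, i.e. the user has a fork of the repo. A standalone sketch of the same check using requests directly (the session and function name here are illustrative, not from the script):

import requests

session = requests.Session()

def has_fork(author: str, repo: str = "lightnovel-crawler") -> bool:
    # HEAD is enough: only the status code matters, not the page body.
    return session.head(f"https://github.com/{author}/{repo}").status_code == 200

print(has_fork("dipu-bd"))  # True: the upstream repository itself exists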
@@ -289,8 +285,8 @@ def process_file(py_file: Path) -> float:
 print("%.3fs" % runtime)
 if failures:
     print("-" * 50)
-    print('\n'.join(failures))
+    print("\n".join(failures))
 
 print("-" * 50)
 print(
     "%d crawlers." % len(INDEX_DATA["crawlers"]),
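
Beyond the quote-style changes, this file's reformat collapses the nested comprehension in `git_history` onto fewer lines. A self-contained sketch of the same pattern, runnable in any git checkout (the sample path at the end is hypothetical): each `||`-delimited line of `git log` output becomes one dict.

import subprocess

def git_history(file_path: str) -> list:
    # One output line per commit: unix-time||author-name||author-email||subject
    cmd = (
        'git log --follow --diff-filter=ACMT '
        f'--pretty="%at||%aN||%aE||%s" "{file_path}"'
    )
    raw = subprocess.check_output(cmd, shell=True).decode("utf-8").strip()
    return [
        {"time": int(x[0]), "author": x[1], "email": x[2], "subject": x[3]}
        for x in [line.strip().split("||", maxsplit=4) for line in raw.splitlines()]
    ]

print(git_history("scripts/index_gen.py"))  # hypothetical path

`--follow` tracks the file across renames, and `--diff-filter=ACMT` keeps only commits that added, copied, modified, or type-changed it.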
2 changes: 1 addition & 1 deletion sources/_examples/_12_chapter_only_browser.py
@@ -61,7 +61,7 @@ def parse_cover(self, soup: BeautifulSoup) -> str:
         # The soup here is the result of `self.get_soup(self.novel_url)`
         pass
 
-    # TODO: [OPTIONAL] Parse and return the novel author in the browser
+    # TODO: [OPTIONAL] Parse and return the novel author in the browser
     def parse_authors_in_browser(self) -> Generator[str, None, None]:
         # yield from self.parse_authors(self.browser.soup)
         pass
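
The removed and added TODO lines above look identical because the linting change is invisible whitespace. For what the hook is for: a sketch of a filled-in `parse_authors_in_browser`, following the template's own commented-out hint; the stub classes stand in for the real crawler base, and everything here except the final `yield from` line is made up to keep the example self-contained.

from typing import Generator

class FakeBrowser:
    soup = None  # stands in for the BeautifulSoup DOM after the browser loads the page

class MyCrawler:
    browser = FakeBrowser()

    def parse_authors(self, soup) -> Generator[str, None, None]:
        yield "Some Author"  # placeholder for the real soup-based parser

    def parse_authors_in_browser(self) -> Generator[str, None, None]:
        # Reuse the soup-based parser on whatever DOM the browser captured.
        yield from self.parse_authors(self.browser.soup)

print(list(MyCrawler().parse_authors_in_browser()))  # ['Some Author']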
2 changes: 1 addition & 1 deletion sources/en/8/888novel.py
@@ -46,7 +46,7 @@ def search_novel(self, query):
                     "title": a.get("title"),
                     "url": a.get("href").strip(),
                     "info": self.cleaner.clean_text(
-                        f"Author{'s' if len(author)>1 else ''} : {', '.join(author)}"
+                        f"Author{'s' if len(author) > 1 else ''} : {', '.join(author)}"
                     ),
                 }
             )
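
Only the spacing around `>` changes (linters such as pycodestyle flag missing whitespace around operators); the rendered text is identical. A quick standalone check with a made-up author list:

author = ["Alice", "Bob"]
print(f"Author{'s' if len(author) > 1 else ''} : {', '.join(author)}")
# -> Authors : Alice, Bob

author = ["Alice"]
print(f"Author{'s' if len(author) > 1 else ''} : {', '.join(author)}")
# -> Author : Alice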
55 changes: 31 additions & 24 deletions sources/en/f/faqwiki.py
@@ -27,16 +27,20 @@ def read_novel_info(self):
         content = soup.select_one(".entry-content")
 
         entry_title = soup.select_one("h1.entry-title")
-        assert isinstance(entry_title, Tag)  # this must be here, is part of normal site structure/framework
+        assert isinstance(
+            entry_title, Tag
+        )  # this must be here, is part of normal site structure/framework
         self.novel_title = entry_title.text.strip()
         # remove suffix from completed novels' title
         if self.novel_title.endswith(" – All Chapters"):
-            self.novel_title = self.novel_title[0:self.novel_title.find(" – All Chapters")]
+            self.novel_title = self.novel_title[
+                0 : self.novel_title.find(" – All Chapters")
+            ]
         self.novel_author = "FaqWiki"
-        cover = content.select_one('.wp-block-image img')
+        cover = content.select_one(".wp-block-image img")
         # is missing in some rarer cases
         if cover:
-            src = str(cover['src'])
+            src = str(cover["src"])
             # may be replaced with JS after load, in such case try and get the real img hidden in data-values
             if src.startswith("data:"):
                 try:
@@ -46,7 +50,7 @@ def read_novel_info(self):
             self.novel_cover = self.absolute_url(src)
         # remove any optimized image size GET args from novel cover URL
         if self.novel_cover and "?" in self.novel_cover:
-            self.novel_cover = self.novel_cover[0:self.novel_cover.find("?")]
+            self.novel_cover = self.novel_cover[0 : self.novel_cover.find("?")]
 
         metadata_container = soup.select_one("div.book-review-block__meta-item-value")
         keywords = {
@@ -55,27 +59,37 @@ def read_novel_info(self):
             "genre": "Genre:",
             "author": "Author(s):",
             "status": "Status:",
-            "original_pub": "Original Publisher:"
+            "original_pub": "Original Publisher:",
         }
 
         if metadata_container:
-            metadata = metadata_container.text  # doesn't have line breaks anyway so not splitting here
+            metadata = (
+                metadata_container.text
+            )  # doesn't have line breaks anyway so not splitting here
             pos_dict = {}
             for key, sep in keywords.items():
                 pos_dict[key + "_start"] = metadata.find(sep)
                 pos_dict[key] = metadata.find(sep) + len(sep)
 
-            self.novel_synopsis = metadata[pos_dict["desc"]:pos_dict["alt_name_start"]].strip()
-            self.novel_tags = metadata[pos_dict["genre"]:pos_dict["author_start"]].strip().split(" ")
-            self.novel_author = metadata[pos_dict["author"]:pos_dict["status_start"]].strip()
+            self.novel_synopsis = metadata[
+                pos_dict["desc"] : pos_dict["alt_name_start"]
+            ].strip()
+            self.novel_tags = (
+                metadata[pos_dict["genre"] : pos_dict["author_start"]]
+                .strip()
+                .split(" ")
+            )
+            self.novel_author = metadata[
+                pos_dict["author"] : pos_dict["status_start"]
+            ].strip()
 
         logger.info("Novel title: %s", self.novel_title)
         logger.info("Novel synopsis: %s", self.novel_synopsis)
         logger.info("Novel tags: %s", ",".join(self.novel_tags))
         logger.info("Novel author: %s", self.novel_author)
         logger.info("Novel cover: %s", self.novel_cover)
 
-        chap_list = soup.select_one('#lcp_instance_0').select("li>a")
+        chap_list = soup.select_one("#lcp_instance_0").select("li>a")
 
         for idx, a in enumerate(chap_list):
             if "chapter" not in a.text.lower():
@@ -84,11 +98,7 @@ def read_novel_info(self):
             vol_id = 1 + len(self.chapters) // 100
             vol_title = f"Volume {vol_id}"
             if chap_id % 100 == 1:
-                self.volumes.append(
-                    Volume(
-                        id=vol_id,
-                        title=vol_title
-                    ))
+                self.volumes.append(Volume(id=vol_id, title=vol_title))
 
             # chapter name is only (sometimes) present in chapter page, not in overview
             entry_title = f"Chapter {chap_id}"
@@ -99,7 +109,7 @@ def read_novel_info(self):
                     url=self.absolute_url(a["href"]),
                     title=entry_title,
                     volume=vol_id,
-                    volume_title=vol_title
+                    volume_title=vol_title,
                 ),
             )
 
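The condensed `Volume(...)` call above sits inside a simple bucketing scheme: every 100 chapters start a new volume, and a volume entry is created exactly when a bucket opens (`chap_id % 100 == 1`). A standalone check of the arithmetic, assuming `chap_id` equals `len(self.chapters) + 1` at that point in the loop:

for chap_id in (1, 99, 100, 101, 250):
    vol_id = 1 + (chap_id - 1) // 100  # same as 1 + len(self.chapters) // 100
    print(chap_id, "->", vol_id)
# 1 -> 1, 99 -> 1, 100 -> 1, 101 -> 2, 250 -> 3
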
@@ -116,7 +126,9 @@ def search_novel(self, query: str):
         novel_selector = "article > div > header > h3.entry-title > a"
         next_selector = "div.nav-links > a.next"
 
-        soup = self.get_soup(f"https://faqwiki.us/?s={query.replace(' ','+')}&post_type=page")
+        soup = self.get_soup(
+            f"https://faqwiki.us/?s={query.replace(' ', '+')}&post_type=page"
+        )
         empty = "nothing found" in soup.select_one("h1.page-title").text.strip().lower()
         if empty:
             return []
@@ -137,10 +149,5 @@ def search_novel(self, query: str):
                 pass
             # simple but at least won't taint results
             if query.lower() in novel.text.lower():
-                results.append(
-                    SearchResult(
-                        title=novel.text,
-                        url=novel["href"]
-                    )
-                )
+                results.append(SearchResult(title=novel.text, url=novel["href"]))
         return results
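
Most of the faqwiki.py changes are pure formatting, but the label-position slicing in `read_novel_info` is easier to see flattened. A minimal standalone sketch of the same find()/slice technique with made-up metadata text (the real code also handles an "Alternative Names:" label, omitted here for brevity):

# Made-up metadata string in the flat "Label: value Label: value" shape the
# site produces; the real code reads it from metadata_container.text.
metadata = (
    "Description: A hero rises. "
    "Genre: Action Fantasy "
    "Author(s): Jane Doe "
    "Status: Ongoing"
)

keywords = {
    "desc": "Description:",
    "genre": "Genre:",
    "author": "Author(s):",
    "status": "Status:",
}

# For every label, record where it starts (to end the previous value) and
# where its own value begins (just past the label).
pos_dict = {}
for key, sep in keywords.items():
    pos_dict[key + "_start"] = metadata.find(sep)
    pos_dict[key] = metadata.find(sep) + len(sep)

synopsis = metadata[pos_dict["desc"] : pos_dict["genre_start"]].strip()
tags = metadata[pos_dict["genre"] : pos_dict["author_start"]].strip().split(" ")
author = metadata[pos_dict["author"] : pos_dict["status_start"]].strip()

print(synopsis)  # A hero rises.
print(tags)      # ['Action', 'Fantasy']
print(author)    # Jane Doe

Each value runs from the end of its own label to the start of the next label, so the parser never needs the text to contain line breaks.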
