From 827ebddd7f564618ae0e59a95348322802e6d972 Mon Sep 17 00:00:00 2001 From: dipu-bd Date: Sat, 4 Jan 2025 13:03:45 +0400 Subject: [PATCH] Fix linting issues --- scripts/index_gen.py | 24 ++++---- sources/_examples/_12_chapter_only_browser.py | 2 +- sources/en/8/888novel.py | 2 +- sources/en/f/faqwiki.py | 55 +++++++++++-------- 4 files changed, 43 insertions(+), 40 deletions(-) diff --git a/scripts/index_gen.py b/scripts/index_gen.py index eec164c79..70b59239a 100644 --- a/scripts/index_gen.py +++ b/scripts/index_gen.py @@ -50,11 +50,13 @@ DATE_FORMAT = "%d %B %Y %I:%M:%S %p" REPO_BRANCH = "master" -REPO_OWNER = 'dipu-bd' -REPO_NAME = 'lightnovel-crawler' +REPO_OWNER = "dipu-bd" +REPO_NAME = "lightnovel-crawler" REPO_URL = f"https://github.com/{REPO_OWNER}/{REPO_NAME}" FILE_DOWNLOAD_URL = f"https://raw.githubusercontent.com/{REPO_OWNER}/{REPO_NAME}" -WHEEL_RELEASE_URL = f"{REPO_URL}/releases/download/v%s/lightnovel_crawler-%s-py3-none-any.whl" +WHEEL_RELEASE_URL = ( + f"{REPO_URL}/releases/download/v%s/lightnovel_crawler-%s-py3-none-any.whl" +) # Current git branch try: @@ -164,15 +166,9 @@ def git_history(file_path): cmd = f'git log --follow --diff-filter=ACMT --pretty="%at||%aN||%aE||%s" "{file_path}"' logs = subprocess.check_output(cmd, shell=True).decode("utf-8").strip() logs = [ - { - "time": int(x[0]), - "author": x[1], - "email": x[2], - "subject": x[3] - } + {"time": int(x[0]), "author": x[1], "email": x[2], "subject": x[3]} for x in [ - line.strip().split("||", maxsplit=4) - for line in logs.splitlines(False) + line.strip().split("||", maxsplit=4) for line in logs.splitlines(False) ] ] return logs @@ -197,7 +193,7 @@ def process_contributors(history): username_cache[email] = author contribs.add(author) continue - if session.head(f'https://github.com/{author}/{REPO_NAME}').status_code == 200: + if session.head(f"https://github.com/{author}/{REPO_NAME}").status_code == 200: username_cache[author] = author username_cache[email] = author contribs.add(author) @@ -289,8 +285,8 @@ def process_file(py_file: Path) -> float: print("%.3fs" % runtime) if failures: print("-" * 50) - print('\n'.join(failures)) - + print("\n".join(failures)) + print("-" * 50) print( "%d crawlers." % len(INDEX_DATA["crawlers"]), diff --git a/sources/_examples/_12_chapter_only_browser.py b/sources/_examples/_12_chapter_only_browser.py index 9b8191cc2..fcf26d7f1 100644 --- a/sources/_examples/_12_chapter_only_browser.py +++ b/sources/_examples/_12_chapter_only_browser.py @@ -61,7 +61,7 @@ def parse_cover(self, soup: BeautifulSoup) -> str: # The soup here is the result of `self.get_soup(self.novel_url)` pass - # TODO: [OPTIONAL] Parse and return the novel author in the browser + # TODO: [OPTIONAL] Parse and return the novel author in the browser def parse_authors_in_browser(self) -> Generator[str, None, None]: # yield from self.parse_authors(self.browser.soup) pass diff --git a/sources/en/8/888novel.py b/sources/en/8/888novel.py index 5cb508739..08a219b7b 100644 --- a/sources/en/8/888novel.py +++ b/sources/en/8/888novel.py @@ -46,7 +46,7 @@ def search_novel(self, query): "title": a.get("title"), "url": a.get("href").strip(), "info": self.cleaner.clean_text( - f"Author{'s' if len(author)>1 else ''} : {', '.join(author)}" + f"Author{'s' if len(author) > 1 else ''} : {', '.join(author)}" ), } ) diff --git a/sources/en/f/faqwiki.py b/sources/en/f/faqwiki.py index ce0f9d313..3950b5f0f 100644 --- a/sources/en/f/faqwiki.py +++ b/sources/en/f/faqwiki.py @@ -27,16 +27,20 @@ def read_novel_info(self): content = soup.select_one(".entry-content") entry_title = soup.select_one("h1.entry-title") - assert isinstance(entry_title, Tag) # this must be here, is part of normal site structure/framework + assert isinstance( + entry_title, Tag + ) # this must be here, is part of normal site structure/framework self.novel_title = entry_title.text.strip() # remove suffix from completed novels' title if self.novel_title.endswith(" – All Chapters"): - self.novel_title = self.novel_title[0:self.novel_title.find(" – All Chapters")] + self.novel_title = self.novel_title[ + 0 : self.novel_title.find(" – All Chapters") + ] self.novel_author = "FaqWiki" - cover = content.select_one('.wp-block-image img') + cover = content.select_one(".wp-block-image img") # is missing in some rarer cases if cover: - src = str(cover['src']) + src = str(cover["src"]) # may be replaced with JS after load, in such case try and get the real img hidden in data-values if src.startswith("data:"): try: @@ -46,7 +50,7 @@ def read_novel_info(self): self.novel_cover = self.absolute_url(src) # remove any optimized image size GET args from novel cover URL if self.novel_cover and "?" in self.novel_cover: - self.novel_cover = self.novel_cover[0:self.novel_cover.find("?")] + self.novel_cover = self.novel_cover[0 : self.novel_cover.find("?")] metadata_container = soup.select_one("div.book-review-block__meta-item-value") keywords = { @@ -55,19 +59,29 @@ def read_novel_info(self): "genre": "Genre:", "author": "Author(s):", "status": "Status:", - "original_pub": "Original Publisher:" + "original_pub": "Original Publisher:", } if metadata_container: - metadata = metadata_container.text # doesn't have line breaks anyway so not splitting here + metadata = ( + metadata_container.text + ) # doesn't have line breaks anyway so not splitting here pos_dict = {} for key, sep in keywords.items(): pos_dict[key + "_start"] = metadata.find(sep) pos_dict[key] = metadata.find(sep) + len(sep) - self.novel_synopsis = metadata[pos_dict["desc"]:pos_dict["alt_name_start"]].strip() - self.novel_tags = metadata[pos_dict["genre"]:pos_dict["author_start"]].strip().split(" ") - self.novel_author = metadata[pos_dict["author"]:pos_dict["status_start"]].strip() + self.novel_synopsis = metadata[ + pos_dict["desc"] : pos_dict["alt_name_start"] + ].strip() + self.novel_tags = ( + metadata[pos_dict["genre"] : pos_dict["author_start"]] + .strip() + .split(" ") + ) + self.novel_author = metadata[ + pos_dict["author"] : pos_dict["status_start"] + ].strip() logger.info("Novel title: %s", self.novel_title) logger.info("Novel synopsis: %s", self.novel_synopsis) @@ -75,7 +89,7 @@ def read_novel_info(self): logger.info("Novel author: %s", self.novel_author) logger.info("Novel cover: %s", self.novel_cover) - chap_list = soup.select_one('#lcp_instance_0').select("li>a") + chap_list = soup.select_one("#lcp_instance_0").select("li>a") for idx, a in enumerate(chap_list): if "chapter" not in a.text.lower(): @@ -84,11 +98,7 @@ def read_novel_info(self): vol_id = 1 + len(self.chapters) // 100 vol_title = f"Volume {vol_id}" if chap_id % 100 == 1: - self.volumes.append( - Volume( - id=vol_id, - title=vol_title - )) + self.volumes.append(Volume(id=vol_id, title=vol_title)) # chapter name is only (sometimes) present in chapter page, not in overview entry_title = f"Chapter {chap_id}" @@ -99,7 +109,7 @@ def read_novel_info(self): url=self.absolute_url(a["href"]), title=entry_title, volume=vol_id, - volume_title=vol_title + volume_title=vol_title, ), ) @@ -116,7 +126,9 @@ def search_novel(self, query: str): novel_selector = "article > div > header > h3.entry-title > a" next_selector = "div.nav-links > a.next" - soup = self.get_soup(f"https://faqwiki.us/?s={query.replace(' ','+')}&post_type=page") + soup = self.get_soup( + f"https://faqwiki.us/?s={query.replace(' ', '+')}&post_type=page" + ) empty = "nothing found" in soup.select_one("h1.page-title").text.strip().lower() if empty: return [] @@ -137,10 +149,5 @@ def search_novel(self, query: str): pass # simple but at least won't taint results if query.lower() in novel.text.lower(): - results.append( - SearchResult( - title=novel.text, - url=novel["href"] - ) - ) + results.append(SearchResult(title=novel.text, url=novel["href"])) return results