Skip to content

Commit

Permalink
[update] improve code (pylint)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jourdelune committed Jul 23, 2024
1 parent 331717d commit d4b9017
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 8 deletions.
6 changes: 1 addition & 5 deletions src/robots.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Class to respect the robots.txt file
"""

import asyncio
import logging
import urllib.parse

Expand Down Expand Up @@ -48,10 +47,7 @@ async def __call__(self, url: str, log: logging.Logger = None) -> bool:

authorize = authorize = self._robots[robots_url].can_fetch(url, "*")
for agent in self._user_agent:
agents_on_site = [
agent_on_site
for agent_on_site in self._robots[robots_url]._user_agents.keys()
]
agents_on_site = list(self._robots[robots_url]._user_agents.keys())

if agent in agents_on_site:
authorize = self._robots[robots_url].can_fetch(url, agent)
Expand Down
4 changes: 2 additions & 2 deletions src/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

router = Router[BeautifulSoupCrawlingContext]()
robots_parser = RobotTXT()
regex = r"(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()!@:%_\+.~#?&\/\/=]*)\.(mp3|wav|ogg)"
REGEX = r"(https?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()!@:%_\+.~#?&\/\/=]*)\.(mp3|wav|ogg)"


@router.default_handler
Expand All @@ -29,7 +29,7 @@ async def default_handler(context: BeautifulSoupCrawlingContext) -> None:
url = context.request.url
html_page = str(context.soup).replace(r"\/", "/")

matches = re.finditer(regex, html_page)
matches = re.finditer(REGEX, html_page)

# get all audio links
audio_links = [html_page[match.start() : match.end()] for match in matches]
Expand Down
4 changes: 3 additions & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ def process(file_name: str) -> None:
Args:
file_name (str): the file name to process
"""
data = json.load(open(file_name, encoding="utf-8"))

with open(file_name, encoding="utf-8") as file:
data = json.load(file)

unique_urls = set()
unique_data = []
Expand Down

0 comments on commit d4b9017

Please sign in to comment.