From 86726f7f89938c287d4e96a520f18796af6bf1a7 Mon Sep 17 00:00:00 2001 From: jourdelune Date: Tue, 23 Jul 2024 13:16:43 +0200 Subject: [PATCH] [update] change order of robots.txt cache --- src/robots.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/robots.py b/src/robots.py index 05d4034..86447f8 100644 --- a/src/robots.py +++ b/src/robots.py @@ -52,11 +52,11 @@ async def __call__(self, url: str, log: logging.Logger = None) -> bool: if agent in agents_on_site: authorize = self._robots[robots_url].can_fetch(url, agent) - if len(self._robots) > 1000: - older_keys = list(self._robots.keys())[-1] + if len(self._robots) >= 3: + older_keys = list(self._robots.keys())[0] self._robots.pop(older_keys) if log is not None: - log.info(f"Removing robots.txt for {robots_url}") + log.info(f"Removing robots.txt of {older_keys}") return authorize