Skip to content

Commit

Permalink
Raise qualified exceptions instead of generic 'Exception'
Browse files Browse the repository at this point in the history
  • Loading branch information
Gulin7 committed Nov 27, 2024
1 parent 92fafb1 commit 5134301
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 13 deletions.
24 changes: 18 additions & 6 deletions scraper/src/mindtouch2zim/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@
WEBP_OPTIONS = WebpMedium().options


class S3InvalidCredentialsError(Exception):
"""Raised when S3 credentials are invalid"""

pass


class S3CacheError(Exception):
"""Raised when there is a problem with the S3 cache"""

pass


class HeaderData(NamedTuple):
ident: str # ~version~ of the URL data to use for comparisons
content_type: str | None
Expand Down Expand Up @@ -111,7 +123,7 @@ def _process_asset_internal(
f"Exception while processing asset for {asset_url.value}: "
f"{exc}"
)
raise Exception( # noqa: B904
raise OSError( # noqa: B904

Check warning on line 126 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L126

Added line #L126 was not covered by tests
f"Asset failure threshold ({self.bad_assets_threshold}) "
"reached, stopping execution"
)
Expand Down Expand Up @@ -195,7 +207,7 @@ def _download_from_s3_cache(
self, s3_key: str, meta: dict[str, str]
) -> BytesIO | None:
if not self.s3_storage:
raise Exception("s3 storage must be set")
raise AttributeError("s3 storage must be set")

Check warning on line 210 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L210

Added line #L210 was not covered by tests
try:
asset_content = BytesIO()
self.s3_storage.download_matching_fileobj( # pyright: ignore[reportUnknownMemberType]
Expand All @@ -205,19 +217,19 @@ def _download_from_s3_cache(
except NotFoundError:
return None
except Exception as exc:
raise Exception(f"Failed to download {s3_key} from S3 cache") from exc
raise S3CacheError(f"Failed to download {s3_key} from S3 cache") from exc

Check warning on line 220 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L220

Added line #L220 was not covered by tests

def _upload_to_s3_cache(
self, s3_key: str, meta: dict[str, str], asset_content: BytesIO
):
if not self.s3_storage:
raise Exception("s3 storage must be set")
raise AttributeError("s3 storage must be set")

Check warning on line 226 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L226

Added line #L226 was not covered by tests
try:
self.s3_storage.upload_fileobj( # pyright: ignore[reportUnknownMemberType]
key=s3_key, fileobj=asset_content, meta=meta
)
except Exception as exc:
raise Exception(f"Failed to upload {s3_key} to S3 cache") from exc
raise S3CacheError(f"Failed to upload {s3_key} to S3 cache") from exc

Check warning on line 232 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L232

Added line #L232 was not covered by tests

def _download_from_online(self, asset_url: HttpUrl) -> BytesIO:
"""Download whole content from online server with retry from scraperlib"""
Expand Down Expand Up @@ -284,4 +296,4 @@ def _setup_s3(self):
f" Key ID: {self.s3_storage.params.get('keyid')}" # pyright: ignore[reportUnknownMemberType]
)
logger.error(f" Public IP: {get_public_ip()}")
raise Exception("Invalid S3 credentials")
raise S3InvalidCredentialsError("Invalid S3 credentials")

Check warning on line 299 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L299

Added line #L299 was not covered by tests
10 changes: 8 additions & 2 deletions scraper/src/mindtouch2zim/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ class MindtouchParsingError(Exception):
pass


class APITokenRetrievalError(Exception):
"""Exception raised when failing to retrieve API token to query website API"""

pass


class MindtouchHome(BaseModel):
home_url: str
welcome_text_paragraphs: list[str]
Expand Down Expand Up @@ -425,15 +431,15 @@ def _get_deki_token_from_home(soup: BeautifulSoup) -> str:
if not global_settings:
logger.debug("home content:")
logger.debug(soup)
raise Exception(
raise APITokenRetrievalError(

Check warning on line 434 in scraper/src/mindtouch2zim/client.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/client.py#L434

Added line #L434 was not covered by tests
"Failed to retrieve API token to query website API, missing "
"mt-global-settings script"
)
x_deki_token = json.loads(global_settings.text).get("apiToken", None)
if not x_deki_token:
logger.debug("mt-global-settings script content:")
logger.debug(global_settings.text)
raise Exception(
raise APITokenRetrievalError(

Check warning on line 442 in scraper/src/mindtouch2zim/client.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/client.py#L442

Added line #L442 was not covered by tests
"Failed to retrieve API token to query website API, missing apiToken."
)
return x_deki_token
Expand Down
6 changes: 3 additions & 3 deletions scraper/src/mindtouch2zim/html_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def rewrite_href_src_srcset_attributes(
if attr_name not in ("href", "src", "srcset") or not attr_value:
return
if not isinstance(url_rewriter, HtmlUrlsRewriter):
raise Exception("Expecting HtmlUrlsRewriter")
raise TypeError("Expecting instance of HtmlUrlsRewriter")

Check warning on line 41 in scraper/src/mindtouch2zim/html_rewriting.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/html_rewriting.py#L41

Added line #L41 was not covered by tests
new_attr_value = None
if tag in ["a", "area"]:
rewrite_result = url_rewriter(
Expand Down Expand Up @@ -77,7 +77,7 @@ def rewrite_iframe_tags(
if tag not in ["iframe"]:
return
if not isinstance(url_rewriter, HtmlUrlsRewriter):
raise Exception("Expecting HtmlUrlsRewriter")
raise TypeError("Expecting instance of HtmlUrlsRewriter")

Check warning on line 80 in scraper/src/mindtouch2zim/html_rewriting.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/html_rewriting.py#L80

Added line #L80 was not covered by tests
src = get_attr_value_from(attrs=attrs, name="src")
if not src:
logger.warning(f"Empty src found in iframe while rewriting {rewriting_context}")
Expand Down Expand Up @@ -185,7 +185,7 @@ def rewrite_img_tags(
if tag != "img":
return
if not isinstance(url_rewriter, HtmlUrlsRewriter):
raise Exception("Expecting HtmlUrlsRewriter")
raise TypeError("Expecting instance of HtmlUrlsRewriter")

Check warning on line 188 in scraper/src/mindtouch2zim/html_rewriting.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/html_rewriting.py#L188

Added line #L188 was not covered by tests
if not (srcset_value := get_attr_value_from(attrs, "srcset")):
# simple case, just need to rewrite the src
src_value = get_attr_value_from(attrs, "src")
Expand Down
4 changes: 2 additions & 2 deletions scraper/src/mindtouch2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def run_with_creator(self, creator: Creator):
except HTTPError as exc:
if exc.response.status_code == HTTPStatus.FORBIDDEN:
if page == selected_pages[0]:
raise Exception(
raise PermissionError(

Check warning on line 410 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L410

Added line #L410 was not covered by tests
"Root page is private, we cannot ZIM it, stopping"
) from None
logger.debug(f"Ignoring page {page.id} (private page)")
Expand All @@ -417,7 +417,7 @@ def run_with_creator(self, creator: Creator):
if len(private_pages) == len(selected_pages):
# we should never get here since we already check fail early if root
# page is private, but we are better safe than sorry
raise Exception("All pages have been ignored, not creating an empty ZIM")
raise OSError("All pages have been ignored, not creating an empty ZIM")

Check warning on line 420 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L420

Added line #L420 was not covered by tests
del private_pages

logger.info(f" Retrieving {len(self.items_to_download)} assets...")
Expand Down

0 comments on commit 5134301

Please sign in to comment.