Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise qualified exceptions instead of generic 'Exception' #103

Merged
merged 2 commits
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions scraper/src/mindtouch2zim/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
from zimscraperlib.zim import Creator

from mindtouch2zim.constants import KNOWN_BAD_ASSETS_REGEX, logger, web_session
from mindtouch2zim.errors import KnownBadAssetFailedError
from mindtouch2zim.errors import (
KnownBadAssetFailedError,
S3CacheError,
S3InvalidCredentialsError,
)
from mindtouch2zim.utils import backoff_hdlr

SUPPORTED_IMAGE_MIME_TYPES = {
Expand Down Expand Up @@ -111,7 +115,7 @@
f"Exception while processing asset for {asset_url.value}: "
f"{exc}"
)
raise Exception( # noqa: B904
raise OSError( # noqa: B904

Check warning on line 118 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L118

Added line #L118 was not covered by tests
f"Asset failure threshold ({self.bad_assets_threshold}) "
"reached, stopping execution"
)
Expand Down Expand Up @@ -195,7 +199,7 @@
self, s3_key: str, meta: dict[str, str]
) -> BytesIO | None:
if not self.s3_storage:
raise Exception("s3 storage must be set")
raise AttributeError("s3 storage must be set")

Check warning on line 202 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L202

Added line #L202 was not covered by tests
try:
asset_content = BytesIO()
self.s3_storage.download_matching_fileobj( # pyright: ignore[reportUnknownMemberType]
Expand All @@ -205,19 +209,19 @@
except NotFoundError:
return None
except Exception as exc:
raise Exception(f"Failed to download {s3_key} from S3 cache") from exc
raise S3CacheError(f"Failed to download {s3_key} from S3 cache") from exc

Check warning on line 212 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L212

Added line #L212 was not covered by tests

def _upload_to_s3_cache(
self, s3_key: str, meta: dict[str, str], asset_content: BytesIO
):
if not self.s3_storage:
raise Exception("s3 storage must be set")
raise AttributeError("s3 storage must be set")

Check warning on line 218 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L218

Added line #L218 was not covered by tests
try:
self.s3_storage.upload_fileobj( # pyright: ignore[reportUnknownMemberType]
key=s3_key, fileobj=asset_content, meta=meta
)
except Exception as exc:
raise Exception(f"Failed to upload {s3_key} to S3 cache") from exc
raise S3CacheError(f"Failed to upload {s3_key} to S3 cache") from exc

Check warning on line 224 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L224

Added line #L224 was not covered by tests

def _download_from_online(self, asset_url: HttpUrl) -> BytesIO:
"""Download whole content from online server with retry from scraperlib"""
Expand Down Expand Up @@ -284,4 +288,4 @@
f" Key ID: {self.s3_storage.params.get('keyid')}" # pyright: ignore[reportUnknownMemberType]
)
logger.error(f" Public IP: {get_public_ip()}")
raise Exception("Invalid S3 credentials")
raise S3InvalidCredentialsError("Invalid S3 credentials")

Check warning on line 291 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L291

Added line #L291 was not covered by tests
9 changes: 3 additions & 6 deletions scraper/src/mindtouch2zim/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,10 @@
logger,
web_session,
)
from mindtouch2zim.errors import APITokenRetrievalError, MindtouchParsingError
from mindtouch2zim.html import get_soup


class MindtouchParsingError(Exception):
pass


class MindtouchHome(BaseModel):
home_url: str
welcome_text_paragraphs: list[str]
Expand Down Expand Up @@ -425,15 +422,15 @@
if not global_settings:
logger.debug("home content:")
logger.debug(soup)
raise Exception(
raise APITokenRetrievalError(

Check warning on line 425 in scraper/src/mindtouch2zim/client.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/client.py#L425

Added line #L425 was not covered by tests
"Failed to retrieve API token to query website API, missing "
"mt-global-settings script"
)
x_deki_token = json.loads(global_settings.text).get("apiToken", None)
if not x_deki_token:
logger.debug("mt-global-settings script content:")
logger.debug(global_settings.text)
raise Exception(
raise APITokenRetrievalError(

Check warning on line 433 in scraper/src/mindtouch2zim/client.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/client.py#L433

Added line #L433 was not covered by tests
"Failed to retrieve API token to query website API, missing apiToken."
)
return x_deki_token
Expand Down
34 changes: 34 additions & 0 deletions scraper/src/mindtouch2zim/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,37 @@ class KnownBadAssetFailedError(Exception):
"""An exception raised when an asset known to be failing, failed as expected"""

pass


class VimeoThumbnailError(Exception):
    """Raised when there is a problem with a Vimeo video."""


class GlossaryRewriteError(Exception):
    """Raised when a problem occurs during glossary rewrite."""


class S3InvalidCredentialsError(Exception):
    """Raised when S3 credentials are invalid.

    The asset processor raises it after logging the S3 key ID and the public
    IP, so the message itself stays short.
    """


class S3CacheError(Exception):
    """Raised when there is a problem with the S3 cache.

    The asset processor raises it when downloading an asset from, or
    uploading an asset to, the S3 cache fails; the underlying error is
    chained as ``__cause__``.
    """


class MindtouchParsingError(Exception):
    """Exception indicating a problem while parsing Mindtouch content."""

    # NOTE(review): the raise sites are not visible from this module; confirm
    # the exact scope (HTML vs. API responses) against mindtouch2zim.client.


class APITokenRetrievalError(Exception):
    """Raised when failing to retrieve the API token to query the website API.

    The client raises it when the ``mt-global-settings`` script is missing
    from the home page, or when that script holds no ``apiToken`` value.
    """
6 changes: 3 additions & 3 deletions scraper/src/mindtouch2zim/html_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
if attr_name not in ("href", "src", "srcset") or not attr_value:
return
if not isinstance(url_rewriter, HtmlUrlsRewriter):
raise Exception("Expecting HtmlUrlsRewriter")
raise TypeError("Expecting instance of HtmlUrlsRewriter")

Check warning on line 41 in scraper/src/mindtouch2zim/html_rewriting.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/html_rewriting.py#L41

Added line #L41 was not covered by tests
new_attr_value = None
if tag in ["a", "area"]:
rewrite_result = url_rewriter(
Expand Down Expand Up @@ -77,7 +77,7 @@
if tag not in ["iframe"]:
return
if not isinstance(url_rewriter, HtmlUrlsRewriter):
raise Exception("Expecting HtmlUrlsRewriter")
raise TypeError("Expecting instance of HtmlUrlsRewriter")

Check warning on line 80 in scraper/src/mindtouch2zim/html_rewriting.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/html_rewriting.py#L80

Added line #L80 was not covered by tests
src = get_attr_value_from(attrs=attrs, name="src")
if not src:
logger.warning(f"Empty src found in iframe while rewriting {rewriting_context}")
Expand Down Expand Up @@ -185,7 +185,7 @@
if tag != "img":
return
if not isinstance(url_rewriter, HtmlUrlsRewriter):
raise Exception("Expecting HtmlUrlsRewriter")
raise TypeError("Expecting instance of HtmlUrlsRewriter")

Check warning on line 188 in scraper/src/mindtouch2zim/html_rewriting.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/html_rewriting.py#L188

Added line #L188 was not covered by tests
if not (srcset_value := get_attr_value_from(attrs, "srcset")):
# simple case, just need to rewrite the src
src_value = get_attr_value_from(attrs, "src")
Expand Down
6 changes: 1 addition & 5 deletions scraper/src/mindtouch2zim/libretexts/glossary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
from jinja2 import Template
from pydantic import BaseModel


class GlossaryRewriteError(Exception):
"""Exception indicating a problem during glossary rewrite"""

pass
from mindtouch2zim.errors import GlossaryRewriteError


class GlossaryEntry(BaseModel):
Expand Down
4 changes: 2 additions & 2 deletions scraper/src/mindtouch2zim/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@
except HTTPError as exc:
if exc.response.status_code == HTTPStatus.FORBIDDEN:
if page == selected_pages[0]:
raise Exception(
raise PermissionError(

Check warning on line 410 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L410

Added line #L410 was not covered by tests
"Root page is private, we cannot ZIM it, stopping"
) from None
logger.debug(f"Ignoring page {page.id} (private page)")
Expand All @@ -417,7 +417,7 @@
if len(private_pages) == len(selected_pages):
# we should never get here since we already check fail early if root
# page is private, but we are better safe than sorry
raise Exception("All pages have been ignored, not creating an empty ZIM")
raise OSError("All pages have been ignored, not creating an empty ZIM")

Check warning on line 420 in scraper/src/mindtouch2zim/processor.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/processor.py#L420

Added line #L420 was not covered by tests
del private_pages

logger.info(f" Retrieving {len(self.items_to_download)} assets...")
Expand Down
7 changes: 1 addition & 6 deletions scraper/src/mindtouch2zim/vimeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
logger,
web_session,
)


class VimeoThumbnailError(Exception):
"""Error raised when there is a problem with a vimeo video"""

pass
from mindtouch2zim.errors import VimeoThumbnailError


def get_vimeo_thumbnail_url(video_url: str) -> str:
Expand Down