Skip to content

Commit

Permalink
Limit image size to put memory consumption back under control
Browse files (browse the repository at this point in the history)
  • Loading branch information
benoit74 committed Nov 29, 2024
1 parent bf62de2 commit 0ba35d5
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 3 deletions.
29 changes: 26 additions & 3 deletions scraper/src/mindtouch2zim/asset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import math
import threading
from functools import partial
from io import BytesIO
Expand All @@ -8,6 +9,7 @@
from pif import get_public_ip
from PIL import Image
from requests.exceptions import RequestException
from resizeimage import resizeimage
from zimscraperlib.image.optimization import optimize_webp
from zimscraperlib.image.presets import WebpMedium
from zimscraperlib.rewriting.url_rewriting import HttpUrl, ZimPath
Expand Down Expand Up @@ -159,9 +161,30 @@ def _get_image_content(

logger.debug("Optimizing")
optimized = BytesIO()
with Image.open(unoptimized) as img:
img.save(optimized, format="WEBP")
del unoptimized
with Image.open(unoptimized) as image:
if image.width * image.height <= CONTEXT.maximum_image_pixels:
image.save(optimized, format="WEBP")

Check warning on line 166 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L166

Added line #L166 was not covered by tests
else:
resizeimage.resize_cover(

Check warning on line 168 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L168

Added line #L168 was not covered by tests
image,
[
int(
math.sqrt(
CONTEXT.maximum_image_pixels
* image.width
/ image.height
)
),
int(
math.sqrt(
CONTEXT.maximum_image_pixels
* image.height
/ image.width
)
),
],
).save(optimized, format="WEBP")
del unoptimized

Check warning on line 187 in scraper/src/mindtouch2zim/asset.py

View check run for this annotation

Codecov / codecov/patch

scraper/src/mindtouch2zim/asset.py#L187

Added line #L187 was not covered by tests

optimize_webp(
src=optimized,
Expand Down
3 changes: 3 additions & 0 deletions scraper/src/mindtouch2zim/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class Context:
# URL to Mindtouch instance
library_url: str

# Maximum pixel count (width * height) of an image pushed to the ZIM; larger images are resized down to this budget
maximum_image_pixels: int = 1280 * 720

def __init__(self) -> None:
if path := os.getenv("MINDTOUCH_TMP"):
self.tmp_folder = Path(path)
Expand Down

0 comments on commit 0ba35d5

Please sign in to comment.