diff --git a/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py b/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py
index d53731e86..5a238acf9 100644
--- a/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py
+++ b/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py
@@ -2,6 +2,7 @@
 
 import csv
 import tempfile
+import uuid
 import zipfile
 from datetime import datetime
 from pathlib import Path
@@ -20,6 +21,7 @@
 from palace.manager.service.integration_registry.license_providers import (
     LicenseProvidersRegistry,
 )
+from palace.manager.service.storage.s3 import S3Service
 from palace.manager.sqlalchemy.model.integration import (
     IntegrationConfiguration,
     IntegrationLibraryConfiguration,
@@ -71,14 +73,14 @@ def __init__(
         email_address: str,
         send_email: SendEmailCallable,
         registry: LicenseProvidersRegistry,
-        delete_attachments: bool = True,
+        s3_service: S3Service,
     ):
         super().__init__(session_maker)
         self.library_id = library_id
         self.email_address = email_address
-        self.delete_attachments = delete_attachments
         self.send_email = send_email
         self.registry = registry
+        self.s3_service = s3_service
 
     def run(self) -> None:
         with self.transaction() as session:
@@ -113,9 +115,7 @@ def run(self) -> None:
                 "integration_ids": tuple(integration_ids),
             }
 
-            with tempfile.NamedTemporaryFile(
-                delete=self.delete_attachments
-            ) as report_zip:
+            with tempfile.NamedTemporaryFile() as report_zip:
                 zip_path = Path(report_zip.name)
 
                 with (
@@ -148,13 +148,34 @@ def run(self) -> None:
                         arcname=f"palace-inventory-report-for-library-{file_name_modifier}.csv",
                     )
 
+                reports_path = "inventory_and_holds"
+                expiration_in_days = 30
+                self.s3_service.update_bucket_expiration_rule(
+                    prefix=f"{reports_path}/", expiration_in_days=expiration_in_days
+                )
+
+                with zip_path.open(
+                    "rb",
+                ) as binary_stream:
+                    uid = uuid.uuid4()
+                    key = (
+                        f"{reports_path}/{library.short_name}/"
+                        f"inventory-and-holds-for-library-{file_name_modifier}-{uid}.zip"
+                    )
+                    self.s3_service.store_stream(
+                        key,
+                        binary_stream,
+                        content_type="application/zip",
+                    )
+
+                s3_file_link = self.s3_service.generate_url(key)
                 self.send_email(
                     subject=f"Inventory and Holds Reports {current_time}",
                     receivers=[self.email_address],
-                    text="",
-                    attachments={
-                        f"palace-inventory-and-holds-reports-for-{file_name_modifier}.zip": zip_path
-                    },
+                    text=(
+                        f"Download Report here -> {s3_file_link} \n\n"
+                        f"This report will be available for download for {expiration_in_days} days."
+                    ),
                 )
 
                 self.log.debug(f"Zip file written to {zip_path}")
@@ -331,4 +352,5 @@ def generate_inventory_and_hold_reports(
         email_address=email_address,
         send_email=task.services.email.send_email,
         registry=task.services.integration_registry.license_providers(),
+        s3_service=task.services.storage.public(),
     ).run()
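
For orientation, the change above replaces the zip email attachment with an S3 upload plus a download link in the message body. Below is a minimal standalone sketch of that flow, assuming an S3Service instance exposing the store_stream, generate_url, and update_bucket_expiration_rule methods used in the diff; the helper function and its parameters are illustrative, not part of the codebase.

import uuid
from pathlib import Path


def upload_report_and_build_email_text(
    s3_service, zip_path: Path, library_short_name: str, file_name_modifier: str
) -> str:
    # Make sure objects under the reports prefix expire automatically.
    reports_path = "inventory_and_holds"
    expiration_in_days = 30
    s3_service.update_bucket_expiration_rule(
        prefix=f"{reports_path}/", expiration_in_days=expiration_in_days
    )

    # Stream the finished zip to S3 under a unique, per-library key.
    key = (
        f"{reports_path}/{library_short_name}/"
        f"inventory-and-holds-for-library-{file_name_modifier}-{uuid.uuid4()}.zip"
    )
    with zip_path.open("rb") as binary_stream:
        s3_service.store_stream(key, binary_stream, content_type="application/zip")

    # Email a link instead of attaching the zip itself.
    link = s3_service.generate_url(key)
    return (
        f"Download Report here -> {link} \n\n"
        f"This report will be available for download for {expiration_in_days} days."
    )
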
+ """ + rule_name = f"expiration_on_{prefix}" + bucket = self.bucket + configuration = self.client.get_bucket_lifecycle_configuration(Bucket=bucket) + if not configuration: + configuration = {} + + if "Rules" not in configuration: + configuration["Rules"] = [] + + rules_list = configuration["Rules"] + for rule in rules_list: + if "ID" in rule and rule["ID"] == rule_name: + if ( + rule["Prefix"] == prefix + and rule["Expiration"]["Days"] == expiration_in_days + ): + self.log.info( + f"Expiration lifecycle rule {rule_name} has not changed for {bucket}: " + f"current value = {rule}. Ignoring." + ) + return + else: + # remove the old version of the rule from the list + rules_list.remove(rule) + break + + rules_list.append( + { + "Expiration": { + "Days": expiration_in_days, + }, + "ID": rule_name, + "Prefix": prefix, + "Filter": { + "Prefix": prefix, + }, + "Status": "Enabled", + } + ) + + try: + policy_status = self.client.put_bucket_lifecycle_configuration( + Bucket=bucket, LifecycleConfiguration=configuration + ) + + self.log.info( + f"Updated configuration for {bucket}: " + f"configuration={configuration}, " + f"status = {policy_status}" + ) + except ClientError as e: + self.log.error( + f"Unable to apply bucket policy to {self.bucket} . \nReason:{e}" + ) diff --git a/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py b/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py index c5496d646..af7d1f2ac 100644 --- a/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py +++ b/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py @@ -3,8 +3,8 @@ import os import zipfile from datetime import timedelta -from typing import IO -from unittest.mock import create_autospec +from typing import IO, BinaryIO +from unittest.mock import MagicMock, create_autospec from pytest import LogCaptureFixture from sqlalchemy.orm import sessionmaker @@ -40,12 +40,16 @@ def test_job_run( send_email_mock = create_autospec( services_fixture.services.email.container.send_email ) + + mock_s3 = MagicMock() + GenerateInventoryAndHoldsReportsJob( mock_session_maker, library_id=1, email_address=email, send_email=send_email_mock, registry=services_fixture.services.integration_registry.license_providers(), + s3_service=mock_s3, ).run() assert ( f"Cannot generate inventory and holds report for library (id=1): library not found." @@ -187,19 +191,32 @@ def test_job_run( library.id, email_address=email, send_email=send_email_mock, - delete_attachments=False, registry=services_fixture.services.integration_registry.license_providers(), + s3_service=mock_s3, ) + reports_zip = "test_zip" + + def store_stream_mock( + key: str, + stream: BinaryIO, + content_type: str | None = None, + ): + + with open(reports_zip, "wb") as file: + file.write(stream.read()) + + mock_s3.store_stream = store_stream_mock + job.run() + + mock_s3.update_bucket_expiration_rule.assert_called_once() + mock_s3.generate_url.assert_called_once() send_email_mock.assert_called_once() kwargs = send_email_mock.call_args.kwargs assert kwargs["receivers"] == [email] assert "Inventory and Holds Reports" in kwargs["subject"] - attachments: dict = kwargs["attachments"] - - assert len(attachments) == 1 - reports_zip = list(attachments.values())[0] + assert "This report will be available for download for 30 days." 
diff --git a/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py b/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py
index c5496d646..af7d1f2ac 100644
--- a/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py
+++ b/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py
@@ -3,8 +3,8 @@
 import os
 import zipfile
 from datetime import timedelta
-from typing import IO
-from unittest.mock import create_autospec
+from typing import IO, BinaryIO
+from unittest.mock import MagicMock, create_autospec
 
 from pytest import LogCaptureFixture
 from sqlalchemy.orm import sessionmaker
@@ -40,12 +40,16 @@ def test_job_run(
     send_email_mock = create_autospec(
         services_fixture.services.email.container.send_email
    )
+
+    mock_s3 = MagicMock()
+
     GenerateInventoryAndHoldsReportsJob(
         mock_session_maker,
         library_id=1,
         email_address=email,
         send_email=send_email_mock,
         registry=services_fixture.services.integration_registry.license_providers(),
+        s3_service=mock_s3,
     ).run()
     assert (
         f"Cannot generate inventory and holds report for library (id=1): library not found."
@@ -187,19 +191,32 @@ def test_job_run(
         library.id,
         email_address=email,
         send_email=send_email_mock,
-        delete_attachments=False,
         registry=services_fixture.services.integration_registry.license_providers(),
+        s3_service=mock_s3,
     )
+    reports_zip = "test_zip"
+
+    def store_stream_mock(
+        key: str,
+        stream: BinaryIO,
+        content_type: str | None = None,
+    ):
+
+        with open(reports_zip, "wb") as file:
+            file.write(stream.read())
+
+    mock_s3.store_stream = store_stream_mock
+
     job.run()
+
+    mock_s3.update_bucket_expiration_rule.assert_called_once()
+    mock_s3.generate_url.assert_called_once()
     send_email_mock.assert_called_once()
     kwargs = send_email_mock.call_args.kwargs
     assert kwargs["receivers"] == [email]
     assert "Inventory and Holds Reports" in kwargs["subject"]
-    attachments: dict = kwargs["attachments"]
-
-    assert len(attachments) == 1
-    reports_zip = list(attachments.values())[0]
+    assert "This report will be available for download for 30 days." in kwargs["text"]
 
     try:
         with zipfile.ZipFile(reports_zip, mode="r") as archive:
             entry_list = archive.namelist()
@@ -290,11 +307,21 @@ def test_generate_inventory_and_hold_reports_task(
     services_fixture: ServicesFixture,
     celery_fixture: CeleryFixture,
 ):
+
+    mock_s3_service = MagicMock()
+    mock_s3_service.generate_url.return_value = "http://test"
+    services_fixture.services.storage.public.override(mock_s3_service)
+
     library = db.library(short_name="test_library")
     # there must be at least one opds collection associated with the library for this to work
     create_test_opds_collection("c1", "d1", db, library)
     generate_inventory_and_hold_reports.delay(library.id, "test@email").wait()
     services_fixture.email_fixture.mock_emailer.send.assert_called_once()
+
+    mock_s3_service.store_stream.assert_called_once()
+    mock_s3_service.update_bucket_expiration_rule.assert_called_once()
+    mock_s3_service.generate_url.assert_called_once()
+
     assert (
         "Inventory and Holds Reports"
         in services_fixture.email_fixture.mock_emailer.send.call_args.kwargs["subject"]
@@ -302,3 +329,11 @@
     assert services_fixture.email_fixture.mock_emailer.send.call_args.kwargs[
         "receivers"
     ] == ["test@email"]
+    assert (
+        "Download Report here -> http://test"
+        in services_fixture.email_fixture.mock_emailer.send.call_args.kwargs["text"]
+    )
+    assert (
+        "This report will be available for download for 30 days."
+        in services_fixture.email_fixture.mock_emailer.send.call_args.kwargs["text"]
+    )
diff --git a/tests/manager/service/storage/test_s3.py b/tests/manager/service/storage/test_s3.py
index cce3aee37..bb07f3be8 100644
--- a/tests/manager/service/storage/test_s3.py
+++ b/tests/manager/service/storage/test_s3.py
@@ -225,6 +225,84 @@ def test_multipart_upload_exception(self, s3_service_fixture: S3ServiceFixture):
         with pytest.raises(RuntimeError):
             upload.upload_part(b"foo")
 
+    def _configuration(self, prefix, expiration):
+        return {
+            "Rules": [
+                {
+                    "Expiration": {
+                        "Days": expiration,
+                    },
+                    "ID": f"expiration_on_{prefix}",
+                    "Prefix": prefix,
+                    "Filter": {"Prefix": prefix},
+                    "Status": "Enabled",
+                }
+            ]
+        }
+
+    def test_update_bucket_expiration_rule_not_previously_set(
+        self, s3_service_fixture: S3ServiceFixture
+    ):
+        service = s3_service_fixture.service()
+        prefix = "prefix/"
+        expiration_in_days = 10
+
+        s3_service_fixture.mock_s3_client.get_bucket_lifecycle_configuration.return_value = (
+            None
+        )
+        service.update_bucket_expiration_rule(
+            prefix=prefix, expiration_in_days=expiration_in_days
+        )
+        s3_service_fixture.mock_s3_client.get_bucket_lifecycle_configuration.assert_called_once()
+        put_config_method = (
+            s3_service_fixture.mock_s3_client.put_bucket_lifecycle_configuration
+        )
+        put_config_method.assert_called_once_with(
+            Bucket="bucket",
+            LifecycleConfiguration=self._configuration(prefix, expiration_in_days),
+        )
+
+    def test_update_bucket_expiration_rule_previously_set_unchanged(
+        self, s3_service_fixture: S3ServiceFixture
+    ):
+        service = s3_service_fixture.service()
+        prefix = "prefix/"
+        expiration_in_days = 10
+
+        get_bucket_lifecycle_config = (
+            s3_service_fixture.mock_s3_client.get_bucket_lifecycle_configuration
+        )
+        get_bucket_lifecycle_config.return_value = self._configuration(
+            prefix, expiration_in_days
+        )
+        service.update_bucket_expiration_rule(
+            prefix=prefix, expiration_in_days=expiration_in_days
+        )
+        s3_service_fixture.mock_s3_client.get_bucket_lifecycle_configuration.assert_called_once()
+        s3_service_fixture.mock_s3_client.put_bucket_lifecycle_configuration.assert_not_called()
+
+    def test_update_bucket_expiration_rule_previously_set_changed(
+        self, s3_service_fixture: S3ServiceFixture
+    ):
+        service = s3_service_fixture.service()
+        prefix = "prefix/"
+        expiration_in_days = 10
+        get_lifecycle = (
+            s3_service_fixture.mock_s3_client.get_bucket_lifecycle_configuration
+        )
+        get_lifecycle.return_value = self._configuration(prefix, 12)
+        service.update_bucket_expiration_rule(
+            prefix=prefix, expiration_in_days=expiration_in_days
+        )
+        s3_service_fixture.mock_s3_client.get_bucket_lifecycle_configuration.assert_called_once()
+        put_config_method = (
+            s3_service_fixture.mock_s3_client.put_bucket_lifecycle_configuration
+        )
+        put_config_method.assert_called_once_with(
+            Bucket="bucket",
+            LifecycleConfiguration=self._configuration(prefix, expiration_in_days),
+        )
+
 
 @pytest.mark.minio
 class TestS3ServiceIntegration:
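
Beyond the mocked-client unit tests above, the same helper could also be exercised against a real S3-compatible backend such as the MinIO instance behind TestS3ServiceIntegration. The following is a rough sketch only: the endpoint, bucket, and function names are hypothetical, and the round trip assumes the backend supports bucket lifecycle configuration (MinIO does).

import boto3


def check_expiration_rule_round_trip(service, bucket_name: str) -> None:
    # Apply the rule through the service under test ...
    prefix = "inventory_and_holds/"
    service.update_bucket_expiration_rule(prefix=prefix, expiration_in_days=30)

    # ... then read it back with a plain boto3 client pointed at the test endpoint.
    client = boto3.client("s3", endpoint_url="http://localhost:9000")  # hypothetical endpoint
    config = client.get_bucket_lifecycle_configuration(Bucket=bucket_name)
    rule = next(r for r in config["Rules"] if r["ID"] == f"expiration_on_{prefix}")
    assert rule["Status"] == "Enabled"
    assert rule["Expiration"]["Days"] == 30
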