From f3ad4c7c7dd30dda542abf9f858b41d2f6d78c04 Mon Sep 17 00:00:00 2001
From: dbernstein <dabylon@gmail.com>
Date: Fri, 3 May 2024 09:36:21 -0700
Subject: [PATCH] [PP-1216] Zip email reports (#1818)

Resolves:  https://ebce-lyrasis.atlassian.net/browse/PP-1216
---
 .../generate_inventory_and_hold_reports.py    | 171 ++++++++++--------
 ...est_generate_inventory_and_hold_reports.py | 123 +++++++------
 2 files changed, 167 insertions(+), 127 deletions(-)

diff --git a/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py b/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py
index 7259f4c4be..52c0235391 100644
--- a/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py
+++ b/src/palace/manager/celery/tasks/generate_inventory_and_hold_reports.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
 import csv
-import os
 import tempfile
+import zipfile
 from datetime import datetime
 from pathlib import Path
-from typing import Any
+from typing import IO, Any
 
 from celery import shared_task
 from sqlalchemy import not_, select, text
@@ -54,90 +54,111 @@ def run(self) -> None:
                 )
                 return
 
-            try:
-                current_time = datetime.now()
-                date_str = current_time.strftime("%Y-%m-%d_%H:%M:%s")
-                attachments: dict[str, Path] = {}
-
-                file_name_modifier = f"{library.short_name}-{date_str}"
+            self.log.info(
+                f"Starting inventory and holds report job for {library.name}({library.short_name})."
+            )
 
-                # resolve integrations
-                integrations = session.scalars(
-                    select(IntegrationConfiguration)
-                    .join(IntegrationLibraryConfiguration)
-                    .where(
-                        IntegrationLibraryConfiguration.library_id == self.library_id,
-                        IntegrationConfiguration.goal == Goals.LICENSE_GOAL,
-                        not_(
-                            IntegrationConfiguration.settings_dict.contains(
-                                {"include_in_inventory_report": False}
-                            )
-                        ),
-                    )
-                ).all()
-                registry = LicenseProvidersRegistry()
-                integration_ids: list[int] = []
-                for integration in integrations:
-                    settings = registry[integration.protocol].settings_load(integration)
-                    if not isinstance(settings, OPDSImporterSettings):
-                        continue
-                    integration_ids.append(integration.id)
+            current_time = datetime.now()
+            date_str = current_time.strftime("%Y-%m-%d_%H:%M:%s")
 
-                # generate inventory report csv file
-                sql_params: dict[str, Any] = {
-                    "library_id": library.id,
-                    "integration_ids": tuple(integration_ids),
-                }
+            file_name_modifier = f"{library.short_name}-{date_str}"
 
-                inventory_report_file_path = self.generate_inventory_report(
-                    session, sql_params=sql_params
+            # resolve integrations
+            integrations = session.scalars(
+                select(IntegrationConfiguration)
+                .join(IntegrationLibraryConfiguration)
+                .where(
+                    IntegrationLibraryConfiguration.library_id == self.library_id,
+                    IntegrationConfiguration.goal == Goals.LICENSE_GOAL,
+                    not_(
+                        IntegrationConfiguration.settings_dict.contains(
+                            {"include_in_inventory_report": False}
+                        )
+                    ),
                 )
+            ).all()
+            registry = LicenseProvidersRegistry()
+            integration_ids: list[int] = []
+            for integration in integrations:
+                settings = registry[integration.protocol].settings_load(integration)
+                if not isinstance(settings, OPDSImporterSettings):
+                    continue
+                integration_ids.append(integration.id)
 
-                # generate holds report csv file
-                holds_report_file_path = self.generate_holds_report(
-                    session, sql_params=sql_params
-                )
+            # generate inventory report csv file
+            sql_params: dict[str, Any] = {
+                "library_id": library.id,
+                "integration_ids": tuple(integration_ids),
+            }
 
-                attachments[f"palace-inventory-report-{file_name_modifier}.csv"] = Path(
-                    inventory_report_file_path
-                )
-                attachments[f"palace-holds-report-{file_name_modifier}.csv"] = Path(
-                    holds_report_file_path
-                )
+            with tempfile.NamedTemporaryFile(
+                delete=self.delete_attachments
+            ) as report_zip:
+                zip_path = Path(report_zip.name)
 
-                self.send_email(
-                    subject=f"Inventory and Holds Reports {current_time}",
-                    receivers=[self.email_address],
-                    text="",
-                    attachments=attachments,
-                )
-            finally:
-                if self.delete_attachments:
-                    for file_path in attachments.values():
-                        os.remove(file_path)
+                with (
+                    self.create_temp_file() as inventory_report_file,
+                    self.create_temp_file() as holds_report_file,
+                ):
+                    self.generate_csv_report(
+                        session,
+                        csv_file=inventory_report_file,
+                        sql_params=sql_params,
+                        query=self.inventory_report_query(),
+                    )
 
-    def generate_inventory_report(
-        self, _db: Session, sql_params: dict[str, Any]
-    ) -> str:
-        """Generate an inventory csv file and return the file path"""
-        return self.generate_csv_report(_db, sql_params, self.inventory_report_query())
+                    self.generate_csv_report(
+                        session,
+                        csv_file=holds_report_file,
+                        sql_params=sql_params,
+                        query=self.holds_report_query(),
+                    )
+
+                    with zipfile.ZipFile(
+                        zip_path, "w", zipfile.ZIP_DEFLATED
+                    ) as archive:
+                        archive.write(
+                            filename=holds_report_file.name,
+                            arcname=f"palace-holds-report-for-library-{file_name_modifier}.csv",
+                        )
+                        archive.write(
+                            filename=inventory_report_file.name,
+                            arcname=f"palace-inventory-report-for-library-{file_name_modifier}.csv",
+                        )
+
+                    self.send_email(
+                        subject=f"Inventory and Holds Reports {current_time}",
+                        receivers=[self.email_address],
+                        text="",
+                        attachments={
+                            f"palace-inventory-and-holds-reports-for-{file_name_modifier}.zip": zip_path
+                        },
+                    )
 
-    def generate_holds_report(self, _db: Session, sql_params: dict[str, Any]) -> str:
-        """Generate a holds report csv file and return the file path"""
-        return self.generate_csv_report(_db, sql_params, self.holds_report_query())
+                    self.log.debug(f"Zip file written to {zip_path}")
+                    self.log.info(
+                        f"Emailed inventory and holds reports for {library.name}({library.short_name})."
+                    )
+
+    def create_temp_file(self) -> IO[str]:
+        return tempfile.NamedTemporaryFile("w", encoding="utf-8")
 
     def generate_csv_report(
-        self, _db: Session, sql_params: dict[str, Any], query: str
-    ) -> str:
-        with tempfile.NamedTemporaryFile("w", delete=False, encoding="utf-8") as temp:
-            writer = csv.writer(temp, delimiter=",")
-            rows = _db.execute(
-                text(query),
-                sql_params,
-            )
-            writer.writerow(rows.keys())
-            writer.writerows(rows)
-        return temp.name
+        self,
+        _db: Session,
+        csv_file: IO[str],
+        sql_params: dict[str, Any],
+        query: str,
+    ) -> None:
+        writer = csv.writer(csv_file, delimiter=",")
+        rows = _db.execute(
+            text(query),
+            sql_params,
+        )
+        writer.writerow(rows.keys())
+        writer.writerows(rows)
+        csv_file.flush()
+        self.log.debug(f"report written to {csv_file.name}")
 
     @staticmethod
     def inventory_report_query() -> str:
diff --git a/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py b/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py
index a3b085a9c7..555f827df1 100644
--- a/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py
+++ b/tests/manager/celery/tasks/test_generate_inventory_and_hold_reports.py
@@ -1,6 +1,9 @@
 import csv
+import io
 import os
+import zipfile
 from datetime import timedelta
+from typing import IO
 from unittest.mock import create_autospec
 
 from pytest import LogCaptureFixture
@@ -199,58 +202,74 @@ def test_job_run(
     assert "Inventory and Holds Reports" in kwargs["subject"]
     attachments: dict = kwargs["attachments"]
 
-    assert len(attachments) == 2
-    inventory_report_key = [x for x in attachments.keys() if "inventory" in x][0]
-    assert inventory_report_key
-    assert "test_library" in inventory_report_key
-    inventory_report_value = attachments[inventory_report_key]
-    assert inventory_report_value
-    inventory_report_csv = list(csv.DictReader(open(inventory_report_value)))
-
-    assert len(inventory_report_csv) == 1
-    for row in inventory_report_csv:
-        assert row["title"] == title
-        assert row["author"] == author
-        assert row["identifier"]
-        assert row["language"] == language
-        assert row["publisher"] == publisher
-        assert row["audience"] == "young adult"
-        assert row["genres"] == "genre_a,genre_z"
-        assert row["format"] == edition.BOOK_MEDIUM
-        assert row["data_source"] == data_source
-        assert row["collection_name"] == collection_name
-        assert float(row["days_remaining_on_license"]) == float(days_remaining)
-        assert row["shared_active_loan_count"] == "0"
-        assert row["library_active_loan_count"] == "0"
-        assert row["remaining_loans"] == str(checkouts_left)
-        assert row["allowed_concurrent_users"] == str(terms_concurrency)
-        assert expiration.strftime("%Y-%m-%d %H:%M:%S.%f") in row["license_expiration"]
-
-    holds_report_key = [x for x in attachments.keys() if "holds" in x][0]
-    assert holds_report_key
-    assert "test_library" in holds_report_key
-    holds_report_value = attachments[holds_report_key]
-    assert holds_report_value
-    holds_report_csv = list(csv.DictReader(open(holds_report_value)))
-    assert len(holds_report_csv) == 1
-
-    for row in holds_report_csv:
-        assert row["title"] == title
-        assert row["author"] == author
-        assert row["identifier"]
-        assert row["language"] == language
-        assert row["publisher"] == publisher
-        assert row["audience"] == "young adult"
-        assert row["genres"] == "genre_a,genre_z"
-        assert row["format"] == edition.BOOK_MEDIUM
-        assert row["data_source"] == data_source
-        assert row["collection_name"] == collection_name
-        assert int(row["shared_active_hold_count"]) == shared_patrons_in_hold_queue
-        assert int(row["library_active_hold_count"]) == 3
-
-    # clean up files
-    for f in attachments.values():
-        os.remove(f)
+    assert len(attachments) == 1
+    reports_zip = list(attachments.values())[0]
+    try:
+        with zipfile.ZipFile(reports_zip, mode="r") as archive:
+            entry_list = archive.namelist()
+            assert len(entry_list) == 2
+            with (
+                archive.open(entry_list[0]) as holds_report_zip_entry,
+                archive.open(entry_list[1]) as inventory_report_zip_entry,
+            ):
+                assert inventory_report_zip_entry
+                assert "test_library" in inventory_report_zip_entry.name
+                inventory_report_csv = zip_csv_entry_to_dict(inventory_report_zip_entry)
+
+                assert len(inventory_report_csv) == 1
+                for row in inventory_report_csv:
+                    assert row["title"] == title
+                    assert row["author"] == author
+                    assert row["identifier"]
+                    assert row["language"] == language
+                    assert row["publisher"] == publisher
+                    assert row["audience"] == "young adult"
+                    assert row["genres"] == "genre_a,genre_z"
+                    assert row["format"] == edition.BOOK_MEDIUM
+                    assert row["data_source"] == data_source
+                    assert row["collection_name"] == collection_name
+                    assert float(row["days_remaining_on_license"]) == float(
+                        days_remaining
+                    )
+                    assert row["shared_active_loan_count"] == "0"
+                    assert row["library_active_loan_count"] == "0"
+                    assert row["remaining_loans"] == str(checkouts_left)
+                    assert row["allowed_concurrent_users"] == str(terms_concurrency)
+                    assert (
+                        expiration.strftime("%Y-%m-%d %H:%M:%S.%f")
+                        in row["license_expiration"]
+                    )
+
+                assert holds_report_zip_entry
+                assert "test_library" in holds_report_zip_entry.name
+                assert holds_report_zip_entry
+                holds_report_csv = zip_csv_entry_to_dict(holds_report_zip_entry)
+                assert len(holds_report_csv) == 1
+
+                for row in holds_report_csv:
+                    assert row["title"] == title
+                    assert row["author"] == author
+                    assert row["identifier"]
+                    assert row["language"] == language
+                    assert row["publisher"] == publisher
+                    assert row["audience"] == "young adult"
+                    assert row["genres"] == "genre_a,genre_z"
+                    assert row["format"] == edition.BOOK_MEDIUM
+                    assert row["data_source"] == data_source
+                    assert row["collection_name"] == collection_name
+                    assert (
+                        int(row["shared_active_hold_count"])
+                        == shared_patrons_in_hold_queue
+                    )
+                    assert int(row["library_active_hold_count"]) == 3
+    finally:
+        os.remove(reports_zip)
+
+
+def zip_csv_entry_to_dict(zip_entry: IO[bytes]):
+    wrapper = io.TextIOWrapper(zip_entry, encoding="UTF-8")
+    csv_dict = list(csv.DictReader(wrapper))
+    return csv_dict
 
 
 def create_test_opds_collection(