From 8943083cb57f97a5b08c11dd1dbaa47cd854e167 Mon Sep 17 00:00:00 2001 From: Daniel Bernstein Date: Wed, 13 Mar 2024 22:39:38 -0700 Subject: [PATCH] Add inventory report generation script. --- api/admin/controller/report.py | 14 +- api/admin/routes.py | 6 +- bin/generate_inventory_reports | 11 ++ core/model/asynctask.py | 50 ++++-- core/scripts.py | 202 ++++++++++++++++++++++ tests/api/admin/controller/test_report.py | 2 +- 6 files changed, 268 insertions(+), 17 deletions(-) create mode 100755 bin/generate_inventory_reports diff --git a/api/admin/controller/report.py b/api/admin/controller/report.py index 14ac0c6ba0..6d2d0c1393 100644 --- a/api/admin/controller/report.py +++ b/api/admin/controller/report.py @@ -1,24 +1,30 @@ +import json import logging +from dataclasses import asdict from http import HTTPStatus import flask from flask import Response from api.controller.circulation_manager import CirculationManagerController +from core.model import Library from core.model.admin import Admin -from core.model.asynctask import AsyncTaskType, queue_task +from core.model.asynctask import AsyncTaskType, InventoryReportTaskData, queue_task from core.util.problem_detail import ProblemDetail, ProblemDetailException class ReportController(CirculationManagerController): def generate_inventory_report(self) -> Response | ProblemDetail: log = logging.getLogger(self.__class__.__name__) + library: Library = getattr(flask.request, "library") admin: Admin = getattr(flask.request, "admin") try: email = admin.email - data = dict(admin_email=email, admin_id=admin.id) + data: InventoryReportTaskData = InventoryReportTaskData( + admin_email=email, admin_id=admin.id, library_id=library.id + ) task, is_new = queue_task( - self._db, task_type=AsyncTaskType.INVENTORY_REPORT, data=data + self._db, task_type=AsyncTaskType.INVENTORY_REPORT, data=asdict(data) ) self._db.commit() @@ -28,7 +34,7 @@ def generate_inventory_report(self) -> Response | ProblemDetail: ) http_status = HTTPStatus.ACCEPTED if is_new else HTTPStatus.CONFLICT - return Response(dict(message=msg), http_status) + return Response(json.dumps(dict(message=msg)), http_status) except ProblemDetailException as e: self._db.rollback() return e.problem_detail diff --git a/api/admin/routes.py b/api/admin/routes.py index 02b6356158..a2df2d32f9 100644 --- a/api/admin/routes.py +++ b/api/admin/routes.py @@ -702,7 +702,11 @@ def diagnostics(): return app.manager.timestamps_controller.diagnostics() -@app.route("/admin/reports/generate_inventory_report", methods=["POST"]) +@app.route( + "/admin/reports/generate_inventory_report/", + methods=["POST"], +) +@allows_library @returns_json_or_response_or_problem_detail @requires_admin def generate_inventory_report(): diff --git a/bin/generate_inventory_reports b/bin/generate_inventory_reports new file mode 100755 index 0000000000..c0dfe0db54 --- /dev/null +++ b/bin/generate_inventory_reports @@ -0,0 +1,11 @@ +#!/usr/bin/env python +"""Update the cached sizes of all custom lists.""" +import os +import sys + +bin_dir = os.path.split(__file__)[0] +package_dir = os.path.join(bin_dir, "..") +sys.path.append(os.path.abspath(package_dir)) +from core.scripts import GenerateInventoryReports + +GenerateInventoryReports().run() diff --git a/core/model/asynctask.py b/core/model/asynctask.py index 8e8486d3e4..7459a87456 100644 --- a/core/model/asynctask.py +++ b/core/model/asynctask.py @@ -1,44 +1,65 @@ # Async import datetime import json +import uuid from enum import Enum -from sqlalchemy import Column, DateTime, Integer, String -from sqlalchemy.dialects.postgresql import JSON +from pydantic.dataclasses import dataclass +from sqlalchemy import Column, DateTime, String +from sqlalchemy.dialects.postgresql import JSON, UUID from sqlalchemy.ext.mutable import MutableDict -from sqlalchemy.sql.functions import now from sqlalchemy.types import Enum as SqlAlchemyEnum from core.model import Base, create +from core.util.datetime_helpers import utc_now class AsyncTaskStatus(str, Enum): - READY = "ready" - PROCESSING = "processing" - SUCCESSFUL = "successful" - FAILED = "failed" + READY = "READY" + PROCESSING = "PROCESSING" + SUCCESS = "SUCCESS" + FAILURE = "FAILURE" class AsyncTaskType(Enum): - INVENTORY_REPORT = "inventory-report" + INVENTORY_REPORT = "INVENTORY_REPORT" class AsyncTask(Base): """An asynchronous task.""" __tablename__ = "asynctasks" - id = Column(Integer, primary_key=True) + id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) + created = Column( + DateTime(timezone=True), index=True, nullable=False, default=utc_now + ) task_type = Column(SqlAlchemyEnum(AsyncTaskType), index=True, nullable=False) status = Column(SqlAlchemyEnum(AsyncTaskStatus), index=True, nullable=False) - created = Column(DateTime, default=now(), nullable=False) processing_start_time = Column(DateTime, nullable=True) processing_end_time = Column(DateTime, nullable=True) - failure_details = Column(String, nullable=True) + status_details = Column(String, nullable=True) data = Column(MutableDict.as_mutable(JSON), default={}) def __repr__(self): return f"<{self.__class__.__name__}({repr(self.__dict__)})>" + def complete(self): + if self.status != AsyncTaskStatus.PROCESSING: + raise Exception( + "The task must be in the PROCESSING state in order to transition to a completion state" + ) + self.status = AsyncTaskStatus.SUCCESS + self.processing_end_time = datetime.datetime.now() + + def fail(self, failure_details: str): + if self.status != AsyncTaskStatus.PROCESSING: + raise Exception( + "The task must be in the PROCESSING state in order to transition to a completion state" + ) + self.status = AsyncTaskStatus.FAIL + self.processing_end_time = datetime.datetime.now() + self.status_details = failure_details + def start_next_task(_db, task_type: str) -> AsyncTask | None: """ @@ -87,3 +108,10 @@ def queue_task(_db, task_type, data: dict[str, str]) -> tuple[AsyncTask, bool]: return create( _db, AsyncTask, task_type=task_type, status=AsyncTaskStatus.READY, data=data ) + + +@dataclass +class InventoryReportTaskData: + admin_id: int + library_id: int + admin_email: str diff --git a/core/scripts.py b/core/scripts.py index dc548d607c..ebb302e4ff 100644 --- a/core/scripts.py +++ b/core/scripts.py @@ -1,10 +1,12 @@ import argparse +import csv import datetime import json import logging import os import random import sys +import tempfile import traceback import unicodedata import uuid @@ -24,6 +26,7 @@ from core.lane import Lane from core.metadata_layer import TimestampData from core.model import ( + Admin, BaseCoverageRecord, Collection, Contributor, @@ -46,6 +49,12 @@ get_one_or_create, production_session, ) +from core.model.asynctask import ( + AsyncTask, + AsyncTaskType, + InventoryReportTaskData, + start_next_task, +) from core.model.classification import Classification from core.model.devicetokens import DeviceToken, DeviceTokenTypes from core.model.listeners import site_configuration_has_changed @@ -2758,6 +2767,199 @@ def suppress_work(self, library: Library, identifier: Identifier) -> None: self._db.commit() +class GenerateInventoryReports(Script): + """Generate inventory reports from queued report tasks""" + + HEADER = [ + "title", + "author", + "isbn", + "other_identifier", + "language", + "genre", + "publisher", + "audience", + "format", + "library", + "collection", + "license_duration_in_days", + "license_expiration_date", + "initial_loan_count", + "consumed_loans", + "remaining_loans", + "allowed_concurrent_users", + "max_loan_duration_in_days", + "active_holds_for_library", + "active_loans_for_library", + "active_holds_for_collection", + "active_loans_for_collection", + ] + + DATA_SOURCES = [ + "Palace Marketplace", + "BiblioBoard", + "Palace Bookshelf", + "Unlimited Listens", + ] + + @classmethod + def arg_parser(cls, _db: Session | None) -> argparse.ArgumentParser: # type: ignore[override] + parser = argparse.ArgumentParser() + if _db is None: + raise ValueError("No database session provided.") + + return parser + + @classmethod + def parse_command_line( + cls, _db: Session | None = None, cmd_args: list[str] | None = None + ): + parser = cls.arg_parser(_db) + return parser.parse_known_args(cmd_args)[0] + + def load_admin(self, admin_id: int) -> Admin: + admin = self._db.query(Admin).filter(Admin.id == admin_id).first() + if not admin: + raise ValueError(f"Unknown Admin: id = {id}") + return admin + + def do_run(self, cmd_args: list[str] | None = None) -> None: + parsed = self.parse_command_line(self._db, cmd_args=cmd_args) + + while True: + task = start_next_task(self._db, AsyncTaskType.INVENTORY_REPORT) + if not task: + return + + self.process_task(task) + + def process_task(self, task: AsyncTask): + data = InventoryReportTaskData(**task.data) + + admin = self.load_admin(data.admin_id) + files = [] + try: + current_time = datetime.datetime.now() + date_str = current_time.strftime("%Y-%m-%d_%H:%M:%s") + attachments = {} + + for data_source_name in self.DATA_SOURCES: + prefix = f"palace-inventory-report-{data_source_name}-{date_str}" + suffix = ".csv" + with tempfile.NamedTemporaryFile( + "w", + delete=False, + prefix=prefix, + suffix=suffix, + ) as temp: + self.generate_report( + data_source_name=data_source_name, + library_id=data.library_id, + output_file=temp, + ) + + with open(temp.name) as temp: + attachments[f"{prefix}{suffix}"] = temp.read() + files.append(temp) + + self.services.email.send_email( + subject=f"Inventory Report {current_time}", + receivers=[data.admin_email], + text="", + attachments=attachments, + ) + task.complete() + except Exception as e: + # log error + self.log.error(f"Failed to process task: {task}", e) + task.fail(failure_details=f"{e}") + finally: + self._db.commit() + for file in files: + os.remove(file.name) + + def generate_report(self, data_source_name: str, library_id: int, output_file): + writer = csv.writer(output_file, delimiter=",") + rows = self._db.execute( + self.inventory_report_query(), + {"data_source_name": data_source_name, "library_id": library_id}, + ) + writer.writerow(rows.keys()) + writer.writerows(rows) + + def inventory_report_query(self) -> str: + return """select lp.id as license_pool_id, + e.title, + e.author, + i.identifier, + e.language, + e.publisher, + e.medium as format, + ic.name collection_name, + DATE_PART('day', l.expires::date) - DATE_PART('day',lp.availability_time::date) as license_duration_days, + l.expires license_expiration_date, + l.checkouts_available initial_loan_count, + (l.checkouts_available-l.checkouts_left) consumed_loans, + l.checkouts_left remaining_loans, + l.terms_concurrency allowed_concurrent_users, + coalesce(lib_holds.active_hold_count, 0) library_active_hold_count, + + coalesce(lib_loans.active_loan_count, 0) library_active_loan_count, + CASE WHEN collection_sharing.is_shared_collection THEN lp.patrons_in_hold_queue + ELSE -1 + END shared_hold_queue, + CASE WHEN collection_sharing.is_shared_collection THEN lp.licenses_reserved + ELSE -1 + END shared_hold_queue + from datasources d, + collections c, + integration_configurations ic, + integration_library_configurations il, + libraries lib, + editions e, + identifiers i, + (select ic.parent_id, + count(ic.parent_id) > 1 is_shared_collection + from integration_library_configurations ic, + integration_configurations i, + collections c + where c.integration_configuration_id = i.id and + i.id = ic.parent_id group by ic.parent_id) collection_sharing, + licensepools lp left outer join licenses l on lp.id = l.license_pool_id + left outer join (select h.license_pool_id, + p.library_id, + count(h.id) active_hold_count + from holds h, + patrons p, + libraries l + where p.id = h.patron_id and + p.library_id = l.id and + l.id = :library_id + group by p.library_id, h.license_pool_id) lib_holds on lp.id = lib_holds.license_pool_id + left outer join (select ln.license_pool_id, + p.library_id, + count(ln.id) active_loan_count + from loans ln, + patrons p, + libraries l + where p.id = ln.patron_id and + p.library_id = l.id and + l.id = :library_id + group by p.library_id, ln.license_pool_id) lib_loans on lp.id = lib_holds.license_pool_id + where lp.identifier_id = i.id and + e.primary_identifier_id = i.id and + d.id = e.data_source_id and + c.id = lp.collection_id and + c.integration_configuration_id = ic.id and + ic.id = il.parent_id and + ic.id = collection_sharing.parent_id and + il.library_id = lib.id and + d.name = :data_source_name and + lib.id = :library_id + order by title, author + """ + + class MockStdin: """Mock a list of identifiers passed in on standard input.""" diff --git a/tests/api/admin/controller/test_report.py b/tests/api/admin/controller/test_report.py index 92d404a339..4534593989 100644 --- a/tests/api/admin/controller/test_report.py +++ b/tests/api/admin/controller/test_report.py @@ -24,7 +24,7 @@ class TestReportController: def test_generate_inventory_report(self, report_fixture: ReportControllerFixture): ctrl = report_fixture.manager.admin_report_controller db = report_fixture.ctrl.db - + library = report_fixture.ctrl.db.default_library() system_admin, _ = create(db.session, Admin, email="admin@email.com") system_admin.add_role(AdminRole.SYSTEM_ADMIN) default = db.default_library()