From 1ffa3e81ea56b7e7eb92ac6abe0762028bf4de0d Mon Sep 17 00:00:00 2001 From: Tim DiLauro Date: Thu, 21 Mar 2024 17:37:51 -0400 Subject: [PATCH] Playback time tables are independent of related tables. (PP-1044) (#1739) * Minute-level timestamp. * Playtime accounting doesn't need related collection, library, identifier rows. --- ...56_playtime_tables_have_no_dependencies.py | 293 +++++++++++++ core/jobs/playtime_entries.py | 194 +++++---- core/model/identifier.py | 40 +- core/model/time_tracking.py | 172 ++++++-- core/query/playtime_entries.py | 3 + core/util/datetime_helpers.py | 9 + tests/api/controller/test_playtime_entries.py | 7 +- tests/core/jobs/test_playtime_entries.py | 412 +++++++++++++----- tests/core/models/test_time_tracking.py | 109 ++++- tests/core/util/test_datetime_helpers.py | 34 ++ 10 files changed, 1019 insertions(+), 254 deletions(-) create mode 100644 alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py diff --git a/alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py b/alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py new file mode 100644 index 0000000000..d4099bd68f --- /dev/null +++ b/alembic/versions/20240318_3e43ed59f256_playtime_tables_have_no_dependencies.py @@ -0,0 +1,293 @@ +"""Playtime tables have no dependencies. + +Revision ID: 3e43ed59f256 +Revises: 9d2dccb0d6ff +Create Date: 2024-03-18 01:34:28.381129+00:00 + +""" +from functools import cache + +import sqlalchemy as sa +from sqlalchemy.engine import Connection +from sqlalchemy.orm.session import Session + +from alembic import op +from core.model import Identifier, get_one +from core.model.time_tracking import _isbn_for_identifier, _title_for_identifier + +# revision identifiers, used by Alembic. +revision = "3e43ed59f256" +down_revision = "9d2dccb0d6ff" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + session = Session(bind=op.get_bind()) + conn = session.connection() + + op.add_column( + "playtime_entries", sa.Column("identifier_str", sa.String(), nullable=True) + ) + op.add_column( + "playtime_entries", sa.Column("collection_name", sa.String(), nullable=True) + ) + op.add_column( + "playtime_entries", sa.Column("library_name", sa.String(), nullable=True) + ) + + # Migrate the existing playtime records before we set the new columns to not nullable. 
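+    # Backfilling must happen first: the `ALTER ... SET NOT NULL` steps below
+    # would fail if any pre-existing row were still NULL in the new columns.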
+ update_playtime_entries(conn) + + op.alter_column( + "playtime_entries", "identifier_str", existing_type=sa.String(), nullable=False + ) + op.alter_column( + "playtime_entries", "collection_name", existing_type=sa.String(), nullable=False + ) + op.alter_column( + "playtime_entries", "library_name", existing_type=sa.String(), nullable=False + ) + + op.alter_column( + "playtime_entries", "identifier_id", existing_type=sa.INTEGER(), nullable=True + ) + op.alter_column( + "playtime_entries", "collection_id", existing_type=sa.INTEGER(), nullable=True + ) + op.alter_column( + "playtime_entries", "library_id", existing_type=sa.INTEGER(), nullable=True + ) + op.drop_constraint( + "playtime_entries_identifier_id_collection_id_library_id_tra_key", + "playtime_entries", + type_="unique", + ) + op.drop_constraint( + "playtime_entries_collection_id_fkey", "playtime_entries", type_="foreignkey" + ) + op.drop_constraint( + "playtime_entries_identifier_id_fkey", "playtime_entries", type_="foreignkey" + ) + op.drop_constraint( + "playtime_entries_library_id_fkey", "playtime_entries", type_="foreignkey" + ) + + op.create_unique_constraint( + "unique_playtime_entry", + "playtime_entries", + ["tracking_id", "identifier_str", "collection_name", "library_name"], + ) + op.create_foreign_key( + "playtime_entries_identifier_id_fkey", + "playtime_entries", + "identifiers", + ["identifier_id"], + ["id"], + onupdate="CASCADE", + ondelete="SET NULL", + ) + op.create_foreign_key( + "playtime_entries_collection_id_fkey", + "playtime_entries", + "collections", + ["collection_id"], + ["id"], + onupdate="CASCADE", + ondelete="SET NULL", + ) + op.create_foreign_key( + "playtime_entries_library_id_fkey", + "playtime_entries", + "libraries", + ["library_id"], + ["id"], + onupdate="CASCADE", + ondelete="SET NULL", + ) + + op.add_column("playtime_summaries", sa.Column("title", sa.String(), nullable=True)) + op.add_column("playtime_summaries", sa.Column("isbn", sa.String(), nullable=True)) + op.alter_column( + "playtime_summaries", "collection_id", existing_type=sa.INTEGER(), nullable=True + ) + op.alter_column( + "playtime_summaries", "library_id", existing_type=sa.INTEGER(), nullable=True + ) + op.drop_constraint( + "playtime_summaries_identifier_str_collection_name_library_n_key", + "playtime_summaries", + type_="unique", + ) + op.create_unique_constraint( + "unique_playtime_summary", + "playtime_summaries", + ["timestamp", "identifier_str", "collection_name", "library_name"], + ) + + # Update ISBN, where available, and title in summary table. 
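+    # (The lookup helpers below are wrapped in functools.cache, so repeated
+    # summary rows for the same identifier trigger the lookup queries only once.)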
+ update_summary_isbn_and_title(session) + + +def downgrade() -> None: + op.drop_constraint("unique_playtime_summary", "playtime_summaries", type_="unique") + op.create_unique_constraint( + "playtime_summaries_identifier_str_collection_name_library_n_key", + "playtime_summaries", + ["identifier_str", "collection_name", "library_name", "timestamp"], + ) + op.alter_column( + "playtime_summaries", + "collection_id", + existing_type=sa.INTEGER(), + nullable=False, + ) + op.alter_column( + "playtime_summaries", "library_id", existing_type=sa.INTEGER(), nullable=False + ) + op.drop_column("playtime_summaries", "isbn") + op.drop_column("playtime_summaries", "title") + + op.drop_constraint( + "playtime_entries_identifier_id_fkey", "playtime_entries", type_="foreignkey" + ) + op.drop_constraint( + "playtime_entries_collection_id_fkey", "playtime_entries", type_="foreignkey" + ) + op.drop_constraint( + "playtime_entries_library_id_fkey", "playtime_entries", type_="foreignkey" + ) + op.create_foreign_key( + "playtime_entries_library_id_fkey", + "playtime_entries", + "libraries", + ["library_id"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + op.create_foreign_key( + "playtime_entries_identifier_id_fkey", + "playtime_entries", + "identifiers", + ["identifier_id"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + op.create_foreign_key( + "playtime_entries_collection_id_fkey", + "playtime_entries", + "collections", + ["collection_id"], + ["id"], + onupdate="CASCADE", + ondelete="CASCADE", + ) + op.drop_constraint("unique_playtime_entry", "playtime_entries", type_="unique") + op.create_unique_constraint( + "playtime_entries_identifier_id_collection_id_library_id_tra_key", + "playtime_entries", + ["identifier_id", "collection_id", "library_id", "tracking_id"], + ) + op.alter_column( + "playtime_entries", "library_id", existing_type=sa.INTEGER(), nullable=False + ) + op.alter_column( + "playtime_entries", "collection_id", existing_type=sa.INTEGER(), nullable=False + ) + op.alter_column( + "playtime_entries", "identifier_id", existing_type=sa.INTEGER(), nullable=False + ) + op.drop_column("playtime_entries", "library_name") + op.drop_column("playtime_entries", "collection_name") + op.drop_column("playtime_entries", "identifier_str") + + +def update_summary_isbn_and_title(session: Session) -> None: + """Update existing playtime summary records in the database.""" + conn = session.connection() + rows = conn.execute("SELECT id, identifier_id FROM playtime_summaries").all() + + for row in rows: + identifier = get_one(session, Identifier, id=row.identifier_id) + isbn = cached_isbn_lookup(identifier) + title = cached_title_lookup(identifier) + conn.execute( + """ + UPDATE playtime_summaries + SET isbn = %(isbn)s, title = %(title)s + WHERE id = %(id)s + """, + {"id": row.id, "isbn": isbn, "title": title}, + ) + + +@cache +def cached_isbn_lookup(identifier: Identifier) -> str | None: + """Given an identifier, return its ISBN.""" + return _isbn_for_identifier(identifier) + + +@cache +def cached_title_lookup(identifier: Identifier) -> str | None: + """Given an identifier, return its title.""" + return _title_for_identifier(identifier) + + +def update_playtime_entries(conn: Connection) -> None: + """Update existing playtime entries in the database.""" + rows = conn.execute( + "SELECT id, identifier_id, collection_id, library_id FROM playtime_entries" + ).all() + + for row in rows: + conn.execute( + """ + UPDATE playtime_entries + SET identifier_str = %(urn)s, collection_name = 
%(collection_name)s, library_name = %(library_name)s
+            WHERE id = %(id)s
+            """,
+            {
+                "id": row.id,
+                "urn": get_identifier_urn(conn, row.identifier_id),
+                "collection_name": get_collection_name(conn, row.collection_id),
+                "library_name": get_library_name(conn, row.library_id),
+            },
+        )
+
+
+@cache
+def get_collection_name(conn: Connection, collection_id: int) -> str:
+    """Given the id of a collection, return its name."""
+    return conn.execute(
+        """
+        SELECT ic.name
+        FROM collections c
+        JOIN integration_configurations ic on c.integration_configuration_id = ic.id
+        WHERE c.id = %s
+        """,
+        (collection_id,),
+    ).scalar_one()
+
+
+@cache
+def get_identifier_urn(conn: Connection, identifier_id: int) -> str:
+    """Given the id of an identifier, return its URN."""
+    row = conn.execute(
+        """
+        SELECT type, identifier
+        FROM identifiers
+        WHERE id = %s
+        """,
+        (identifier_id,),
+    ).one()
+    return Identifier._urn_from_type_and_value(row.type, row.identifier)
+
+
+@cache
+def get_library_name(conn: Connection, library_id: int) -> str:
+    """Given the id of a library, return its name."""
+    return conn.execute(
+        "SELECT name FROM libraries WHERE id = %s", (library_id,)
+    ).scalar_one()
diff --git a/core/jobs/playtime_entries.py b/core/jobs/playtime_entries.py
index b13510b340..b28df81cf4 100644
--- a/core/jobs/playtime_entries.py
+++ b/core/jobs/playtime_entries.py
@@ -4,19 +4,17 @@
 import csv
 import os
 from collections import defaultdict
+from collections.abc import Iterable
 from datetime import datetime, timedelta
 from tempfile import TemporaryFile
-from typing import TYPE_CHECKING, cast
+from typing import TYPE_CHECKING, Any, Protocol
 
 import dateutil.parser
 import pytz
-from sqlalchemy.orm import Session
+from sqlalchemy.sql.expression import false, true
 from sqlalchemy.sql.functions import sum
 
 from core.config import Configuration
-from core.model import get_one
-from core.model.edition import Edition
-from core.model.identifier import Identifier, RecursiveEquivalencyCache
 from core.model.time_tracking import PlaytimeEntry, PlaytimeSummary
 from core.util.datetime_helpers import previous_months, utc_now
 from scripts import Script
@@ -25,6 +23,14 @@
     from sqlalchemy.orm import Query
 
 
+# TODO: Replace uses once we have a proper CSV writer type or protocol.
+class Writer(Protocol):
+    """CSV Writer protocol."""
+
+    def writerow(self, row: Iterable[Any]) -> Any:
+        ...
+
+
 class PlaytimeEntriesSummationScript(Script):
     def do_run(self):
         # Reap older processed entries
@@ -35,38 +41,69 @@ def do_run(self):
         deleted = (
             self._db.query(PlaytimeEntry)
             .filter(
-                PlaytimeEntry.processed == True, PlaytimeEntry.timestamp < older_than_ts
+                PlaytimeEntry.processed == true(),
+                PlaytimeEntry.timestamp < older_than_ts,
             )
             .delete()
         )
         self.log.info(f"Deleted {deleted} entries. Older than {older_than_ts}")
 
         # Collect everything from one hour ago, reducing entries still in flux
-        cuttoff = utc_now() - timedelta(hours=1)
+        cut_off = utc_now() - timedelta(hours=1)
 
         # Fetch the unprocessed entries
         result = self._db.query(PlaytimeEntry).filter(
-            PlaytimeEntry.processed == False,
-            PlaytimeEntry.timestamp <= cuttoff,
+            PlaytimeEntry.processed == false(),
+            PlaytimeEntry.timestamp <= cut_off,
         )
-        by_identifier = defaultdict(int)
 
-        # Aggregate entries per identifier-timestamp-collection-library tuple
+        # Aggregate entries per identifier-timestamp-collection-library grouping.
+        # The label forms of the identifier, collection, and library are also
+        # factored in, in case any of the foreign keys are missing.
# Since timestamps should be on minute-boundaries the aggregation # can be written to PlaytimeSummary directly + def group_key_for_entry(e: PlaytimeEntry) -> tuple: + return ( + e.timestamp, + e.identifier, + e.collection, + e.library, + e.identifier_str, + e.collection_name, + e.library_name, + ) + + by_group = defaultdict(int) for entry in result.all(): - by_identifier[ - (entry.identifier, entry.collection, entry.library, entry.timestamp) - ] += entry.total_seconds_played + by_group[group_key_for_entry(entry)] += entry.total_seconds_played entry.processed = True - for id_ts, seconds in by_identifier.items(): - identifier, collection, library, timestamp = id_ts + for group, seconds in by_group.items(): + # Values are in the same order returned from `group_key_for_entry` above. + ( + timestamp, + identifier, + collection, + library, + identifier_str, + collection_name, + library_name, + ) = group + + # Update the playtime summary. playtime = PlaytimeSummary.add( - identifier, collection, library, timestamp, seconds + self._db, + ts=timestamp, + seconds=seconds, + identifier=identifier, + collection=collection, + library=library, + identifier_str=identifier_str, + collection_name=collection_name, + library_name=library_name, ) self.log.info( - f"Added {seconds} to {identifier.urn} ({collection.name} in {library.name}) for {timestamp}: new total {playtime.total_seconds_played}." + f"Added {seconds} to {identifier_str} ({collection_name} in {library_name}) for {timestamp}: new total {playtime.total_seconds_played}." ) self._db.commit() @@ -154,52 +191,12 @@ def do_run(self): ) as temp: # Write the data as a CSV writer = csv.writer(temp) - writer.writerow( - [ - "date", - "urn", - "isbn", - "collection", - "library", - "title", - "total seconds", - ] + _produce_report( + writer, + date_label=report_date_label, + records=self._fetch_report_records(start=start, until=until), ) - for ( - urn, - collection_name, - library_name, - identifier_id, - total, - ) in self._fetch_report_records(start=start, until=until): - edition: Edition | None = None - identifier: Identifier | None = None - if identifier_id: - edition = get_one( - self._db, Edition, primary_identifier_id=identifier_id - ) - # Use the identifier from the edition where available. - # Otherwise, we'll have to look it up. 
- identifier = ( - edition.primary_identifier - if edition - else get_one(self._db, Identifier, id=identifier_id) - ) - isbn = self._isbn_for_identifier(identifier) - title = edition and edition.title - row = ( - report_date_label, - urn, - isbn, - collection_name, - library_name, - title, - total, - ) - # Write the row to the CSV - writer.writerow(row) - # Rewind the file and send the report email temp.seek(0) recipient = os.environ.get( @@ -223,7 +220,8 @@ def _fetch_report_records(self, start: datetime, until: datetime) -> Query: PlaytimeSummary.identifier_str, PlaytimeSummary.collection_name, PlaytimeSummary.library_name, - PlaytimeSummary.identifier_id, + PlaytimeSummary.isbn, + PlaytimeSummary.title, sum(PlaytimeSummary.total_seconds_played), ) .filter( @@ -235,41 +233,47 @@ def _fetch_report_records(self, start: datetime, until: datetime) -> Query: PlaytimeSummary.collection_name, PlaytimeSummary.library_name, PlaytimeSummary.identifier_id, + PlaytimeSummary.isbn, + PlaytimeSummary.title, + ) + .order_by( + PlaytimeSummary.collection_name, + PlaytimeSummary.library_name, + PlaytimeSummary.identifier_str, ) ) - @staticmethod - def _isbn_for_identifier( - identifier: Identifier | None, - /, - *, - default_value: str = "", - ) -> str: - """Find the strongest ISBN match for the given identifier. - - :param identifier: The identifier to match. - :param default_value: The default value to return if the identifier is missing or a match is not found. - """ - if identifier is None: - return default_value - - if identifier.type == Identifier.ISBN: - return cast(str, identifier.identifier) - # If our identifier is not an ISBN itself, we'll use our Recursive Equivalency - # mechanism to find the next best one that is, if available. - db = Session.object_session(identifier) - eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter( - RecursiveEquivalencyCache.parent_identifier_id == identifier.id +def _produce_report(writer: Writer, date_label, records=None) -> None: + if not records: + records = [] + writer.writerow( + ( + "date", + "urn", + "isbn", + "collection", + "library", + "title", + "total seconds", ) - equivalent_identifiers = ( - db.query(Identifier) - .filter(Identifier.id.in_(eq_subquery)) - .filter(Identifier.type == Identifier.ISBN) - ) - - isbn = next( - map(lambda id_: id_.identifier, equivalent_identifiers), - None, + ) + for ( + identifier_str, + collection_name, + library_name, + isbn, + title, + total, + ) in records: + row = ( + date_label, + identifier_str, + isbn, + collection_name, + library_name, + title, + total, ) - return isbn or default_value + # Write the row to the CSV + writer.writerow(row) diff --git a/core/model/identifier.py b/core/model/identifier.py index 482d4a3698..d68858152d 100644 --- a/core/model/identifier.py +++ b/core/model/identifier.py @@ -399,18 +399,20 @@ def valid_as_foreign_identifier(cls, type, id): @property def urn(self) -> str: - identifier_text = quote(self.identifier or "") - if self.type == Identifier.ISBN: - return self.ISBN_URN_SCHEME_PREFIX + identifier_text - elif self.type == Identifier.URI: - return self.identifier or "" - elif self.type == Identifier.GUTENBERG_ID: - return self.GUTENBERG_URN_SCHEME_PREFIX + identifier_text + return self._urn_from_type_and_value(self.type, self.identifier) + + @classmethod + def _urn_from_type_and_value(cls, id_type: str | None, id_value: str | None) -> str: + identifier_text = quote(id_value or "") + if id_type == Identifier.ISBN: + return cls.ISBN_URN_SCHEME_PREFIX + 
identifier_text + elif id_type == Identifier.URI: + return id_value or "" + elif id_type == Identifier.GUTENBERG_ID: + return cls.GUTENBERG_URN_SCHEME_PREFIX + identifier_text else: - identifier_type = quote(self.type or "") - return self.URN_SCHEME_PREFIX + "{}/{}".format( - identifier_type, identifier_text - ) + identifier_type = quote(id_type or "") + return f"{cls.URN_SCHEME_PREFIX}{identifier_type}/{identifier_text}" @property def work(self): @@ -546,6 +548,7 @@ def _parse_urn( identifier_string: str, identifier_type: str, must_support_license_pools: bool = False, + autocreate: bool = True, ) -> tuple[Identifier, bool]: """Parse identifier string. @@ -554,6 +557,7 @@ def _parse_urn( :param identifier_type: Identifier's type :param must_support_license_pools: Boolean value indicating whether there should be a DataSource that provides licenses for books identified by the given identifier + :param autocreate: Boolean value indicating whether an identifier should be created if it's not found. :return: 2-tuple containing Identifier object and a boolean value indicating whether it's new """ if must_support_license_pools: @@ -565,7 +569,9 @@ def _parse_urn( # This is fine. pass - return cls.for_foreign_id(_db, identifier_type, identifier_string) + return cls.for_foreign_id( + _db, identifier_type, identifier_string, autocreate=autocreate + ) @classmethod @overload @@ -574,6 +580,7 @@ def parse_urn( _db: Session, identifier_string: str, must_support_license_pools: bool = False, + autocreate=True, ) -> tuple[Identifier, bool]: ... @@ -584,6 +591,7 @@ def parse_urn( _db: Session, identifier_string: str | None, must_support_license_pools: bool = False, + autocreate=True, ) -> tuple[Identifier | None, bool | None]: ... @@ -593,6 +601,7 @@ def parse_urn( _db: Session, identifier_string: str | None, must_support_license_pools: bool = False, + autocreate=True, ) -> tuple[Identifier | None, bool | None]: """Parse identifier string. @@ -600,6 +609,7 @@ def parse_urn( :param identifier_string: String containing an identifier :param must_support_license_pools: Boolean value indicating whether there should be a DataSource that provides licenses for books identified by the given identifier + :param autocreate: Boolean value indicating whether an identifier should be created if it's not found. 
:return: 2-tuple containing Identifier object and a boolean value indicating whether it's new """ # I added this in here in my refactoring because there is a test @@ -615,7 +625,11 @@ def parse_urn( ) return cls._parse_urn( - _db, identifier_string, identifier_type, must_support_license_pools + _db, + identifier_string, + identifier_type, + must_support_license_pools, + autocreate=autocreate, ) @classmethod diff --git a/core/model/time_tracking.py b/core/model/time_tracking.py index 0ad377c2c2..0ab7c0cc13 100644 --- a/core/model/time_tracking.py +++ b/core/model/time_tracking.py @@ -1,7 +1,7 @@ from __future__ import annotations import datetime -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast from sqlalchemy import ( Boolean, @@ -15,12 +15,19 @@ ) from sqlalchemy.orm import Session, relationship -from core.model import Base, get_one_or_create +from core.model import ( + Base, + Edition, + Identifier, + RecursiveEquivalencyCache, + get_one, + get_one_or_create, +) +from core.util.datetime_helpers import minute_timestamp if TYPE_CHECKING: from sqlalchemy.orm import Mapped - from core.model import Identifier from core.model.collection import Collection from core.model.library import Library @@ -30,21 +37,27 @@ class PlaytimeEntry(Base): id = Column(Integer, autoincrement=True, primary_key=True) + # Even if related objects are deleted, we keep our row. identifier_id = Column( Integer, - ForeignKey("identifiers.id", onupdate="CASCADE", ondelete="CASCADE"), - nullable=False, + ForeignKey("identifiers.id", onupdate="CASCADE", ondelete="SET NULL"), + nullable=True, ) collection_id = Column( Integer, - ForeignKey("collections.id", onupdate="CASCADE", ondelete="CASCADE"), - nullable=False, + ForeignKey("collections.id", onupdate="CASCADE", ondelete="SET NULL"), + nullable=True, ) library_id = Column( Integer, - ForeignKey("libraries.id", onupdate="CASCADE", ondelete="CASCADE"), - nullable=False, + ForeignKey("libraries.id", onupdate="CASCADE", ondelete="SET NULL"), + nullable=True, ) + # Related objects can be deleted, so we keep string representation. + identifier_str = Column(String, nullable=False) + collection_name = Column(String, nullable=False) + library_name = Column(String, nullable=False) + timestamp: Mapped[datetime.datetime] = Column( DateTime(timezone=True), nullable=False ) @@ -63,7 +76,13 @@ class PlaytimeEntry(Base): library: Mapped[Library] = relationship("Library", uselist=False) __table_args__ = ( - UniqueConstraint("identifier_id", "collection_id", "library_id", "tracking_id"), + UniqueConstraint( + "tracking_id", + "identifier_str", + "collection_name", + "library_name", + name="unique_playtime_entry", + ), ) @@ -72,6 +91,7 @@ class PlaytimeSummary(Base): id = Column(Integer, autoincrement=True, primary_key=True) + # Even if related objects are deleted, we keep our row. identifier_id = Column( Integer, ForeignKey("identifiers.id", onupdate="CASCADE", ondelete="SET NULL"), @@ -81,18 +101,16 @@ class PlaytimeSummary(Base): collection_id = Column( Integer, ForeignKey("collections.id", onupdate="CASCADE", ondelete="SET NULL"), - nullable=False, + nullable=True, index=True, ) library_id = Column( Integer, ForeignKey("libraries.id", onupdate="CASCADE", ondelete="SET NULL"), - nullable=False, + nullable=True, index=True, ) - # In case an identifier is deleted, we should not delete "analytics" on it - # so we store the identifier "string" as well. 
This should be an identifier.urn string
-    # The same logic applies to collection, and library
+    # Related objects can be deleted, so we keep string representation.
     identifier_str = Column(String, nullable=False)
     collection_name = Column(String, nullable=False)
     library_name = Column(String, nullable=False)
@@ -109,48 +127,138 @@
 
     total_seconds_played = Column(Integer, default=0)
 
+    title = Column(String)
+    isbn = Column(String)
+
     identifier: Mapped[Identifier] = relationship("Identifier", uselist=False)
     collection: Mapped[Collection] = relationship("Collection", uselist=False)
     library: Mapped[Library] = relationship("Library", uselist=False)
 
     __table_args__ = (
         UniqueConstraint(
-            "identifier_str", "collection_name", "library_name", "timestamp"
+            "timestamp",
+            "identifier_str",
+            "collection_name",
+            "library_name",
+            name="unique_playtime_summary",
         ),
     )
 
     @classmethod
     def add(
         cls,
-        identifier: Identifier,
-        collection: Collection,
-        library: Library,
+        _db: Session,
         ts: datetime.datetime,
         seconds: int,
+        identifier: Identifier | None,
+        collection: Collection | None,
+        library: Library | None,
+        identifier_str: str,
+        collection_name: str,
+        library_name: str | None,
     ) -> PlaytimeSummary:
-        """Add playtime in seconds to a summary record for a minute-timestamp"""
-        _db = Session.object_session(identifier)
-        # Sanitize the timestamp to a minute boundary
-        timestamp = datetime.datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute)
+        """Add playtime (in seconds) to its associated minute-level summary record."""
+        # Update each label with its current value, if its foreign key is present.
+        # Because a collection is sometimes renamed when marked for deletion, we
+        # won't update the name in that case.
+        if identifier:
+            identifier_str = identifier.urn
+        if collection and collection.name and not collection.marked_for_deletion:
+            collection_name = collection.name
+        if library and library.name:
+            library_name = library.name
+
+        # When the related identifier, collection, and/or library rows are available,
+        # we'll use those to look up or create the summary row. If not, we'll use their
+        # string labels to do so. The minute-level timestamp is always part of the key.
+        _potential_lookup_keys = {
+            "timestamp": minute_timestamp(ts),
+            "identifier_id": identifier.id if identifier else None,
+            "identifier_str": None if identifier else identifier_str,
+            "collection_id": collection.id if collection else None,
+            "collection_name": None if collection else collection_name,
+            "library_id": library.id if library else None,
+            "library_name": None if library else library_name,
+        }
+        lookup_keys = {k: v for k, v in _potential_lookup_keys.items() if v is not None}
+        additional_columns = {
+            k: v
+            for k, v in {
+                "identifier_str": identifier_str,
+                "collection_name": collection_name,
+                "library_name": library_name,
+            }.items()
+            if k not in lookup_keys
+        }
 
         # Ensure the row exists
         playtime, _ = get_one_or_create(
             _db,
             cls,
-            timestamp=timestamp,
-            identifier_id=identifier.id,
-            collection_id=collection.id,
-            library_id=library.id,
-            create_method_kwargs={
-                "identifier_str": identifier.urn,
-                "collection_name": collection.name,
-                "library_name": library.name,
-            },
+            create_method_kwargs=additional_columns,
+            **lookup_keys,
         )
 
+        # Set the label values, in case they weren't used to create the summary row.
+        playtime.identifier_str = identifier_str
+        playtime.collection_name = collection_name
+        playtime.library_name = library_name
+
+        # Set ISBN and title, if needed and possible.
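+        # `autocreate=False` below means we only look up an existing Identifier
+        # row; we never create one just to label an analytics record.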
+ if (not playtime.isbn or not playtime.title) and not identifier: + identifier, _ = Identifier.parse_urn(_db, identifier_str, autocreate=False) + if not playtime.isbn and identifier: + playtime.isbn = _isbn_for_identifier(identifier) + if not playtime.title and identifier: + playtime.title = _title_for_identifier(identifier) + # Race condition safe update _db.query(cls).filter(cls.id == playtime.id).update( {"total_seconds_played": cls.total_seconds_played + seconds} ) _db.refresh(playtime) return playtime + + +def _title_for_identifier(identifier: Identifier | None) -> str | None: + """Find the strongest title match for the given identifier. + + :param identifier: The identifier to match. + :return: The title string associated with the identifier or None, if no match is found. + """ + if identifier is None: + return None + db = Session.object_session(identifier) + if edition := get_one(db, Edition, primary_identifier_id=identifier.id): + return edition.title + return None + + +def _isbn_for_identifier(identifier: Identifier | None) -> str | None: + """Find the strongest ISBN match for the given identifier. + + :param identifier: The identifier to match. + :return: The ISBN string associated with the identifier or None, if no match is found. + """ + if identifier is None: + return None + + if identifier.type == Identifier.ISBN: + return cast(str, identifier.identifier) + + # If our identifier is not an ISBN itself, we'll use our Recursive Equivalency + # mechanism to find the next best one that is, if available. + db = Session.object_session(identifier) + eq_subquery = db.query(RecursiveEquivalencyCache.identifier_id).filter( + RecursiveEquivalencyCache.parent_identifier_id == identifier.id + ) + equivalent_identifiers = ( + db.query(Identifier) + .filter(Identifier.id.in_(eq_subquery)) + .filter(Identifier.type == Identifier.ISBN) + ) + + return next( + map(lambda id_: id_.identifier, equivalent_identifiers), + None, + ) diff --git a/core/query/playtime_entries.py b/core/query/playtime_entries.py index 6b9f79c3e1..05bd82afce 100644 --- a/core/query/playtime_entries.py +++ b/core/query/playtime_entries.py @@ -51,6 +51,9 @@ def insert_playtime_entries( identifier_id=identifier.id, collection_id=collection.id, library_id=library.id, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, timestamp=entry.during_minute, total_seconds_played=entry.seconds_played, ) diff --git a/core/util/datetime_helpers.py b/core/util/datetime_helpers.py index 9f3bd8e6da..9212bc7333 100644 --- a/core/util/datetime_helpers.py +++ b/core/util/datetime_helpers.py @@ -93,3 +93,12 @@ def previous_months(number_of_months: int) -> tuple[datetime.date, datetime.date start = start.replace(day=1) until = now.replace(day=1) return start.date(), until.date() + + +def minute_timestamp(dt: datetime.datetime) -> datetime.datetime: + """Minute resolution timestamp by truncating the seconds from a datetime object. 
+ + :param dt: datetime object with seconds resolution + :return: datetime object with minute resolution + """ + return datetime.datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute) diff --git a/tests/api/controller/test_playtime_entries.py b/tests/api/controller/test_playtime_entries.py index 36ab25ae8e..abf94425cc 100644 --- a/tests/api/controller/test_playtime_entries.py +++ b/tests/api/controller/test_playtime_entries.py @@ -103,6 +103,8 @@ def test_track_playtime_duplicate_id_ok( db = circulation_fixture.db identifier = db.identifier() collection = db.default_collection() + library = db.default_library() + # Attach the identifier to the collection pool = db.licensepool( db.edition( @@ -119,7 +121,10 @@ def test_track_playtime_duplicate_id_ok( total_seconds_played=12, identifier_id=identifier.id, collection_id=collection.id, - library_id=db.default_library().id, + library_id=library.id, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, ) ) diff --git a/tests/core/jobs/test_playtime_entries.py b/tests/core/jobs/test_playtime_entries.py index 0950a8e5d7..f58f0184be 100644 --- a/tests/core/jobs/test_playtime_entries.py +++ b/tests/core/jobs/test_playtime_entries.py @@ -1,12 +1,14 @@ from __future__ import annotations import re -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta +from typing import Literal from unittest.mock import MagicMock, call, patch import pytest import pytz from freezegun import freeze_time +from sqlalchemy.sql.expression import and_, null from api.model.time_tracking import PlaytimeTimeEntry from core.config import Configuration @@ -15,7 +17,6 @@ PlaytimeEntriesEmailReportsScript, PlaytimeEntriesSummationScript, ) -from core.model import create from core.model.collection import Collection from core.model.identifier import Equivalency, Identifier from core.model.library import Library @@ -41,6 +42,9 @@ def create_playtime_entries( library_id=library.id, collection_id=collection.id, total_seconds_played=entry.seconds_played, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, ) db.session.add(inserted) all_inserted.append(inserted) @@ -60,10 +64,24 @@ def test_summation(self, db: DatabaseTransactionFixture): P = PlaytimeTimeEntry dk = date2k identifier = db.identifier() - identifier2 = db.identifier() - collection = db.default_collection() - collection2 = db.collection() + collection = db.collection() library = db.default_library() + + c2_old_name = "Colletcion 2 with typo" + c2_new_name = "Collection 2" + id2_old_value = "original-id" + id2_new_value = "updated-id" + id2_old_urn = f"urn:isbn:{id2_old_value}" + id2_new_urn = f"urn:isbn:{id2_new_value}" + l2_old_name = "Lirbrary 2 with typo" + l2_new_name = "Library 2" + + identifier2 = db.identifier( + identifier_type=Identifier.ISBN, foreign_id=id2_old_value + ) + collection2 = db.collection(name=c2_old_name) + library2 = db.library(name=l2_old_name) + entries = create_playtime_entries( db, identifier, @@ -97,6 +115,17 @@ def test_summation(self, db: DatabaseTransactionFixture): P(id="1", during_minute=dk(m=1), seconds_played=40), ) + # Different library, should get grouped separately. 
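+        # (The group key includes the library, so these entries get their own
+        # summary rows even though identifier2 and collection2 appear above.)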
+        entries4 = create_playtime_entries(
+            db,
+            identifier2,
+            collection2,
+            library2,
+            P(id="0", during_minute=dk(m=0), seconds_played=30),
+            P(id="1", during_minute=dk(m=0), seconds_played=40),
+            P(id="2", during_minute=dk(m=0), seconds_played=30),
+        )
+
         # This entry should not be considered as it is too recent
         [out_of_scope_entry] = create_playtime_entries(
             db,
             identifier,
             collection,
             library,
             P(id="0", during_minute=utc_now(), seconds_played=30),
         )
@@ -116,9 +145,52 @@
         )
         processed_entry.processed = True
 
+        # Update identifier2, collection2 and library2.
+        # The existing playtime entries should remain unchanged, but the resulting
+        # playtime summary records should reflect these changes.
+        identifier2.identifier = id2_new_value
+        collection2.integration_configuration.name = c2_new_name
+        library2.name = l2_new_name
+
+        presummation_entries = db.session.query(PlaytimeEntry).count()
+
         PlaytimeEntriesSummationScript(db.session).run()
 
-        playtimes = (
+        postsummation_entries = db.session.query(PlaytimeEntry).count()
+
+        # The old "processed_entry" has been deleted, reducing the entry count
+        # by 1. Counts for the associated identifier2, collection, and library are also reduced by 1.
+        assert presummation_entries == 15
+        assert postsummation_entries == 14
+
+        # Ensure that the playtime entries have the original identifier 2 urn.
+        i2_entries = (
+            db.session.query(PlaytimeEntry)
+            .where(PlaytimeEntry.identifier_id == identifier2.id)
+            .all()
+        )
+        assert len(i2_entries) == 10
+        assert all(e.identifier_str == id2_old_urn for e in i2_entries)
+
+        # Ensure that the playtime entries have the original collection 2 name.
+        c2_entries = (
+            db.session.query(PlaytimeEntry)
+            .where(PlaytimeEntry.collection_id == collection2.id)
+            .all()
+        )
+        assert len(c2_entries) == 5
+        assert all(e.collection_name == c2_old_name for e in c2_entries)
+
+        # Ensure that the playtime entries have the original library 2 name.
+ l2_entries = ( + db.session.query(PlaytimeEntry) + .where(PlaytimeEntry.library_id == library2.id) + .all() + ) + assert len(l2_entries) == 3 + assert all(e.library_name == l2_old_name for e in l2_entries) + + summaries = ( db.session.query(PlaytimeSummary) .order_by( PlaytimeSummary.identifier_id, @@ -129,40 +201,64 @@ def test_summation(self, db: DatabaseTransactionFixture): .all() ) - assert len(playtimes) == 5 + assert len(summaries) == 6 - id1time, id2time1, id2time2, id2col2time, id2col2time1 = playtimes + id1time, id2time1, id2time2, id2col2time, id2col2time1, id2c2l2time = summaries assert id1time.identifier == identifier assert id1time.total_seconds_played == 120 assert id1time.collection == collection assert id1time.library == library + assert id1time.identifier_str == identifier.urn + assert id1time.collection_name == collection.name + assert id1time.library_name == library.name assert id1time.timestamp == dk() assert id2time1.identifier == identifier2 assert id2time1.total_seconds_played == 90 assert id2time1.collection == collection assert id2time1.library == library + assert id2time1.identifier_str == id2_new_urn + assert id2time1.collection_name == collection.name + assert id2time1.library_name == library.name assert id2time1.timestamp == dk() assert id2time2.identifier == identifier2 assert id2time2.collection == collection assert id2time2.library == library + assert id2time2.identifier_str == id2_new_urn + assert id2time2.collection_name == collection.name + assert id2time2.library_name == library.name assert id2time2.total_seconds_played == 30 assert id2time2.timestamp == dk(m=1) assert id2col2time.identifier == identifier2 assert id2col2time.collection == collection2 assert id2col2time.library == library + assert id2col2time.identifier_str == id2_new_urn + assert id2col2time.collection_name == c2_new_name + assert id2col2time.library_name == library.name assert id2col2time.total_seconds_played == 30 assert id2col2time.timestamp == dk() assert id2col2time1.identifier == identifier2 assert id2col2time1.collection == collection2 assert id2col2time1.library == library + assert id2col2time1.identifier_str == id2_new_urn + assert id2col2time1.collection_name == c2_new_name + assert id2col2time1.library_name == library.name assert id2col2time1.total_seconds_played == 40 assert id2col2time1.timestamp == dk(m=1) + assert id2c2l2time.identifier == identifier2 + assert id2c2l2time.collection == collection2 + assert id2c2l2time.library == library2 + assert id2c2l2time.identifier_str == id2_new_urn + assert id2c2l2time.collection_name == c2_new_name + assert id2c2l2time.library_name == l2_new_name + assert id2c2l2time.total_seconds_played == 100 + assert id2c2l2time.timestamp == dk() + def test_reap_processed_entries(self, db: DatabaseTransactionFixture): P = PlaytimeTimeEntry dk = date2k @@ -200,24 +296,203 @@ def test_reap_processed_entries(self, db: DatabaseTransactionFixture): .values(PlaytimeEntry.tracking_id) ) == [("4",), ("5",)] + def test_deleted_related_rows(self, db: DatabaseTransactionFixture): + def related_rows(table, present: Literal["all"] | Literal["none"]): + """Query for the presence of related identifier, collection, and library rows.""" + condition = ( + and_( + table.identifier_id != null(), + table.collection_id != null(), + table.library_id != null(), + ) + if present == "all" + else and_( + table.identifier_id == null(), + table.collection_id == null(), + table.library_id == null(), + ) + ) + return db.session.query(table).where(condition) + + 
summaries_query = db.session.query(PlaytimeSummary).order_by( + PlaytimeSummary.identifier_str, + PlaytimeSummary.collection_name, + PlaytimeSummary.library_name, + ) + + # Set up our identifiers, collections, and libraries. + id1_value = "080442957X" + id2_value = "9788175257665" + + c1_name = "Collection 1" + c2_name = "Collection 2" + + l1_name = "Library 1" + l2_name = "Library 2" + + id1 = db.identifier(identifier_type=Identifier.ISBN, foreign_id=id1_value) + id2 = db.identifier(identifier_type=Identifier.ISBN, foreign_id=id2_value) + id1_urn = id1.urn + id2_urn = id2.urn + + c1 = db.collection(name=c1_name) + c2 = db.collection(name=c2_name) + l1 = db.library(name=l1_name) + l2 = db.library(name=l2_name) + + P = PlaytimeTimeEntry + dk = date2k + + # Create some client playtime entries. + book1_round1 = create_playtime_entries( + db, + id1, + c1, + l1, + P(id="0", during_minute=dk(m=0), seconds_played=30), + P(id="1", during_minute=dk(m=0), seconds_played=30), + ) + book2_round1 = create_playtime_entries( + db, + id2, + c2, + l2, + P(id="2", during_minute=dk(m=0), seconds_played=12), + P(id="3", during_minute=dk(m=0), seconds_played=17), + ) + + # We should have four entries, all with keys to their associated records. + assert db.session.query(PlaytimeEntry).count() == 4 + assert related_rows(PlaytimeEntry, present="all").count() == 4 + assert related_rows(PlaytimeEntry, present="none").count() == 0 -def date1m(days): + # We should have no summary records, at this point. + assert db.session.query(PlaytimeSummary).count() == 0 + + # Summarize those records. + PlaytimeEntriesSummationScript(db.session).run() + + # Now we should have two summary records. + assert db.session.query(PlaytimeSummary).count() == 2 + # And they should have associated identifier, collection, and library records. + assert related_rows(PlaytimeSummary, present="all").count() == 2 + + # And those should be the correct identifier, collection, and library records. + b1sum1, b2sum1 = summaries_query.all() + + assert b1sum1.total_seconds_played == 60 + assert b1sum1.identifier_str == id1_urn + assert b1sum1.collection_name == c1_name + assert b1sum1.library_name == l1_name + assert b1sum1.identifier_id == id1.id + assert b1sum1.collection_id == c1.id + assert b1sum1.library_id == l1.id + + assert b2sum1.total_seconds_played == 29 + assert b2sum1.identifier_str == id2_urn + assert b2sum1.collection_name == c2_name + assert b2sum1.library_name == l2_name + assert b2sum1.identifier_id == id2.id + assert b2sum1.collection_id == c2.id + assert b2sum1.library_id == l2.id + + # Add some new client playtime entries. + book1_round2 = create_playtime_entries( + db, + id1, + c1, + l1, + P(id="4", during_minute=dk(m=0), seconds_played=30), + P(id="5", during_minute=dk(m=0), seconds_played=30), + ) + book2_round2 = create_playtime_entries( + db, + id2, + c2, + l2, + P(id="6", during_minute=dk(m=0), seconds_played=22), + P(id="7", during_minute=dk(m=0), seconds_played=46), + ) + + # Now we should have more entries. + assert db.session.query(PlaytimeEntry).count() == 8 + assert related_rows(PlaytimeEntry, present="all").count() == 8 + assert related_rows(PlaytimeEntry, present="none").count() == 0 + + # Remove our identifiers, collections, and libraries. + for obj in (id1, id2, c1, c2, l1, l2): + db.session.delete(obj) + db.session.flush() + + # Verify that the entry records still exist, but that none have related values. 
+ assert db.session.query(PlaytimeEntry).count() == 8 + assert related_rows(PlaytimeEntry, present="all").count() == 0 + assert related_rows(PlaytimeEntry, present="none").count() == 8 + + # Verify that the existing summary records have not been deleted. + assert db.session.query(PlaytimeSummary).count() == 2 + # None of them should have all associated records. + assert related_rows(PlaytimeSummary, present="all").count() == 0 + # And all of them should have none of their associated records. + assert related_rows(PlaytimeSummary, present="none").count() == 2 + + # Run the summarization script again. + PlaytimeEntriesSummationScript(db.session).run() + + # We should have the same summary records, none of which have links. + assert db.session.query(PlaytimeSummary).count() == 2 + assert related_rows(PlaytimeSummary, present="all").count() == 0 + assert related_rows(PlaytimeSummary, present="none").count() == 2 + + # Verify the times and other details. + b1sum1, b2sum1 = summaries_query.all() + + assert b1sum1.total_seconds_played == 120 + assert b1sum1.identifier_str == id1_urn + assert b1sum1.collection_name == c1_name + assert b1sum1.library_name == l1_name + assert b1sum1.identifier_id is None + assert b1sum1.collection_id is None + assert b1sum1.library_id is None + + assert b2sum1.total_seconds_played == 97 + assert b2sum1.identifier_str == id2_urn + assert b2sum1.collection_name == c2_name + assert b2sum1.library_name == l2_name + assert b2sum1.identifier_id is None + assert b2sum1.collection_id is None + assert b2sum1.library_id is None + + +def date1m(days) -> date: + """Helper to get a `date` value for 1 month ago, adjusted by the given number of days.""" return previous_months(number_of_months=1)[0] + timedelta(days=days) -def playtime(session, identifier, collection, library, timestamp, total_seconds): - return create( +def dt1m(days) -> datetime: + """Helper to get a `datetime` value for 1 month ago, adjusted by the given number of days.""" + return datetime.combine(date1m(days), datetime.min.time(), tzinfo=pytz.UTC) + + +def playtime( + session, + identifier: Identifier, + collection: Collection, + library: Library, + timestamp: datetime, + total_seconds: int, +): + return PlaytimeSummary.add( session, - PlaytimeSummary, + ts=timestamp, + seconds=total_seconds, identifier=identifier, collection=collection, library=library, - timestamp=timestamp, - total_seconds_played=total_seconds, identifier_str=identifier.urn, collection_name=collection.name, library_name=library.name, - )[0] + ) class TestPlaytimeEntriesEmailReportsScript: @@ -251,28 +526,28 @@ def test_do_run( ), ] strongest_isbn = isbn_ids["i2"].identifier - no_isbn = "" + no_isbn = None # We're using the RecursiveEquivalencyCache, so must refresh it. 
EquivalentIdentifiersCoverageProvider(db.session).run() - playtime(db.session, identifier, collection, library, date1m(3), 1) - playtime(db.session, identifier, collection, library, date1m(31), 2) + playtime(db.session, identifier, collection, library, dt1m(3), 1) + playtime(db.session, identifier, collection, library, dt1m(31), 2) playtime( - db.session, identifier, collection, library, date1m(-31), 60 + db.session, identifier, collection, library, dt1m(-31), 60 ) # out of range: prior to the beginning of the default reporting period playtime( - db.session, identifier, collection, library, date1m(95), 60 + db.session, identifier, collection, library, dt1m(95), 60 ) # out of range: future - playtime(db.session, identifier2, collection, library, date1m(3), 5) - playtime(db.session, identifier2, collection, library, date1m(4), 6) + playtime(db.session, identifier2, collection, library, dt1m(3), 5) + playtime(db.session, identifier2, collection, library, dt1m(4), 6) # Collection2 - playtime(db.session, identifier, collection2, library, date1m(3), 100) + playtime(db.session, identifier, collection2, library, dt1m(3), 100) # library2 - playtime(db.session, identifier, collection, library2, date1m(3), 200) + playtime(db.session, identifier, collection, library2, dt1m(3), 200) # collection2 library2 - playtime(db.session, identifier, collection2, library2, date1m(3), 300) + playtime(db.session, identifier, collection2, library2, dt1m(3), 300) reporting_name = "test cm" with ( @@ -299,7 +574,7 @@ def test_do_run( call_args = writer().writerow.call_args_list assert call_args == [ call( - [ + ( # Header "date", "urn", "isbn", @@ -307,8 +582,8 @@ def test_do_run( "library", "title", "total seconds", - ] - ), # Header + ) + ), call( ( column1, @@ -382,7 +657,7 @@ def test_no_reporting_email(self, db: DatabaseTransactionFixture): identifier = db.identifier() collection = db.default_collection() library = db.default_library() - _ = playtime(db.session, identifier, collection, library, date1m(20), 1) + _ = playtime(db.session, identifier, collection, library, dt1m(20), 1) with patch("core.jobs.playtime_entries.os.environ", new={}): script = PlaytimeEntriesEmailReportsScript(db.session) @@ -393,85 +668,6 @@ def test_no_reporting_email(self, db: DatabaseTransactionFixture): assert script._log.warning.call_count == 1 assert "date,urn,isbn,collection," in script._log.warning.call_args[0][0] - @pytest.mark.parametrize( - "id_key, equivalents, default_value, expected_isbn", - [ - # If the identifier is an ISBN, we will not use an equivalency. - [ - "i1", - (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), - "", - "080442957X", - ], - [ - "i2", - (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), - "", - "9788175257665", - ], - ["i1", (("i1", "i2", 200),), "", "080442957X"], - ["i2", (("i2", "i1", 200),), "", "9788175257665"], - # If identifier is not an ISBN, but has an equivalency that is, use the strongest match. - [ - "g2", - (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), - "", - "080442957X", - ], - [ - "g2", - (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), - "", - "9788175257665", - ], - # If we don't find an equivalent ISBN identifier, then we'll use the default. - ["g2", (), "default value", "default value"], - ["g1", (("g1", "g2", 1),), "default value", "default value"], - # If identifier is None, expect default value. 
- [None, (), "default value", "default value"], - ], - ) - def test__isbn_for_identifier( - self, - db: DatabaseTransactionFixture, - id_key: str | None, - equivalents: tuple[tuple[str, str, int | float]], - default_value: str, - expected_isbn: str, - ): - ids: dict[str, Identifier] = { - "i1": db.identifier( - identifier_type=Identifier.ISBN, foreign_id="080442957X" - ), - "i2": db.identifier( - identifier_type=Identifier.ISBN, foreign_id="9788175257665" - ), - "g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID), - "g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID), - } - equivalencies = [ - Equivalency( - input_id=ids[equivalent[0]].id, - output_id=ids[equivalent[1]].id, - strength=equivalent[2], - ) - for equivalent in equivalents - ] - test_identifier: Identifier | None = ids[id_key] if id_key is not None else None - if test_identifier is not None: - test_identifier.equivalencies = equivalencies - - # We're using the RecursiveEquivalencyCache, so must refresh it. - EquivalentIdentifiersCoverageProvider(db.session).run() - - # Act - result = PlaytimeEntriesEmailReportsScript._isbn_for_identifier( - test_identifier, - default_value=default_value, - ) - # Assert - assert result == expected_isbn - @pytest.mark.parametrize( "current_utc_time, start_arg, expected_start, until_arg, expected_until", [ diff --git a/tests/core/models/test_time_tracking.py b/tests/core/models/test_time_tracking.py index f14710820f..eaf1a4ee0b 100644 --- a/tests/core/models/test_time_tracking.py +++ b/tests/core/models/test_time_tracking.py @@ -3,8 +3,13 @@ import pytest from sqlalchemy.exc import IntegrityError -from core.model import create -from core.model.time_tracking import PlaytimeEntry, PlaytimeSummary +from core.equivalents_coverage import EquivalentIdentifiersCoverageProvider +from core.model import Equivalency, Identifier, create +from core.model.time_tracking import ( + PlaytimeEntry, + PlaytimeSummary, + _isbn_for_identifier, +) from core.util.datetime_helpers import utc_now from tests.fixtures.database import DatabaseTransactionFixture @@ -21,6 +26,9 @@ def test_create(self, db: DatabaseTransactionFixture): identifier_id=identifier.id, collection_id=collection.id, library_id=library.id, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, timestamp=now, total_seconds_played=30, tracking_id="tracking-id", @@ -29,6 +37,9 @@ def test_create(self, db: DatabaseTransactionFixture): assert entry.identifier == identifier assert entry.collection == collection assert entry.library == library + assert entry.identifier_str == identifier.urn + assert entry.collection_name == collection.name + assert entry.library_name == library.name assert entry.total_seconds_played == 30 assert entry.timestamp == now assert entry.tracking_id == "tracking-id" @@ -47,6 +58,9 @@ def test_constraints(self, db: DatabaseTransactionFixture): identifier_id=identifier.id, collection_id=collection.id, library_id=library.id, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, timestamp=now, total_seconds_played=61, tracking_id="tracking-id", @@ -66,6 +80,9 @@ def test_constraints(self, db: DatabaseTransactionFixture): identifier_id=identifier.id, collection_id=collection.id, library_id=library.id, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, timestamp=now, total_seconds_played=60, tracking_id="tracking-id-0", @@ -77,6 +94,9 @@ def test_constraints(self, db: 
DatabaseTransactionFixture): identifier_id=identifier_2.id, collection_id=collection.id, library_id=library.id, + identifier_str=identifier_2.urn, + collection_name=collection.name, + library_name=library.name, timestamp=now, total_seconds_played=60, tracking_id="tracking-id-0", @@ -88,6 +108,9 @@ def test_constraints(self, db: DatabaseTransactionFixture): identifier_id=identifier.id, collection_id=collection.id, library_id=library.id, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, timestamp=now, total_seconds_played=60, tracking_id="tracking-id-1", @@ -100,12 +123,15 @@ def test_constraints(self, db: DatabaseTransactionFixture): identifier_id=identifier.id, collection_id=collection.id, library_id=library.id, + identifier_str=identifier.urn, + collection_name=collection.name, + library_name=library.name, timestamp=now, total_seconds_played=60, tracking_id="tracking-id-0", ) assert ( - f"Key (identifier_id, collection_id, library_id, tracking_id)=({identifier.id}, {collection.id}, {library.id}, tracking-id-0) already exists" + f"Key (tracking_id, identifier_str, collection_name, library_name)=(tracking-id-0, {identifier.urn}, {collection.name}, {library.name}) already exists" in raised.exconly() ) @@ -144,7 +170,7 @@ def test_contraints(self, db: DatabaseTransactionFixture): total_seconds_played=600, ) assert ( - f'Key (identifier_str, collection_name, library_name, "timestamp")=({identifier.urn}, {collection.name}, {library.name}, 2000-01-01 12:00:00+00) already exists' + f'Key ("timestamp", identifier_str, collection_name, library_name)=(2000-01-01 12:00:00+00, {identifier.urn}, {collection.name}, {library.name}) already exists' in raised.exconly() ) @@ -191,5 +217,78 @@ def test_cascades(self, db: DatabaseTransactionFixture): db.session.delete(identifier) db.session.refresh(entry) - assert entry.identifier == None assert entry.identifier_str == urn + assert entry.identifier is None + + +class TestHelpers: + @pytest.mark.parametrize( + "id_key, equivalents, expected_isbn", + [ + # If the identifier is an ISBN, we will not use an equivalency. + [ + "i1", + (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), + "080442957X", + ], + [ + "i2", + (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), + "9788175257665", + ], + ["i1", (("i1", "i2", 200),), "080442957X"], + ["i2", (("i2", "i1", 200),), "9788175257665"], + # If identifier is not an ISBN, but has an equivalency that is, use the strongest match. + [ + "g2", + (("g1", "g2", 1), ("g2", "i1", 1), ("g1", "i2", 0.5)), + "080442957X", + ], + [ + "g2", + (("g1", "g2", 1), ("g2", "i1", 0.5), ("g1", "i2", 1)), + "9788175257665", + ], + # If we don't find an equivalent ISBN identifier, then we should get None. + ["g2", (), None], + ["g1", (("g1", "g2", 1),), None], + # If identifier is None, expect default value. 
+ [None, (), None], + ], + ) + def test__isbn_for_identifier( + self, + db: DatabaseTransactionFixture, + id_key: str | None, + equivalents: tuple[tuple[str, str, int | float]], + expected_isbn: str, + ): + ids: dict[str, Identifier] = { + "i1": db.identifier( + identifier_type=Identifier.ISBN, foreign_id="080442957X" + ), + "i2": db.identifier( + identifier_type=Identifier.ISBN, foreign_id="9788175257665" + ), + "g1": db.identifier(identifier_type=Identifier.GUTENBERG_ID), + "g2": db.identifier(identifier_type=Identifier.GUTENBERG_ID), + } + equivalencies = [ + Equivalency( + input_id=ids[equivalent[0]].id, + output_id=ids[equivalent[1]].id, + strength=equivalent[2], + ) + for equivalent in equivalents + ] + test_identifier: Identifier | None = ids[id_key] if id_key is not None else None + if test_identifier is not None: + test_identifier.equivalencies = equivalencies + + # We're using the RecursiveEquivalencyCache, so must refresh it. + EquivalentIdentifiersCoverageProvider(db.session).run() + + # Act + result = _isbn_for_identifier(test_identifier) + # Assert + assert result == expected_isbn diff --git a/tests/core/util/test_datetime_helpers.py b/tests/core/util/test_datetime_helpers.py index 7b63adea61..50761dd0f9 100644 --- a/tests/core/util/test_datetime_helpers.py +++ b/tests/core/util/test_datetime_helpers.py @@ -7,6 +7,7 @@ from core.util.datetime_helpers import ( datetime_utc, from_timestamp, + minute_timestamp, previous_months, strptime_utc, to_utc, @@ -273,3 +274,36 @@ def test_boundaries_at_different_current_times( # Both dates should be the 1st of the month. assert 1 == computed_start.day assert 1 == computed_until.day + + +class TestMinuteTimestamp: + @pytest.mark.parametrize( + "dt, expected", + [ + ( + datetime.datetime(2022, 1, 1, 12, 30, 45), + datetime.datetime(2022, 1, 1, 12, 30), + ), + ( + datetime.datetime(2022, 1, 1, 12, 30), + datetime.datetime(2022, 1, 1, 12, 30), + ), + ( + datetime.datetime(2022, 2, 15, 9, 45, 30), + datetime.datetime(2022, 2, 15, 9, 45), + ), + ( + datetime.datetime(2023, 12, 31, 23, 59, 59), + datetime.datetime(2023, 12, 31, 23, 59), + ), + ( + datetime.datetime(2024, 5, 10, 0, 0), + datetime.datetime(2024, 5, 10, 0, 0), + ), + ], + ) + def test_minute_resolution( + self, dt: datetime.datetime, expected: datetime.datetime + ): + result = minute_timestamp(dt) + assert result == expected
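
A reviewer-side sketch (not part of the patch) of the reworked write path,
assuming a SQLAlchemy `session`; the URN and name literals below are
illustrative values, not data from this change.

    from core.model.time_tracking import PlaytimeSummary
    from core.util.datetime_helpers import utc_now

    # With the related rows gone, the string labels plus the minute-level
    # timestamp identify the summary bucket, so `add` still works when
    # identifier, collection, and library are all None.
    summary = PlaytimeSummary.add(
        session,
        ts=utc_now(),
        seconds=30,
        identifier=None,  # Identifier row may have been deleted
        collection=None,  # Collection row may have been deleted
        library=None,  # Library row may have been deleted
        identifier_str="urn:isbn:9788175257665",  # illustrative URN
        collection_name="Collection 1",  # illustrative name
        library_name="Library 1",  # illustrative name
    )
    assert summary.timestamp.second == 0  # stored at minute resolution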