diff --git a/src/palace/manager/celery/tasks/marc.py b/src/palace/manager/celery/tasks/marc.py index dd2f7871d..6bd84930e 100644 --- a/src/palace/manager/celery/tasks/marc.py +++ b/src/palace/manager/celery/tasks/marc.py @@ -13,6 +13,10 @@ from palace.manager.service.redis.models.lock import RedisLock from palace.manager.service.redis.redis import Redis from palace.manager.sqlalchemy.model.collection import Collection +from palace.manager.sqlalchemy.model.identifier import ( + Identifier, + RecursiveEquivalencyCache, +) from palace.manager.sqlalchemy.model.marcfile import MarcFile from palace.manager.sqlalchemy.util import create from palace.manager.util.datetime_helpers import utc_now @@ -190,9 +194,10 @@ def marc_export_collection( ] # Find ISBN for any work that needs it - isbns = MarcExporter.query_isbn_identifiers( + isbns = RecursiveEquivalencyCache.equivalent_identifiers( session, {pool.identifier for work, pool in works_with_pools}, + Identifier.ISBN, ) for work, pool in works_with_pools: diff --git a/src/palace/manager/marc/exporter.py b/src/palace/manager/marc/exporter.py index d1a6cd26f..378b38af4 100644 --- a/src/palace/manager/marc/exporter.py +++ b/src/palace/manager/marc/exporter.py @@ -241,7 +241,7 @@ def query_works( .order_by(Work.id.asc()) .options( # We set loader options on all the collection properties - # needed to generate the MARC records, so that don't end + # needed to generate the MARC records, so that we don't end # up doing queries for each work. selectinload(Work.license_pools).options( selectinload(LicensePool.identifier), diff --git a/src/palace/manager/sqlalchemy/model/identifier.py b/src/palace/manager/sqlalchemy/model/identifier.py index 68eba02c1..ab530ce0a 100644 --- a/src/palace/manager/sqlalchemy/model/identifier.py +++ b/src/palace/manager/sqlalchemy/model/identifier.py @@ -23,7 +23,7 @@ func, ) from sqlalchemy.exc import MultipleResultsFound, NoResultFound -from sqlalchemy.orm import Mapped, joinedload, relationship +from sqlalchemy.orm import Mapped, joinedload, relationship, selectinload from sqlalchemy.orm.session import Session from sqlalchemy.sql import select from sqlalchemy.sql.expression import and_, or_ @@ -1216,3 +1216,54 @@ class RecursiveEquivalencyCache(Base): is_parent = Column(Boolean, Computed("parent_identifier_id = identifier_id")) __table_args__ = (UniqueConstraint(parent_identifier_id, identifier_id),) + + @staticmethod + def equivalent_identifiers( + session: Session, identifiers: set[Identifier], type: str | None = None + ) -> dict[Identifier, Identifier]: + """ + Find all equivalent identifiers for the given Identifiers. + + :param session: DB Session + :param identifiers: Set of Identifiers that we need equivalencies for + :param type: An optional type, if given only equivalent identifiers + of this type will be returned. + :return: A dictionary mapping input identifiers to equivalent identifiers. + """ + + # Find identifiers that don't need to be looked up + results = ( + {i: i for i in identifiers if i.type == type} if type is not None else {} + ) + needs_lookup = {i.id: i for i in identifiers - results.keys()} + if not needs_lookup: + return results + + query = ( + select(RecursiveEquivalencyCache) + .join( + Identifier, + RecursiveEquivalencyCache.identifier_id == Identifier.id, + ) + .where( + RecursiveEquivalencyCache.parent_identifier_id.in_(needs_lookup.keys()), + ) + .order_by( + RecursiveEquivalencyCache.parent_identifier_id, + RecursiveEquivalencyCache.is_parent.desc(), + RecursiveEquivalencyCache.identifier_id.desc(), + ) + .options( + selectinload(RecursiveEquivalencyCache.identifier), + ) + ) + if type is not None: + query = query.where(Identifier.type == Identifier.ISBN) + + equivalents = session.execute(query).scalars().all() + + for equivalent in equivalents: + parent_identifier = needs_lookup[equivalent.parent_identifier_id] + results[parent_identifier] = equivalent.identifier + + return results diff --git a/tests/fixtures/marc.py b/tests/fixtures/marc.py index ca035a9c9..f33977162 100644 --- a/tests/fixtures/marc.py +++ b/tests/fixtures/marc.py @@ -52,6 +52,10 @@ def work(self, collection: Collection | None = None) -> Work: edition = self._db.edition() self._db.licensepool(edition, collection=collection) work = self._db.work(presentation_edition=edition) + # We set the works last updated time to 1 day ago, so we know this work + # will only be included in delta exports covering a time range before + # 1 day ago. This lets us easily test works being included / excluded + # based on their `last_update_time`. work.last_update_time = utc_now() - datetime.timedelta(days=1) return work diff --git a/tests/manager/marc/test_exporter.py b/tests/manager/marc/test_exporter.py index 6b0ef8dbc..b5534e9e4 100644 --- a/tests/manager/marc/test_exporter.py +++ b/tests/manager/marc/test_exporter.py @@ -1,20 +1,15 @@ import datetime from functools import partial -from unittest.mock import MagicMock import pytest from freezegun import freeze_time from sqlalchemy.exc import InvalidRequestError -from palace.manager.core.equivalents_coverage import ( - EquivalentIdentifiersCoverageProvider, -) from palace.manager.marc.exporter import LibraryInfo, MarcExporter from palace.manager.marc.settings import MarcExporterLibrarySettings from palace.manager.sqlalchemy.model.discovery_service_registration import ( DiscoveryServiceRegistration, ) -from palace.manager.sqlalchemy.model.identifier import Equivalency, Identifier from palace.manager.sqlalchemy.model.marcfile import MarcFile from palace.manager.sqlalchemy.util import create from palace.manager.util.datetime_helpers import datetime_utc, utc_now @@ -305,69 +300,6 @@ def test_query_works(self, marc_exporter_fixture: MarcExporterFixture) -> None: assert query_works(work_id_offset=works[3].id) == works[4:] - def test_query_isbn_identifiers(self, db: DatabaseTransactionFixture) -> None: - # This is already an isbn so no lookup necessary - isbn_identifier = db.identifier(identifier_type=Identifier.ISBN) - - # Overdrive identifier with two ISBN associated - overdrive_identifier = db.identifier(identifier_type=Identifier.OVERDRIVE_ID) - od_isbn_1 = db.identifier(identifier_type=Identifier.ISBN) - od_isbn_2 = db.identifier(identifier_type=Identifier.ISBN) - create( - db.session, - Equivalency, - input_id=overdrive_identifier.id, - output_id=od_isbn_1.id, - strength=5, - ) - create( - db.session, - Equivalency, - input_id=overdrive_identifier.id, - output_id=od_isbn_2.id, - strength=1, - ) - - # Gutenberg ID with one associated ISBN - gutenberg_identifier = db.identifier(identifier_type=Identifier.GUTENBERG_ID) - gutenberg_isbn = db.identifier(identifier_type=Identifier.ISBN) - create( - db.session, - Equivalency, - input_id=gutenberg_identifier.id, - output_id=gutenberg_isbn.id, - strength=5, - ) - - # Proquest ID with no associated ISBN - proquest_identfier = db.identifier(identifier_type=Identifier.PROQUEST_ID) - - # We're using the RecursiveEquivalencyCache, so must refresh it. - EquivalentIdentifiersCoverageProvider(db.session).run() - - # Calling with only ISBN doesn't do a query, it just returns the identifiers - assert MarcExporter.query_isbn_identifiers( - MagicMock(side_effect=Exception("Should not be called")), - { - isbn_identifier, - }, - ) == {isbn_identifier: isbn_identifier} - - equivalent_isbns = MarcExporter.query_isbn_identifiers( - db.session, - { - isbn_identifier, - overdrive_identifier, - gutenberg_identifier, - proquest_identfier, - }, - ) - assert equivalent_isbns == { - isbn_identifier: isbn_identifier, - overdrive_identifier: od_isbn_1, - gutenberg_identifier: gutenberg_isbn, - } - def test_collection(self, marc_exporter_fixture: MarcExporterFixture) -> None: collection_id = marc_exporter_fixture.collection1.id assert collection_id is not None diff --git a/tests/manager/sqlalchemy/model/test_identifier.py b/tests/manager/sqlalchemy/model/test_identifier.py index c0eb9ed13..2ed805a84 100644 --- a/tests/manager/sqlalchemy/model/test_identifier.py +++ b/tests/manager/sqlalchemy/model/test_identifier.py @@ -1,18 +1,23 @@ import datetime -from unittest.mock import PropertyMock, create_autospec +from unittest.mock import MagicMock, PropertyMock, create_autospec import pytest +from palace.manager.core.equivalents_coverage import ( + EquivalentIdentifiersCoverageProvider, +) from palace.manager.sqlalchemy.constants import MediaTypes from palace.manager.sqlalchemy.model.datasource import DataSource from palace.manager.sqlalchemy.model.edition import Edition from palace.manager.sqlalchemy.model.identifier import ( + Equivalency, Identifier, ProQuestIdentifierParser, RecursiveEquivalencyCache, ) from palace.manager.sqlalchemy.model.resource import Hyperlink from palace.manager.sqlalchemy.presentation import PresentationCalculationPolicy +from palace.manager.sqlalchemy.util import create from tests.fixtures.database import DatabaseTransactionFixture from tests.manager.sqlalchemy.model.test_coverage import ( ExampleEquivalencyCoverageRecordFixture, @@ -765,6 +770,96 @@ def test_identifier_delete_cascade_parent( all_recursives = session.query(RecursiveEquivalencyCache).all() assert len(all_recursives) == 3 + def test_equivalent_identifiers(self, db: DatabaseTransactionFixture) -> None: + # This is already an isbn so no lookup necessary + isbn_identifier = db.identifier(identifier_type=Identifier.ISBN) + + # Overdrive identifier with two ISBN associated + overdrive_identifier = db.identifier(identifier_type=Identifier.OVERDRIVE_ID) + od_isbn_1 = db.identifier(identifier_type=Identifier.ISBN) + od_isbn_2 = db.identifier(identifier_type=Identifier.ISBN) + create( + db.session, + Equivalency, + input_id=overdrive_identifier.id, + output_id=od_isbn_1.id, + strength=5, + ) + create( + db.session, + Equivalency, + input_id=overdrive_identifier.id, + output_id=od_isbn_2.id, + strength=1, + ) + + # Gutenberg ID with one associated ISBN + gutenberg_identifier = db.identifier(identifier_type=Identifier.GUTENBERG_ID) + gutenberg_isbn = db.identifier(identifier_type=Identifier.ISBN) + create( + db.session, + Equivalency, + input_id=gutenberg_identifier.id, + output_id=gutenberg_isbn.id, + strength=5, + ) + + # Proquest ID with no associated ISBN but an associated GUTENBERG_ID + proquest_identfier = db.identifier(identifier_type=Identifier.PROQUEST_ID) + proquest_gutenberg = db.identifier(identifier_type=Identifier.GUTENBERG_ID) + create( + db.session, + Equivalency, + input_id=proquest_identfier.id, + output_id=proquest_gutenberg.id, + strength=2, + ) + + # We're using the RecursiveEquivalencyCache, so must refresh it. + EquivalentIdentifiersCoverageProvider(db.session).run() + + # Calling with only identifiers of the specified type doesn't do a query, + # it just returns the identifiers + assert RecursiveEquivalencyCache.equivalent_identifiers( + MagicMock(side_effect=Exception("Should not be called")), + { + isbn_identifier, + }, + type=Identifier.ISBN, + ) == {isbn_identifier: isbn_identifier} + + equivalent_isbns = RecursiveEquivalencyCache.equivalent_identifiers( + db.session, + { + isbn_identifier, + overdrive_identifier, + gutenberg_identifier, + proquest_identfier, + }, + type=Identifier.ISBN, + ) + assert equivalent_isbns == { + isbn_identifier: isbn_identifier, + overdrive_identifier: od_isbn_1, + gutenberg_identifier: gutenberg_isbn, + } + + equivalents = RecursiveEquivalencyCache.equivalent_identifiers( + db.session, + { + isbn_identifier, + overdrive_identifier, + gutenberg_identifier, + proquest_identfier, + }, + ) + assert equivalents == { + isbn_identifier: isbn_identifier, + overdrive_identifier: od_isbn_1, + gutenberg_identifier: gutenberg_isbn, + proquest_identfier: proquest_gutenberg, + } + class TestProQuestIdentifierParser: @pytest.mark.parametrize(