diff --git a/core/coverage.py b/core/coverage.py index 459e6dddf0..577dacfe6f 100644 --- a/core/coverage.py +++ b/core/coverage.py @@ -1387,25 +1387,6 @@ def run(self, _db, **kwargs): provider.finalize_timestampdata(self.progress) -class CatalogCoverageProvider(CollectionCoverageProvider): - """Most CollectionCoverageProviders provide coverage to Identifiers - that are licensed through a given Collection. - - A CatalogCoverageProvider provides coverage to Identifiers that - are present in a given Collection's catalog. - """ - - def items_that_need_coverage(self, identifiers=None, **kwargs): - """Find all Identifiers in this Collection's catalog but lacking - coverage through this CoverageProvider. - """ - qu = super(CollectionCoverageProvider, self).items_that_need_coverage( - identifiers, **kwargs - ) - qu = qu.join(Identifier.collections).filter(Collection.id == self.collection_id) - return qu - - class BibliographicCoverageProvider(CollectionCoverageProvider): """Fill in bibliographic metadata for all books in a Collection. diff --git a/core/model/collection.py b/core/model/collection.py index 47053908fe..778ad2b573 100644 --- a/core/model/collection.py +++ b/core/model/collection.py @@ -1,6 +1,5 @@ from __future__ import annotations -import datetime from abc import ABCMeta, abstractmethod from typing import TYPE_CHECKING, Any, Generator, List, Optional, Tuple, TypeVar @@ -13,15 +12,14 @@ Unicode, UniqueConstraint, exists, - func, ) -from sqlalchemy.orm import Mapped, Query, backref, joinedload, mapper, relationship +from sqlalchemy.orm import Mapped, Query, backref, mapper, relationship from sqlalchemy.orm.exc import NoResultFound from sqlalchemy.orm.session import Session from sqlalchemy.sql.expression import and_, or_ from core.integration.goals import Goals -from core.model import Base, create, get_one, get_one_or_create +from core.model import Base, create, get_one_or_create from core.model.configuration import ConfigurationSetting, ExternalIntegration from core.model.constants import EditionConstants from core.model.coverage import CoverageRecord @@ -37,7 +35,6 @@ from core.model.library import Library from core.model.licensing import LicensePool, LicensePoolDeliveryMechanism from core.model.work import Work -from core.util.string_helpers import base64 if TYPE_CHECKING: from core.external_search import ExternalSearchIndex @@ -571,32 +568,6 @@ def parents(self) -> Generator[Collection, None, None]: yield parent yield from parent.parents - @property - def metadata_identifier(self) -> str: - """Identifier based on collection details that uniquely represents - this Collection on the metadata wrangler. This identifier is - composed of the Collection protocol and account identifier. - - A circulation manager provides a Collection's metadata - identifier as part of collection registration. The metadata - wrangler creates a corresponding Collection on its side, - *named after* the metadata identifier -- regardless of the name - of that collection on the circulation manager side. - """ - account_id = self.unique_account_id - if self.protocol == ExternalIntegration.OPDS_IMPORT: - # Remove ending / from OPDS url that could duplicate the collection - # on the Metadata Wrangler. - while account_id.endswith("/"): - account_id = account_id[:-1] - - encode = base64.urlsafe_b64encode - account_id = encode(account_id) - protocol = encode(self.protocol) - - metadata_identifier = protocol + ":" + account_id - return encode(metadata_identifier) # type: ignore[no-any-return] - def disassociate_library(self, library: Library) -> None: """Disassociate a Library from this Collection and delete any relevant ConfigurationSettings. @@ -638,67 +609,6 @@ def disassociate_library(self, library: Library) -> None: self.libraries.remove(library) - @classmethod - def _decode_metadata_identifier(cls, metadata_identifier: str) -> Tuple[str, str]: - """Invert the metadata_identifier property.""" - if not metadata_identifier: - raise ValueError("No metadata identifier provided.") - try: - decode = base64.urlsafe_b64decode - details = decode(metadata_identifier) - encoded_details = details.split(":", 1) - [protocol, account_id] = [decode(d) for d in encoded_details] - except (TypeError, ValueError) as e: - raise ValueError( - "Metadata identifier '%s' is invalid: %s" - % (metadata_identifier, str(e)) - ) - return protocol, account_id - - @classmethod - def from_metadata_identifier( - cls, - _db: Session, - metadata_identifier: str, - data_source: DataSource | str | None = None, - ) -> Tuple[Collection, bool]: - """Finds or creates a Collection on the metadata wrangler, based - on its unique metadata_identifier. - """ - - # Decode the metadata identifier into a protocol and an - # account ID. If the metadata identifier is invalid, this - # will raise an exception. - protocol, account_id = cls._decode_metadata_identifier(metadata_identifier) - - # Now that we know the metadata identifier is valid, try to - # look up a collection named after it. - collection = get_one(_db, Collection, name=metadata_identifier) - is_new = False - - if not collection: - # Create a collection named after the metadata - # identifier. Give it an ExternalIntegration with the - # corresponding protocol, and set its data source and - # external_account_id. - new_collection, is_new = create(_db, Collection, name=metadata_identifier) - new_collection.create_external_integration(protocol) - new_collection.create_integration_configuration(protocol) - collection = new_collection - - if protocol == ExternalIntegration.OPDS_IMPORT: - # For OPDS Import collections only, we store the URL to - # the OPDS feed (the "account ID") and the data source. - collection.external_account_id = account_id - if isinstance(data_source, DataSource): - collection.data_source = data_source - elif data_source is not None: - collection.data_source = DataSource.lookup( - _db, data_source, autocreate=True - ) - - return collection, is_new - @property def pools_with_no_delivery_mechanisms(self) -> Query[LicensePool]: """Find all LicensePools in this Collection that have no delivery @@ -739,91 +649,6 @@ def explain(self, include_secrets: bool = False) -> List[str]: lines.append(f'Setting "{name}": "{value}"') return lines - def catalog_identifier(self, identifier: Identifier) -> None: - """Inserts an identifier into a catalog""" - self.catalog_identifiers([identifier]) - - def catalog_identifiers(self, identifiers: List[Identifier]) -> None: - """Inserts identifiers into the catalog""" - if not identifiers: - # Nothing to do. - return - - _db = Session.object_session(identifiers[0]) - already_in_catalog = ( - _db.query(Identifier) - .join(CollectionIdentifier) - .filter(CollectionIdentifier.collection_id == self.id) # type: ignore[attr-defined] - .filter(Identifier.id.in_([x.id for x in identifiers])) - .all() - ) - - new_catalog_entries = [ - dict(collection_id=self.id, identifier_id=identifier.id) - for identifier in identifiers - if identifier not in already_in_catalog - ] - _db.bulk_insert_mappings(CollectionIdentifier, new_catalog_entries) - _db.commit() - - def unresolved_catalog( - self, _db: Session, data_source_name: str, operation: str - ) -> Query[Identifier]: - """Returns a query with all identifiers in a Collection's catalog that - have unsuccessfully attempted resolution. This method is used on the - metadata wrangler. - - :return: a sqlalchemy.Query - """ - coverage_source = DataSource.lookup(_db, data_source_name) - is_not_resolved = and_( - CoverageRecord.operation == operation, - CoverageRecord.data_source_id == coverage_source.id, - CoverageRecord.status != CoverageRecord.SUCCESS, - ) - - query = ( - _db.query(Identifier) - .outerjoin(Identifier.licensed_through) - .outerjoin(Identifier.coverage_records) - .outerjoin(LicensePool.work) - .outerjoin(Identifier.collections) # type: ignore[attr-defined] - .filter(Collection.id == self.id, is_not_resolved, Work.id == None) - .order_by(Identifier.id) - ) - - return query - - def isbns_updated_since( - self, _db: Session, timestamp: datetime.datetime | None - ) -> Query[Identifier]: - """Finds all ISBNs in a collection's catalog that have been updated - since the timestamp but don't have a Work to show for it. Used in - the metadata wrangler. - - :return: a Query - """ - isbns = ( - _db.query(Identifier, func.max(CoverageRecord.timestamp).label("latest")) - .join(Identifier.collections) # type: ignore[attr-defined] - .join(Identifier.coverage_records) - .outerjoin(Identifier.licensed_through) - .group_by(Identifier.id) - .order_by("latest") - .filter( - Collection.id == self.id, - LicensePool.work_id == None, - CoverageRecord.status == CoverageRecord.SUCCESS, - ) - .enable_eagerloads(False) - .options(joinedload(Identifier.coverage_records)) - ) - - if timestamp: - isbns = isbns.filter(CoverageRecord.timestamp > timestamp) - - return isbns - @classmethod def restrict_to_ready_deliverable_works( cls, diff --git a/tests/core/models/test_collection.py b/tests/core/models/test_collection.py index 4e529e74ea..72e1100220 100644 --- a/tests/core/models/test_collection.py +++ b/tests/core/models/test_collection.py @@ -1,4 +1,3 @@ -import datetime import json import pytest @@ -12,15 +11,12 @@ from core.model.customlist import CustomList from core.model.datasource import DataSource from core.model.edition import Edition -from core.model.identifier import Identifier from core.model.integration import ( IntegrationConfiguration, IntegrationLibraryConfiguration, ) from core.model.licensing import Hold, License, LicensePool, Loan from core.model.work import Work -from core.util.datetime_helpers import utc_now -from core.util.string_helpers import base64 from tests.fixtures.database import DatabaseTransactionFixture @@ -433,201 +429,6 @@ def test_explain(self, example_collection_fixture: ExampleCollectionFixture): 'External account ID: "id2"', ] == data - def test_metadata_identifier( - self, example_collection_fixture: ExampleCollectionFixture - ): - db = example_collection_fixture.database_fixture - test_collection = example_collection_fixture.collection - - # If the collection doesn't have its unique identifier, an error - # is raised. - pytest.raises(ValueError, getattr, test_collection, "metadata_identifier") - - def build_expected(protocol, unique_id): - encode = base64.urlsafe_b64encode - encoded = [encode(value) for value in [protocol, unique_id]] - joined = ":".join(encoded) - return encode(joined) - - # With a unique identifier, we get back the expected identifier. - test_collection.external_account_id = "id" - expected = build_expected(ExternalIntegration.OVERDRIVE, "id") - assert expected == test_collection.metadata_identifier - - # If there's a parent, its unique id is incorporated into the result. - child = db.collection( - name="Child", - protocol=ExternalIntegration.OPDS_IMPORT, - external_account_id=db.fresh_url(), - ) - child.parent = test_collection - expected = build_expected( - ExternalIntegration.OPDS_IMPORT, "id+%s" % child.external_account_id - ) - assert expected == child.metadata_identifier - - # If it's an OPDS_IMPORT collection with a url external_account_id, - # closing '/' marks are removed. - opds = db.collection( - name="OPDS", - protocol=ExternalIntegration.OPDS_IMPORT, - external_account_id=(db.fresh_url() + "/"), - ) - assert isinstance(opds.external_account_id, str) - expected = build_expected( - ExternalIntegration.OPDS_IMPORT, opds.external_account_id[:-1] - ) - assert expected == opds.metadata_identifier - - def test_from_metadata_identifier( - self, example_collection_fixture: ExampleCollectionFixture - ): - db = example_collection_fixture.database_fixture - test_collection = example_collection_fixture.collection - - data_source = "New data source" - - # A ValueError results if we try to look up using an invalid - # identifier. - with pytest.raises(ValueError) as excinfo: - Collection.from_metadata_identifier( - db.session, "not a real identifier", data_source=data_source - ) - assert ( - "Metadata identifier 'not a real identifier' is invalid: Incorrect padding" - in str(excinfo.value) - ) - - # Of if we pass in the empty string. - with pytest.raises(ValueError) as excinfo: - Collection.from_metadata_identifier(db.session, "", data_source=data_source) - assert "No metadata identifier provided" in str(excinfo.value) - - # No new data source was created. - def new_data_source(): - return DataSource.lookup(db.session, data_source) - - assert None == new_data_source() - - # If a mirrored collection doesn't exist, it is created. - test_collection.external_account_id = "id" - mirror_collection, is_new = Collection.from_metadata_identifier( - db.session, test_collection.metadata_identifier, data_source=data_source - ) - assert True == is_new - assert test_collection.metadata_identifier == mirror_collection.name - assert test_collection.protocol == mirror_collection.protocol - - # Because this isn't an OPDS collection, the external account - # ID is not stored, the data source is the default source for - # the protocol, and no new data source was created. - assert mirror_collection.external_account_id is None - assert mirror_collection.data_source is not None - assert DataSource.OVERDRIVE == mirror_collection.data_source.name - assert None == new_data_source() - - # If the mirrored collection already exists, it is returned. - collection = db.collection(external_account_id=db.fresh_url()) - mirror_collection = create( - db.session, Collection, name=collection.metadata_identifier - )[0] - assert collection.protocol is not None - mirror_collection.create_external_integration(collection.protocol) - mirror_collection.create_integration_configuration(collection.protocol) - - # Confirm that there's no external_account_id and no DataSource. - # TODO I don't understand why we don't store this information, - # even if only to keep it in an easy-to-read form. - assert None == mirror_collection.external_account_id - assert None == mirror_collection.data_source - assert None == new_data_source() - - # Now try a lookup of an OPDS Import-type collection. - result, is_new = Collection.from_metadata_identifier( - db.session, collection.metadata_identifier, data_source=data_source - ) - assert False == is_new - assert mirror_collection == result - # The external_account_id and data_source have been set now. - assert collection.external_account_id == mirror_collection.external_account_id - - # A new DataSource object has been created. - source = new_data_source() - assert "New data source" == source.name - assert source == mirror_collection.data_source - - def test_catalog_identifier( - self, example_collection_fixture: ExampleCollectionFixture - ): - """#catalog_identifier associates an identifier with the catalog""" - db = example_collection_fixture.database_fixture - test_collection = example_collection_fixture.collection - - identifier = db.identifier() - test_collection.catalog_identifier(identifier) - - assert 1 == len(test_collection.catalog) - assert identifier == test_collection.catalog[0] - - def test_catalog_identifiers( - self, example_collection_fixture: ExampleCollectionFixture - ): - """#catalog_identifier associates multiple identifiers with a catalog""" - db = example_collection_fixture.database_fixture - test_collection = example_collection_fixture.collection - - i1 = db.identifier() - i2 = db.identifier() - i3 = db.identifier() - - # One of the identifiers is already in the catalog. - test_collection.catalog_identifier(i3) - - test_collection.catalog_identifiers([i1, i2, i3]) - - # Now all three identifiers are in the catalog. - assert sorted([i1, i2, i3]) == sorted(test_collection.catalog) - - def test_unresolved_catalog( - self, example_collection_fixture: ExampleCollectionFixture - ): - db = example_collection_fixture.database_fixture - test_collection = example_collection_fixture.collection - - # A regular schmegular identifier: untouched, pure. - pure_id = db.identifier() - - # A 'resolved' identifier that doesn't have a work yet. - # (This isn't supposed to happen, but jic.) - source = DataSource.lookup(db.session, DataSource.GUTENBERG) - operation = "test-thyself" - resolved_id = db.identifier() - db.coverage_record( - resolved_id, source, operation=operation, status=CoverageRecord.SUCCESS - ) - - # An unresolved identifier--we tried to resolve it, but - # it all fell apart. - unresolved_id = db.identifier() - db.coverage_record( - unresolved_id, - source, - operation=operation, - status=CoverageRecord.TRANSIENT_FAILURE, - ) - - # An identifier with a Work already. - id_with_work = db.work().presentation_edition.primary_identifier - - test_collection.catalog_identifiers( - [pure_id, resolved_id, unresolved_id, id_with_work] - ) - - result = test_collection.unresolved_catalog(db.session, source.name, operation) - - # Only the failing identifier is in the query. - assert [unresolved_id] == result.all() - def test_disassociate_library( self, example_collection_fixture: ExampleCollectionFixture ): @@ -695,62 +496,6 @@ def test_disassociate_library( collection.disassociate_library(other_library) assert "No known external integration for collection" in str(excinfo.value) - def test_isbns_updated_since( - self, example_collection_fixture: ExampleCollectionFixture - ): - db = example_collection_fixture.database_fixture - test_collection = example_collection_fixture.collection - - i1 = db.identifier(identifier_type=Identifier.ISBN, foreign_id=db.isbn_take()) - i2 = db.identifier(identifier_type=Identifier.ISBN, foreign_id=db.isbn_take()) - i3 = db.identifier(identifier_type=Identifier.ISBN, foreign_id=db.isbn_take()) - i4 = db.identifier(identifier_type=Identifier.ISBN, foreign_id=db.isbn_take()) - - timestamp = utc_now() - - # An empty catalog returns nothing.. - assert [] == test_collection.isbns_updated_since(db.session, None).all() - - # Give the ISBNs some coverage. - content_cafe = DataSource.lookup(db.session, DataSource.CONTENT_CAFE) - for isbn in [i2, i3, i1]: - db.coverage_record(isbn, content_cafe) - - # Give one ISBN more than one coverage record. - oclc = DataSource.lookup(db.session, DataSource.OCLC) - i1_oclc_record = db.coverage_record(i1, oclc) - - def assert_isbns(expected, result_query): - results = [r[0] for r in result_query] - assert expected == results - - # When no timestamp is given, all ISBNs in the catalog are returned, - # in order of their CoverageRecord timestamp. - test_collection.catalog_identifiers([i1, i2]) - updated_isbns = test_collection.isbns_updated_since(db.session, None).all() - assert_isbns([i2, i1], updated_isbns) - - # That CoverageRecord timestamp is also returned. - i1_timestamp = updated_isbns[1][1] # type: ignore[index] - assert isinstance(i1_timestamp, datetime.datetime) - assert i1_oclc_record.timestamp == i1_timestamp - - # When a timestamp is passed, only works that have been updated since - # then will be returned. - timestamp = utc_now() - i1.coverage_records[0].timestamp = utc_now() - updated_isbns_2 = test_collection.isbns_updated_since(db.session, timestamp) - assert_isbns([i1], updated_isbns_2) - - # Prepare an ISBN associated with a Work. - work = db.work(with_license_pool=True) - work.license_pools[0].identifier = i2 - i2.coverage_records[0].timestamp = utc_now() - - # ISBNs that have a Work will be ignored. - updated_isbns_3 = test_collection.isbns_updated_since(db.session, timestamp) - assert_isbns([i1], updated_isbns_3) - def test_custom_lists(self, example_collection_fixture: ExampleCollectionFixture): db = example_collection_fixture.database_fixture test_collection = example_collection_fixture.collection diff --git a/tests/core/test_coverage.py b/tests/core/test_coverage.py index 3054d08852..9fb3d48e01 100644 --- a/tests/core/test_coverage.py +++ b/tests/core/test_coverage.py @@ -4,7 +4,6 @@ from core.coverage import ( BaseCoverageProvider, - CatalogCoverageProvider, CoverageFailure, CoverageProviderProgress, IdentifierCoverageProvider, @@ -1908,34 +1907,6 @@ def test_set_presentation_ready(self, db: DatabaseTransactionFixture): assert True == pool.work.presentation_ready -class TestCatalogCoverageProvider: - def test_items_that_need_coverage(self, db: DatabaseTransactionFixture): - c1 = db.collection() - c2 = db.collection() - - i1 = db.identifier() - c1.catalog_identifier(i1) - - i2 = db.identifier() - c2.catalog_identifier(i2) - - i3 = db.identifier() - - # This Identifier is licensed through the Collection c1, but - # it's not in the catalog--catalogs are used for different - # things. - edition, lp = db.edition(with_license_pool=True, collection=c1) - - # We have four identifiers, but only i1 shows up, because - # it's the only one in c1's catalog. - class Provider(CatalogCoverageProvider): - SERVICE_NAME = "test" - DATA_SOURCE_NAME = DataSource.OVERDRIVE - - provider = Provider(c1) - assert [i1] == provider.items_that_need_coverage().all() - - class BibliographicCoverageProviderFixture: transaction: DatabaseTransactionFixture work: Work