diff --git a/api/coverage.py b/api/coverage.py deleted file mode 100644 index 05e561fd2b..0000000000 --- a/api/coverage.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Base classes for CoverageProviders. - -The CoverageProviders themselves are in the file corresponding to the -service that needs coverage -- overdrive.py, and so on. -""" - - -from core.coverage import CollectionCoverageProvider, CoverageFailure -from core.model import DataSource -from core.opds_import import OPDSImporter - - -class OPDSImportCoverageProvider(CollectionCoverageProvider): - """Provide coverage for identifiers by looking them up, in batches, - using the Simplified lookup protocol. - """ - - DEFAULT_BATCH_SIZE = 25 - OPDS_IMPORTER_CLASS = OPDSImporter - - def __init__(self, collection, lookup_client, **kwargs): - """Constructor. - - :param lookup_client: A SimplifiedOPDSLookup object. - """ - super().__init__(collection, **kwargs) - self.lookup_client = lookup_client - - def process_batch(self, batch): - """Perform a Simplified lookup and import the resulting OPDS feed.""" - ( - imported_editions, - pools, - works, - error_messages_by_id, - ) = self.lookup_and_import_batch(batch) - - results = [] - imported_identifiers = set() - # We grant coverage if an Edition was created from the operation. - for edition in imported_editions: - identifier = edition.primary_identifier - results.append(identifier) - imported_identifiers.add(identifier) - - # The operation may also have updated information from a - # number of LicensePools. - for pool in pools: - identifier = pool.identifier - if identifier in imported_identifiers: - self.finalize_license_pool(pool) - else: - msg = "OPDS import operation imported LicensePool, but no Edition." - results.append(self.failure(identifier, msg, transient=True)) - - # Anything left over is either a CoverageFailure, or an - # Identifier that used to be a CoverageFailure, indicating - # that a simplified:message that a normal OPDSImporter would - # consider a 'failure' should actually be considered a - # success. - for failure_or_identifier in sorted(error_messages_by_id.values()): - if isinstance(failure_or_identifier, CoverageFailure): - failure_or_identifier.collection = self.collection_or_not - results.append(failure_or_identifier) - return results - - def process_item(self, identifier): - """Handle an individual item (e.g. through ensure_coverage) as a very - small batch. Not efficient, but it works. - """ - [result] = self.process_batch([identifier]) - return result - - def finalize_license_pool(self, pool): - """An OPDS entry was matched with a LicensePool. Do something special - to mark the occasion. - - By default, nothing happens. - """ - - @property - def api_method(self): - """The method to call to fetch an OPDS feed from the remote server.""" - return self.lookup_client.lookup - - def lookup_and_import_batch(self, batch): - """Look up a batch of identifiers and parse the resulting OPDS feed. - - This method is overridden by MockOPDSImportCoverageProvider. - """ - # id_mapping maps our local identifiers to identifiers the - # foreign data source will reocgnize. - id_mapping = self.create_identifier_mapping(batch) - if id_mapping: - foreign_identifiers = list(id_mapping.keys()) - else: - foreign_identifiers = batch - - response = self.api_method(foreign_identifiers) - - # import_feed_response takes id_mapping so it can map the - # foreign identifiers back to their local counterparts. - return self.import_feed_response(response, id_mapping) - - def create_identifier_mapping(self, batch): - """Map the internal identifiers used for books to the corresponding - identifiers used by the lookup client. - - By default, no identifier mapping is needed. - """ - return None - - def import_feed_response(self, response, id_mapping): - """Confirms OPDS feed response and imports feed through - the appropriate OPDSImporter subclass. - """ - self.lookup_client.check_content_type(response) - importer = self.OPDS_IMPORTER_CLASS( - self._db, - self.collection, - identifier_mapping=id_mapping, - data_source_name=self.data_source.name, - ) - return importer.import_from_feed(response.text) - - -class MockOPDSImportCoverageProvider(OPDSImportCoverageProvider): - - SERVICE_NAME = "Mock Provider" - DATA_SOURCE_NAME = DataSource.OA_CONTENT_SERVER - - def __init__(self, collection, *args, **kwargs): - super().__init__(collection, None, *args, **kwargs) - self.batches = [] - self.finalized = [] - self.import_results = [] - - def queue_import_results(self, editions, pools, works, messages_by_id): - self.import_results.insert(0, (editions, pools, works, messages_by_id)) - - def finalize_license_pool(self, license_pool): - self.finalized.append(license_pool) - super().finalize_license_pool(license_pool) - - def lookup_and_import_batch(self, batch): - self.batches.append(batch) - return self.import_results.pop() diff --git a/core/opds_import.py b/core/opds_import.py index 6b52d77406..faca6de823 100644 --- a/core/opds_import.py +++ b/core/opds_import.py @@ -16,14 +16,12 @@ from api.circulation import CirculationConfigurationMixin from api.selftest import HasCollectionSelfTests -from core.integration.goals import Goals from core.integration.settings import ( BaseSettings, ConfigurationFormItem, ConfigurationFormItemType, FormField, ) -from core.model.integration import IntegrationConfiguration from .classifier import Classifier from .config import IntegrationException @@ -95,59 +93,6 @@ def parse_identifier(db, identifier): return parsed_identifier -class AccessNotAuthenticated(Exception): - """No authentication is configured for this service""" - - -class SimplifiedOPDSLookup: - """Tiny integration class for the Simplified 'lookup' protocol.""" - - LOOKUP_ENDPOINT = "lookup" - - @classmethod - def check_content_type(cls, response): - content_type = response.headers.get("content-type") - if content_type != OPDSFeed.ACQUISITION_FEED_TYPE: - raise BadResponseException.from_response( - response.url, "Wrong media type: %s" % content_type, response - ) - - @classmethod - def from_protocol(cls, _db, protocol, goal=Goals.LICENSE_GOAL, library=None): - config = get_one(_db, IntegrationConfiguration, protocol=protocol, goal=goal) - if config is not None and library is not None: - config = config.for_library(library.id) - if config is None: - return None - return cls(config.settings_dict["url"]) - - def __init__(self, base_url): - if not base_url.endswith("/"): - base_url += "/" - self.base_url = base_url - - @property - def lookup_endpoint(self): - return self.LOOKUP_ENDPOINT - - def _get(self, url, **kwargs): - """Make an HTTP request. This method is overridden in the mock class.""" - kwargs["timeout"] = kwargs.get("timeout", 300) - kwargs["allowed_response_codes"] = kwargs.get("allowed_response_codes", []) - kwargs["allowed_response_codes"] += ["2xx", "3xx"] - return HTTP.get_with_timeout(url, **kwargs) - - def urn_args(self, identifiers): - return "&".join({"urn=%s" % i.urn for i in identifiers}) - - def lookup(self, identifiers): - """Retrieve an OPDS feed with metadata for the given identifiers.""" - args = self.urn_args(identifiers) - url = self.base_url + self.lookup_endpoint + "?" + args - logging.info("Lookup URL: %s", url) - return self._get(url) - - class OPDSXMLParser(XMLParser): NAMESPACES = { "simplified": "http://librarysimplified.org/terms/", diff --git a/tests/api/mockapi/opds.py b/tests/api/mockapi/opds.py deleted file mode 100644 index c64dbcd3b0..0000000000 --- a/tests/api/mockapi/opds.py +++ /dev/null @@ -1,23 +0,0 @@ -from core.opds_import import SimplifiedOPDSLookup -from core.util.http import HTTP -from tests.core.mock import MockRequestsResponse - - -class MockSimplifiedOPDSLookup(SimplifiedOPDSLookup): - def __init__(self, *args, **kwargs): - self.requests = [] - self.responses = [] - super().__init__(*args, **kwargs) - - def queue_response(self, status_code, headers={}, content=None): - self.responses.insert(0, MockRequestsResponse(status_code, headers, content)) - - def _get(self, url, *args, **kwargs): - self.requests.append((url, args, kwargs)) - response = self.responses.pop() - return HTTP._process_response( - url, - response, - kwargs.get("allowed_response_codes"), - kwargs.get("disallowed_response_codes"), - ) diff --git a/tests/api/test_coverage.py b/tests/api/test_coverage.py deleted file mode 100644 index a9cff5e28a..0000000000 --- a/tests/api/test_coverage.py +++ /dev/null @@ -1,238 +0,0 @@ -import pytest - -from api.coverage import MockOPDSImportCoverageProvider, OPDSImportCoverageProvider -from core.coverage import CoverageFailure -from core.model import Collection, DataSource, LicensePool -from core.opds_import import OPDSImporter -from core.util.http import BadResponseException -from core.util.opds_writer import OPDSFeed -from tests.api.mockapi.opds import MockSimplifiedOPDSLookup -from tests.core.mock import MockRequestsResponse -from tests.fixtures.database import DatabaseTransactionFixture - - -class TestOPDSImportCoverageProvider: - def _provider(self, db: DatabaseTransactionFixture): - """Create a generic MockOPDSImportCoverageProvider for testing purposes.""" - return MockOPDSImportCoverageProvider(db.default_collection()) - - def test_badresponseexception_on_non_opds_feed( - self, db: DatabaseTransactionFixture - ): - """If the lookup protocol sends something that's not an OPDS - feed, refuse to go any further. - """ - provider = self._provider(db) - provider.lookup_client = MockSimplifiedOPDSLookup(db.fresh_url()) - - response = MockRequestsResponse( - 200, {"content-type": "text/plain"}, "Some data" - ) - provider.lookup_client.queue_response(response) - with pytest.raises(BadResponseException) as excinfo: - provider.import_feed_response(response, None) - assert "Wrong media type: text/plain" in str(excinfo.value) - - def test_process_batch_with_identifier_mapping( - self, db: DatabaseTransactionFixture - ): - """Test that internal identifiers are mapped to and from the form used - by the external service. - """ - - # Unlike other tests in this class, we are using a real - # implementation of OPDSImportCoverageProvider.process_batch. - class TestProvider(OPDSImportCoverageProvider): - SERVICE_NAME = "Test provider" - DATA_SOURCE_NAME = DataSource.OA_CONTENT_SERVER - - mapping: dict - - # Mock the identifier mapping - def create_identifier_mapping(self, batch): - return self.mapping - - # This means we need to mock the lookup client instead. - lookup = MockSimplifiedOPDSLookup(db.fresh_url()) - - DatabaseTransactionFixture.set_settings( - db.default_collection().integration_configuration, - **{Collection.DATA_SOURCE_NAME_SETTING: DataSource.OA_CONTENT_SERVER} - ) - provider = TestProvider(db.default_collection(), lookup) - - # Create a hard-coded mapping. We use id1 internally, but the - # foreign data source knows the book as id2. - id1 = db.identifier() - id2 = db.identifier() - provider.mapping = {id2: id1} - - feed = ( - "%sHere's your title!" - % id2.urn - ) - headers = {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE} - lookup.queue_response(200, headers=headers, content=feed) - [identifier] = provider.process_batch([id1]) - - # We wanted to process id1. We sent id2 to the server, the - # server responded with an for id2, and it was used to - # modify the Edition associated with id1. - assert id1 == identifier - - [edition] = id1.primarily_identifies - assert "Here's your title!" == edition.title - - def test_process_batch(self, db: DatabaseTransactionFixture): - provider = self._provider(db) - - # Here are an Edition and a LicensePool for the same identifier but - # from different data sources. We would expect this to happen - # when talking to the open-access content server. - edition = db.edition(data_source_name=DataSource.OA_CONTENT_SERVER) - identifier = edition.primary_identifier - - license_source = DataSource.lookup(db.session, DataSource.GUTENBERG) - pool, is_new = LicensePool.for_foreign_id( - db.session, - license_source, - identifier.type, - identifier.identifier, - collection=db.default_collection(), - ) - assert pool is not None - assert None == pool.work - - # Here's a second Edition/LicensePool that's going to cause a - # problem: the LicensePool will show up in the results, but - # the corresponding Edition will not. - edition2, pool2 = db.edition(with_license_pool=True) - - # Here's an identifier that can't be looked up at all, - # and an identifier that shows up in messages_by_id because - # its simplified:message was determined to indicate success - # rather than failure. - error_identifier = db.identifier() - not_an_error_identifier = db.identifier() - messages_by_id = { - error_identifier.urn: CoverageFailure( - error_identifier, "500: internal error" - ), - not_an_error_identifier.urn: not_an_error_identifier, - } - - # When we call CoverageProvider.process_batch(), it's going to - # return the information we just set up: a matched - # Edition/LicensePool pair, a mismatched LicensePool, and an - # error message. - provider.queue_import_results([edition], [pool, pool2], [], messages_by_id) - - # Make the CoverageProvider do its thing. - fake_batch = [object()] - ( - success_import, - failure_mismatched, - failure_message, - success_message, - ) = provider.process_batch(fake_batch) - - # The fake batch was provided to lookup_and_import_batch. - assert [fake_batch] == provider.batches - - # The matched Edition/LicensePool pair was returned. - assert success_import == edition.primary_identifier - - # The LicensePool of that pair was passed into finalize_license_pool. - # The mismatched LicensePool was not. - assert [pool] == provider.finalized - - # The mismatched LicensePool turned into a CoverageFailure - # object. - assert isinstance(failure_mismatched, CoverageFailure) - assert ( - "OPDS import operation imported LicensePool, but no Edition." - == failure_mismatched.exception - ) - assert pool2.identifier == failure_mismatched.obj - assert True == failure_mismatched.transient - - # The OPDSMessage with status code 500 was returned as a - # CoverageFailure object. - assert isinstance(failure_message, CoverageFailure) - assert "500: internal error" == failure_message.exception - assert error_identifier == failure_message.obj - assert True == failure_message.transient - - # The identifier that had a treat-as-success OPDSMessage was returned - # as-is. - assert not_an_error_identifier == success_message - - def test_process_batch_success_even_if_no_licensepool_exists( - self, db: DatabaseTransactionFixture - ): - """This shouldn't happen since CollectionCoverageProvider - only operates on Identifiers that are licensed through a Collection. - But if a lookup should return an Edition but no LicensePool, - that counts as a success. - """ - provider = self._provider(db) - edition, pool = db.edition(with_license_pool=True) - provider.queue_import_results([edition], [], [], {}) - fake_batch = [object()] - [success] = provider.process_batch(fake_batch) - - # The Edition's primary identifier was returned to indicate - # success. - assert edition.primary_identifier == success - - # However, since there is no LicensePool, nothing was finalized. - assert [] == provider.finalized - - def test_process_item(self, db: DatabaseTransactionFixture): - """To process a single item we process a batch containing - only that item. - """ - provider = self._provider(db) - edition = db.edition() - provider.queue_import_results([edition], [], [], {}) - item = object() - result = provider.process_item(item) - assert edition.primary_identifier == result - assert [[item]] == provider.batches - - def test_import_feed_response(self, db: DatabaseTransactionFixture): - """Verify that import_feed_response instantiates the - OPDS_IMPORTER_CLASS subclass and calls import_from_feed - on it. - """ - - class MockOPDSImporter(OPDSImporter): - def import_from_feed(self, text): - """Return information that's useful for verifying - that the OPDSImporter was instantiated with the - right values. - """ - return ( - text, - self.collection, - self.identifier_mapping, - self.data_source_name, - ) - - class MockProvider(MockOPDSImportCoverageProvider): - OPDS_IMPORTER_CLASS = MockOPDSImporter - - provider = MockProvider(db.default_collection()) - provider.lookup_client = MockSimplifiedOPDSLookup(db.fresh_url()) - - response = MockRequestsResponse( - 200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, "some data" - ) - id_mapping = object() - (text, collection, mapping, data_source_name) = provider.import_feed_response( - response, id_mapping - ) - assert "some data" == text - assert provider.collection == collection - assert id_mapping == mapping - assert provider.data_source.name == data_source_name