diff --git a/api/overdrive.py b/api/overdrive.py index bfb120b4e2..358e6461d4 100644 --- a/api/overdrive.py +++ b/api/overdrive.py @@ -1,13 +1,25 @@ +from __future__ import annotations + +import argparse +import csv import datetime import json +import logging import re import time import urllib.parse -from typing import Any, Dict, Optional, Tuple, Union +from threading import RLock +from typing import Any, Dict, List, Set, Tuple, Union, cast +from urllib.parse import quote, urlsplit, urlunsplit import dateutil import flask +import isbnlib from flask_babel import lazy_gettext as _ +from requests import Response +from requests.structures import CaseInsensitiveDict +from sqlalchemy.exc import NoResultFound +from sqlalchemy.orm import Query, Session from sqlalchemy.orm.exc import StaleDataError from api.circulation import ( @@ -19,37 +31,79 @@ LoanInfo, ) from api.circulation_exceptions import * +from api.circulation_exceptions import CannotFulfill from api.selftest import HasCollectionSelfTests, SelfTestResult from core.analytics import Analytics +from core.config import CannotLoadConfiguration, Configuration +from core.coverage import BibliographicCoverageProvider +from core.importers import BaseImporterSettings from core.integration.base import HasChildIntegrationConfiguration -from core.integration.settings import BaseSettings, ConfigurationFormItem, FormField -from core.metadata_layer import ReplacementPolicy, TimestampData +from core.integration.settings import ( + BaseSettings, + ConfigurationFormItem, + ConfigurationFormItemType, + FormField, +) +from core.metadata_layer import ( + CirculationData, + ContributorData, + FormatData, + IdentifierData, + LinkData, + MeasurementData, + Metadata, + ReplacementPolicy, + SubjectData, + TimestampData, +) from core.model import ( + Classification, Collection, + Contributor, Credential, DataSource, DeliveryMechanism, Edition, ExternalIntegration, + Hyperlink, Identifier, LicensePool, + Measurement, MediaTypes, Patron, 
Representation, + Subject, + get_one_or_create, ) from core.monitor import CollectionMonitor, IdentifierSweepMonitor, TimelineMonitor -from core.overdrive import ( - OverdriveBibliographicCoverageProvider, - OverdriveCoreAPI, - OverdriveRepresentationExtractor, - OverdriveSettings, -) -from core.scripts import Script -from core.util.datetime_helpers import strptime_utc -from core.util.http import HTTP +from core.scripts import InputScript, Script +from core.util.datetime_helpers import strptime_utc, utc_now +from core.util.http import HTTP, BadResponseException +from core.util.string_helpers import base64 + + +class OverdriveConstants: + OVERDRIVE_CLIENT_KEY = "overdrive_client_key" + OVERDRIVE_CLIENT_SECRET = "overdrive_client_secret" + OVERDRIVE_SERVER_NICKNAME = "overdrive_server_nickname" + OVERDRIVE_WEBSITE_ID = "overdrive_website_id" + + # Note that the library ID is not included here because it is not Overdrive-specific + OVERDRIVE_CONFIGURATION_KEYS = { + OVERDRIVE_CLIENT_KEY, + OVERDRIVE_CLIENT_SECRET, + OVERDRIVE_SERVER_NICKNAME, + OVERDRIVE_WEBSITE_ID, + } + + PRODUCTION_SERVERS = "production" + TESTING_SERVERS = "testing" + # The formats we care about. + FORMATS = "ebook-epub-open,ebook-epub-adobe,ebook-pdf-adobe,ebook-pdf-open,audiobook-overdrive".split( + "," + ) -class OverdriveAPIConstants: # These are not real Overdrive formats; we use them internally so # we can distinguish between (e.g.) using "audiobook-overdrive" # to get into Overdrive Read, and using it to get a link to a @@ -66,10 +120,68 @@ class OverdriveAPIConstants: "audiobook-overdrive", ] + # When associating an Overdrive account with a library, it's + # necessary to also specify an "ILS name" obtained from + # Overdrive. Components that don't authenticate patrons (such as + # the metadata wrangler) don't need to set this value. 
+ ILS_NAME_KEY = "ils_name" + ILS_NAME_DEFAULT = "default" + + +class OverdriveSettings(BaseImporterSettings): + """The basic Overdrive configuration""" + + external_account_id: Optional[str] = FormField( + form=ConfigurationFormItem( + label=_("Library ID"), + type=ConfigurationFormItemType.TEXT, + description="The library identifier.", + required=True, + ), + ) + overdrive_website_id: str = FormField( + form=ConfigurationFormItem( + label=_("Website ID"), + type=ConfigurationFormItemType.TEXT, + description="The web site identifier.", + required=True, + ) + ) + overdrive_client_key: str = FormField( + form=ConfigurationFormItem( + label=_("Client Key"), + type=ConfigurationFormItemType.TEXT, + description="The Overdrive client key.", + required=True, + ) + ) + overdrive_client_secret: str = FormField( + form=ConfigurationFormItem( + label=_("Client Secret"), + type=ConfigurationFormItemType.TEXT, + description="The Overdrive client secret.", + required=True, + ) + ) + + overdrive_server_nickname: str = FormField( + default=OverdriveConstants.PRODUCTION_SERVERS, + form=ConfigurationFormItem( + label=_("Server family"), + type=ConfigurationFormItemType.SELECT, + required=False, + description="Unless you hear otherwise from Overdrive, your integration should use their production servers.", + options={ + OverdriveConstants.PRODUCTION_SERVERS: ("Production"), + OverdriveConstants.TESTING_SERVERS: _("Testing"), + }, + ), + ) + class OverdriveLibrarySettings(BaseCirculationEbookLoanSettings): ils_name: str = FormField( - default=OverdriveCoreAPI.ILS_NAME_DEFAULT, + default=OverdriveConstants.ILS_NAME_DEFAULT, form=ConfigurationFormItem( label=_("ILS Name"), description=_( @@ -89,11 +201,10 @@ class OverdriveChildSettings(BaseSettings): class OverdriveAPI( - OverdriveCoreAPI, BaseCirculationAPI, HasCollectionSelfTests, HasChildIntegrationConfiguration, - OverdriveAPIConstants, + OverdriveConstants, ): NAME = ExternalIntegration.OVERDRIVE DESCRIPTION = _( @@ -130,9 +241,9 
@@ class OverdriveAPI( # use other formats. LOCK_IN_FORMATS = [ x - for x in OverdriveCoreAPI.FORMATS - if x not in OverdriveAPIConstants.STREAMING_FORMATS - and x not in OverdriveAPIConstants.MANIFEST_INTERNAL_FORMATS + for x in OverdriveConstants.FORMATS + if x not in OverdriveConstants.STREAMING_FORMATS + and x not in OverdriveConstants.MANIFEST_INTERNAL_FORMATS ] # TODO: This is a terrible choice but this URL should never be @@ -146,6 +257,91 @@ class OverdriveAPI( "PatronHasExceededCheckoutLimit_ForCPC": PatronLoanLimitReached, } + # An OverDrive defined constant indicating the "main" or parent account + # associated with an OverDrive collection. + OVERDRIVE_MAIN_ACCOUNT_ID = -1 + + log = logging.getLogger("Overdrive API") + + # A lock for threaded usage. + lock = RLock() + + # Production and testing have different host names for some of the + # API endpoints. This is configurable on the collection level. + HOSTS = { + OverdriveConstants.PRODUCTION_SERVERS: dict( + host="https://api.overdrive.com", + patron_host="https://patron.api.overdrive.com", + ), + OverdriveConstants.TESTING_SERVERS: dict( + host="https://integration.api.overdrive.com", + patron_host="https://integration-patron.api.overdrive.com", + ), + } + + # Production and testing setups use the same URLs for Client + # Authentication and Patron Authentication, but we use the same + # system as for other hostnames to give a consistent look to the + # templates. + for host in list(HOSTS.values()): + host["oauth_patron_host"] = "https://oauth-patron.overdrive.com" + host["oauth_host"] = "https://oauth.overdrive.com" + + # Each of these endpoint URLs has a slot to plug in one of the + # appropriate servers. This will be filled in either by a call to + # the endpoint() method (if there are other variables in the + # template), or by the _do_get or _do_post methods (if there are + # no other variables). 
+ TOKEN_ENDPOINT = "%(oauth_host)s/token" + PATRON_TOKEN_ENDPOINT = "%(oauth_patron_host)s/patrontoken" + + LIBRARY_ENDPOINT = "%(host)s/v1/libraries/%(library_id)s" + ADVANTAGE_LIBRARY_ENDPOINT = ( + "%(host)s/v1/libraries/%(parent_library_id)s/advantageAccounts/%(library_id)s" + ) + ALL_PRODUCTS_ENDPOINT = ( + "%(host)s/v1/collections/%(collection_token)s/products?sort=%(sort)s" + ) + METADATA_ENDPOINT = ( + "%(host)s/v1/collections/%(collection_token)s/products/%(item_id)s/metadata" + ) + EVENTS_ENDPOINT = "%(host)s/v1/collections/%(collection_token)s/products?lastUpdateTime=%(lastupdatetime)s&sort=%(sort)s&limit=%(limit)s" + AVAILABILITY_ENDPOINT = "%(host)s/v2/collections/%(collection_token)s/products/%(product_id)s/availability" + + PATRON_INFORMATION_ENDPOINT = "%(patron_host)s/v1/patrons/me" + CHECKOUTS_ENDPOINT = "%(patron_host)s/v1/patrons/me/checkouts" + CHECKOUT_ENDPOINT = "%(patron_host)s/v1/patrons/me/checkouts/%(overdrive_id)s" + FORMATS_ENDPOINT = ( + "%(patron_host)s/v1/patrons/me/checkouts/%(overdrive_id)s/formats" + ) + HOLDS_ENDPOINT = "%(patron_host)s/v1/patrons/me/holds" + HOLD_ENDPOINT = "%(patron_host)s/v1/patrons/me/holds/%(product_id)s" + ME_ENDPOINT = "%(patron_host)s/v1/patrons/me" + + MAX_CREDENTIAL_AGE = 50 * 60 + + PAGE_SIZE_LIMIT = 300 + EVENT_SOURCE = "Overdrive" + + EVENT_DELAY = datetime.timedelta(minutes=120) + + # The formats that can be read by the default Library Simplified reader. + DEFAULT_READABLE_FORMATS = { + "ebook-epub-open", + "ebook-epub-adobe", + "ebook-pdf-open", + "audiobook-overdrive", + } + + # The formats that indicate the book has been fulfilled on an + # incompatible platform and just can't be fulfilled on Simplified + # in any format. 
+ INCOMPATIBLE_PLATFORM_FORMATS = {"ebook-kindle"} + + OVERDRIVE_READ_FORMAT = "ebook-overdrive" + + TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" + @classmethod def settings_class(cls): return OverdriveSettings @@ -166,12 +362,487 @@ def description(self): def __init__(self, _db, collection): super().__init__(_db, collection) + if collection.protocol != ExternalIntegration.OVERDRIVE: + raise ValueError( + "Collection protocol is %s, but passed into OverdriveAPI!" + % collection.protocol + ) + + _library_id = collection.external_account_id + if not _library_id: + raise ValueError( + "Collection %s must have an external account ID" % collection.id + ) + else: + self._library_id = _library_id + + self._db = _db + self._external_integration = collection.external_integration + if collection.id is None: + raise ValueError( + "Collection passed into OverdriveAPI must have an ID, but %s does not" + % collection.name + ) + self._collection_id = collection.id + + # Initialize configuration information. + self._integration_configuration_id = cast( + int, collection.integration_configuration.id + ) + self._configuration = OverdriveData() + + if collection.parent: + # This is an Overdrive Advantage account. + self.parent_library_id = collection.parent.external_account_id + + # We're going to inherit all of the Overdrive credentials + # from the parent (the main Overdrive account), except for the + # library ID, which we already set. 
+ parent_integration = collection.parent.integration_configuration + parent_config = self.settings_class()(**parent_integration.settings_dict) + for key in OverdriveConstants.OVERDRIVE_CONFIGURATION_KEYS: + parent_value = getattr(parent_config, key, None) + setattr(self._configuration, key, parent_value) + else: + self.parent_library_id = None + + # Self settings should override parent settings where available + settings = collection.integration_configuration.settings_dict + for name, schema in self.settings_class().schema()["properties"].items(): + if name in settings or not hasattr(self._configuration, name): + setattr( + self._configuration, name, settings.get(name, schema.get("default")) + ) + + if not self._configuration.overdrive_client_key: + raise CannotLoadConfiguration("Overdrive client key is not configured") + if not self._configuration.overdrive_client_secret: + raise CannotLoadConfiguration( + "Overdrive client password/secret is not configured" + ) + if not self._configuration.overdrive_website_id: + raise CannotLoadConfiguration("Overdrive website ID is not configured") + + self._server_nickname = self._configuration.overdrive_server_nickname + + self._hosts = self._determine_hosts(server_nickname=self._server_nickname) + + # This is set by an access to .token, or by a call to + # check_creds() or refresh_creds(). + self._token = None + + # This is set by an access to .collection_token + self._collection_token = None self.overdrive_bibliographic_coverage_provider = ( OverdriveBibliographicCoverageProvider(collection, api_class=self) ) - def external_integration(self, _db): - return self.collection.external_integration + def configuration(self): + """Overdrive has a different implementation for configuration""" + return self._configuration + + def _determine_hosts(self, *, server_nickname: str) -> Dict[str, str]: + # Figure out which hostnames we'll be using when constructing + # endpoint URLs. 
+ if server_nickname not in self.HOSTS: + server_nickname = OverdriveConstants.PRODUCTION_SERVERS + + return dict(self.HOSTS[server_nickname]) + + def external_integration(self, db: Session) -> ExternalIntegration: + return self._external_integration + + def endpoint(self, url: str, **kwargs) -> str: + """Create the URL to an Overdrive API endpoint. + + :param url: A template for the URL. + :param kwargs: Arguments to be interpolated into the template. + The server hostname will be interpolated automatically; you + don't have to pass it in. + """ + if not "%(" in url: + # Nothing to interpolate. + return url + kwargs.update(self._hosts) + return url % kwargs + + @property + def token(self): + if not self._token: + self.check_creds() + return self._token + + @property + def collection_token(self): + """Get the token representing this particular Overdrive collection. + + As a side effect, this will verify that the Overdrive + credentials are working. + """ + if not self._collection_token: + self.check_creds() + library = self.get_library() + error = library.get("errorCode") + if error: + message = library.get("message") + raise CannotLoadConfiguration( + "Overdrive credentials are valid but could not fetch library: %s" + % message + ) + self._collection_token = library["collectionToken"] + return self._collection_token + + @property + def collection(self) -> Optional[Collection]: + return Collection.by_id(self._db, id=self._collection_id) + + @property + def source(self): + return DataSource.lookup(self._db, DataSource.OVERDRIVE) + + def ils_name(self, library): + """Determine the ILS name to use for the given Library.""" + config = self.integration_configuration().for_library(library.id) + if not config: + return self.ILS_NAME_DEFAULT + return config.settings_dict.get(self.ILS_NAME_KEY, self.ILS_NAME_DEFAULT) + + @property + def advantage_library_id(self): + """The library ID for this library, as we should look for it in + certain API documents served by Overdrive. 
+ + For ordinary collections (ie non-Advantage) with or without associated + Advantage (ie child) collections shared among libraries, this will be + equal to the OVERDRIVE_MAIN_ACCOUNT_ID. + + For Overdrive Advantage accounts, this will be the numeric + value of the Overdrive library ID. + """ + if self.parent_library_id is None: + # This is not an Overdrive Advantage collection. + # + # Instead of looking for the library ID itself in these + # documents, we should look for the constant main account id. + return self.OVERDRIVE_MAIN_ACCOUNT_ID + return int(self._library_id) + + def check_creds(self, force_refresh=False): + """If the Bearer Token has expired, update it.""" + with self.lock: + refresh_on_lookup = self.refresh_creds + if force_refresh: + refresh_on_lookup = lambda x: x + + credential = self.credential_object(refresh_on_lookup) + if force_refresh: + self.refresh_creds(credential) + self._token = credential.credential + + def credential_object(self, refresh): + """Look up the Credential object that allows us to use + the Overdrive API. 
+ """ + return Credential.lookup( + self._db, + DataSource.OVERDRIVE, + None, + None, + refresh, + collection=self.collection, + ) + + def refresh_creds(self, credential): + """Fetch a new Bearer Token and update the given Credential object.""" + response = self.token_post( + self.TOKEN_ENDPOINT, + dict(grant_type="client_credentials"), + allowed_response_codes=[200], + ) + data = response.json() + self._update_credential(credential, data) + self._token = credential.credential + + def get( + self, url: str, extra_headers={}, exception_on_401=False + ) -> Tuple[int, CaseInsensitiveDict, bytes]: + """Make an HTTP GET request using the active Bearer Token.""" + request_headers = dict(Authorization="Bearer %s" % self.token) + request_headers.update(extra_headers) + + response: Response = self._do_get( + url, request_headers, allowed_response_codes=["2xx", "3xx", "401", "404"] + ) + status_code: int = response.status_code + headers: CaseInsensitiveDict = response.headers + content: bytes = response.content + + if status_code == 401: + if exception_on_401: + # This is our second try. Give up. + raise BadResponseException.from_response( + url, + "Something's wrong with the Overdrive OAuth Bearer Token!", + (status_code, headers, content), + ) + else: + # Refresh the token and try again. 
+ self.check_creds(True) + return self.get(url, extra_headers, True) + else: + return status_code, headers, content + + @property + def token_authorization_header(self) -> str: + s = b"%s:%s" % (self.client_key(), self.client_secret()) + return "Basic " + base64.standard_b64encode(s).strip() + + @property + def fulfillment_authorization_header(self) -> str: + is_test_mode = ( + True + if self._server_nickname == OverdriveConstants.TESTING_SERVERS + else False + ) + try: + client_credentials = Configuration.overdrive_fulfillment_keys( + testing=is_test_mode + ) + except CannotLoadConfiguration as e: + raise CannotFulfill(*e.args) + + s = b"%s:%s" % ( + client_credentials["key"].encode(), + client_credentials["secret"].encode(), + ) + return "Basic " + base64.standard_b64encode(s).strip() + + def token_post( + self, + url: str, + payload: Dict[str, str], + is_fulfillment=False, + headers={}, + **kwargs, + ) -> Response: + """Make an HTTP POST request for purposes of getting an OAuth token.""" + headers = dict(headers) + headers["Authorization"] = ( + self.token_authorization_header + if not is_fulfillment + else self.fulfillment_authorization_header + ) + return self._do_post(url, payload, headers, **kwargs) + + @staticmethod + def _update_credential(credential, overdrive_data): + """Copy Overdrive OAuth data into a Credential object.""" + credential.credential = overdrive_data["access_token"] + expires_in = overdrive_data["expires_in"] * 0.9 + credential.expires = utc_now() + datetime.timedelta(seconds=expires_in) + + @property + def _library_endpoint(self) -> str: + """Which URL should we go to to get information about this collection? + + If this is an ordinary Overdrive account, we get information + from LIBRARY_ENDPOINT. + + If this is an Overdrive Advantage account, we get information + from LIBRARY_ADVANTAGE_ENDPOINT. + """ + args = dict(library_id=self._library_id) + if self.parent_library_id: + # This is an Overdrive advantage account. 
+ args["parent_library_id"] = self.parent_library_id + endpoint = self.ADVANTAGE_LIBRARY_ENDPOINT + else: + endpoint = self.LIBRARY_ENDPOINT + return self.endpoint(endpoint, **args) + + def get_library(self): + """Get basic information about the collection, including + a link to the titles in the collection. + """ + url = self._library_endpoint + with self.lock: + representation, cached = Representation.get( + self._db, + url, + self.get, + exception_handler=Representation.reraise_exception, + ) + return json.loads(representation.content) + + def get_advantage_accounts(self): + """Find all the Overdrive Advantage accounts managed by this library. + + :yield: A sequence of OverdriveAdvantageAccount objects. + """ + library = self.get_library() + links = library.get("links", {}) + advantage = links.get("advantageAccounts") + if not advantage: + return [] + if advantage: + # This library has Overdrive Advantage accounts, or at + # least a link where some may be found. + advantage_url = advantage.get("href") + if not advantage_url: + return + representation, cached = Representation.get( + self._db, + advantage_url, + self.get, + exception_handler=Representation.reraise_exception, + ) + return OverdriveAdvantageAccount.from_representation(representation.content) + + def all_ids(self): + """Get IDs for every book in the system, with the most recently added + ones at the front. + """ + next_link = self._all_products_link + while next_link: + page_inventory, next_link = self._get_book_list_page(next_link, "next") + + yield from page_inventory + + @property + def _all_products_link(self) -> str: + url = self.endpoint( + self.ALL_PRODUCTS_ENDPOINT, + collection_token=self.collection_token, + sort="dateAdded:desc", + ) + return self.make_link_safe(url) + + def _get_book_list_page(self, link, rel_to_follow="next", extractor_class=None): + """Process a page of inventory whose circulation we need to check. + + Returns a 2-tuple: (availability_info, next_link). 
+ `availability_info` is a list of dictionaries, each containing + basic availability and bibliographic information about + one book. + `next_link` is a link to the next page of results. + """ + extractor_class = extractor_class or OverdriveRepresentationExtractor + # We don't cache this because it changes constantly. + status_code, headers, content = self.get(link, {}) + if isinstance(content, (bytes, str)): + content = json.loads(content) + + # Find the link to the next page of results, if any. + next_link = extractor_class.link(content, rel_to_follow) + + # Prepare to get availability information for all the books on + # this page. + availability_queue = extractor_class.availability_link_list(content) + return availability_queue, next_link + + def recently_changed_ids(self, start, cutoff): + """Get IDs of books whose status has changed between the start time + and now. + """ + # `cutoff` is not supported by Overdrive, so we ignore it. All + # we can do is get events between the start time and now. + + last_update_time = start - self.EVENT_DELAY + self.log.info("Asking for circulation changes since %s", last_update_time) + last_update = last_update_time.strftime(self.TIME_FORMAT) + + next_link = self.endpoint( + self.EVENTS_ENDPOINT, + lastupdatetime=last_update, + sort="popularity:desc", + limit=self.PAGE_SIZE_LIMIT, + collection_token=self.collection_token, + ) + next_link = self.make_link_safe(next_link) + while next_link: + page_inventory, next_link = self._get_book_list_page(next_link) + # We won't be sending out any events for these books yet, + # because we don't know if anything changed, but we will + # be putting them on the list of inventory items to + # refresh. At that point we will send out events. 
+ yield from page_inventory + + def metadata_lookup(self, identifier): + """Look up metadata for an Overdrive identifier.""" + url = self.endpoint( + self.METADATA_ENDPOINT, + collection_token=self.collection_token, + item_id=identifier.identifier, + ) + status_code, headers, content = self.get(url, {}) + if isinstance(content, (bytes, str)): + content = json.loads(content) + return content + + def metadata_lookup_obj(self, identifier): + url = self.endpoint( + self.METADATA_ENDPOINT, + collection_token=self.collection_token, + item_id=identifier, + ) + status_code, headers, content = self.get(url, {}) + if isinstance(content, (bytes, str)): + content = json.loads(content) + return OverdriveRepresentationExtractor.book_info_to_metadata(content) + + @classmethod + def make_link_safe(cls, url: str) -> str: + """Turn a server-provided link into a link the server will accept! + + The {} part is completely obnoxious and I have complained about it to + Overdrive. + + The availability part is to make sure we always use v2 of the + availability API, even if Overdrive sent us a link to v1. 
+ """ + parts = list(urlsplit(url)) + parts[2] = quote(parts[2]) + endings = ("/availability", "/availability/") + if parts[2].startswith("/v1/collections/") and any( + parts[2].endswith(x) for x in endings + ): + parts[2] = parts[2].replace("/v1/collections/", "/v2/collections/", 1) + query_string = parts[3] + query_string = query_string.replace("+", "%2B") + query_string = query_string.replace(":", "%3A") + query_string = query_string.replace("{", "%7B") + query_string = query_string.replace("}", "%7D") + parts[3] = query_string + return urlunsplit(tuple(parts)) + + def _do_get(self, url: str, headers, **kwargs) -> Response: + """This method is overridden in MockOverdriveAPI.""" + url = self.endpoint(url) + kwargs["max_retry_count"] = int(self._configuration.max_retry_count) + kwargs["timeout"] = 120 + return HTTP.get_with_timeout(url, headers=headers, **kwargs) + + def _do_post(self, url: str, payload, headers, **kwargs) -> Response: + """This method is overridden in MockOverdriveAPI.""" + url = self.endpoint(url) + kwargs["max_retry_count"] = int(self._configuration.max_retry_count) + kwargs["timeout"] = 120 + return HTTP.post_with_timeout(url, payload, headers=headers, **kwargs) + + def website_id(self) -> bytes: + return self._configuration.overdrive_website_id.encode("utf-8") + + def client_key(self) -> bytes: + return self._configuration.overdrive_client_key.encode("utf-8") + + def client_secret(self) -> bytes: + return self._configuration.overdrive_client_secret.encode("utf-8") + + def library_id(self) -> str: + return self._library_id + + def hosts(self) -> Dict[str, str]: + return dict(self._hosts) def _run_self_tests(self, _db): result = self.run_test( @@ -616,7 +1287,7 @@ def fulfill(self, patron, pin, licensepool, internal_format, **kwargs): def get_fulfillment_link( self, patron: Patron, pin: Optional[str], overdrive_id: str, format_type: str - ) -> Union["OverdriveManifestFulfillmentInfo", Tuple[str, str]]: + ) -> 
Union[OverdriveManifestFulfillmentInfo, Tuple[str, str]]: """Get the link to the ACSM or manifest for an existing loan.""" try: loan = self.get_loan(patron, pin, overdrive_id) @@ -1469,6 +2140,894 @@ def process_item(self, identifier): break +class OverdriveData: + overdrive_client_key: str + overdrive_client_secret: str + overdrive_website_id: str + overdrive_server_nickname: str = OverdriveConstants.PRODUCTION_SERVERS + max_retry_count: int = 0 + + +class OverdriveRepresentationExtractor: + """Extract useful information from Overdrive's JSON representations.""" + + log = logging.getLogger("Overdrive representation extractor") + + def __init__(self, api): + """Constructor. + + :param api: An OverdriveAPI object. This will be used when deciding + which portions of a JSON representation are relevant to the active + Overdrive collection. + """ + self.library_id = api.advantage_library_id + + @classmethod + def availability_link_list(cls, book_list): + """:return: A list of dictionaries with keys `id`, `title`, `availability_link`.""" + l = [] + if not "products" in book_list: + return [] + + products = book_list["products"] + for product in products: + if not "id" in product: + cls.log.warning("No ID found in %r", product) + continue + book_id = product["id"] + data = dict( + id=book_id, + title=product.get("title"), + author_name=None, + date_added=product.get("dateAdded"), + ) + if "primaryCreator" in product: + creator = product["primaryCreator"] + if creator.get("role") == "Author": + data["author_name"] = creator.get("name") + links = product.get("links", []) + if "availability" in links: + link = links["availability"]["href"] + data["availability_link"] = OverdriveAPI.make_link_safe(link) + else: + logging.getLogger("Overdrive API").warning( + "No availability link for %s", book_id + ) + l.append(data) + return l + + @classmethod + def link(self, page, rel): + if "links" in page and rel in page["links"]: + raw_link = page["links"][rel]["href"] + link = 
OverdriveAPI.make_link_safe(raw_link) + else: + link = None + return link + + format_data_for_overdrive_format = { + "ebook-pdf-adobe": (Representation.PDF_MEDIA_TYPE, DeliveryMechanism.ADOBE_DRM), + "ebook-pdf-open": (Representation.PDF_MEDIA_TYPE, DeliveryMechanism.NO_DRM), + "ebook-epub-adobe": ( + Representation.EPUB_MEDIA_TYPE, + DeliveryMechanism.ADOBE_DRM, + ), + "ebook-epub-open": (Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM), + "audiobook-mp3": ("application/x-od-media", DeliveryMechanism.OVERDRIVE_DRM), + "music-mp3": ("application/x-od-media", DeliveryMechanism.OVERDRIVE_DRM), + "ebook-overdrive": [ + ( + MediaTypes.OVERDRIVE_EBOOK_MANIFEST_MEDIA_TYPE, + DeliveryMechanism.LIBBY_DRM, + ), + ( + DeliveryMechanism.STREAMING_TEXT_CONTENT_TYPE, + DeliveryMechanism.STREAMING_DRM, + ), + ], + "audiobook-overdrive": [ + ( + MediaTypes.OVERDRIVE_AUDIOBOOK_MANIFEST_MEDIA_TYPE, + DeliveryMechanism.LIBBY_DRM, + ), + ( + DeliveryMechanism.STREAMING_AUDIO_CONTENT_TYPE, + DeliveryMechanism.STREAMING_DRM, + ), + ], + "video-streaming": ( + DeliveryMechanism.STREAMING_VIDEO_CONTENT_TYPE, + DeliveryMechanism.STREAMING_DRM, + ), + "ebook-kindle": ( + DeliveryMechanism.KINDLE_CONTENT_TYPE, + DeliveryMechanism.KINDLE_DRM, + ), + "periodicals-nook": ( + DeliveryMechanism.NOOK_CONTENT_TYPE, + DeliveryMechanism.NOOK_DRM, + ), + } + + # A mapping of the overdrive format name to end sample content type + # Overdrive samples are not DRM protected so the links should be + # stored as the end sample content type + sample_format_to_content_type = { + "ebook-overdrive": "text/html", + "audiobook-wma": "audio/x-ms-wma", + "audiobook-mp3": "audio/mpeg", + "audiobook-overdrive": "text/html", + "ebook-epub-adobe": "application/epub+zip", + "magazine-overdrive": "text/html", + } + + @classmethod + def internal_formats(cls, overdrive_format): + """Yield all internal formats for the given Overdrive format. + + Some Overdrive formats become multiple internal formats. 
+ + :yield: A sequence of (content type, DRM system) 2-tuples + """ + result = cls.format_data_for_overdrive_format.get(overdrive_format) + if not result: + return + if isinstance(result, list): + yield from result + else: + yield result + + ignorable_overdrive_formats: Set[str] = set() + + overdrive_role_to_simplified_role = { + "actor": Contributor.ACTOR_ROLE, + "artist": Contributor.ARTIST_ROLE, + "book producer": Contributor.PRODUCER_ROLE, + "associated name": Contributor.ASSOCIATED_ROLE, + "author": Contributor.AUTHOR_ROLE, + "author of introduction": Contributor.INTRODUCTION_ROLE, + "author of foreword": Contributor.FOREWORD_ROLE, + "author of afterword": Contributor.AFTERWORD_ROLE, + "contributor": Contributor.CONTRIBUTOR_ROLE, + "colophon": Contributor.COLOPHON_ROLE, + "adapter": Contributor.ADAPTER_ROLE, + "etc.": Contributor.UNKNOWN_ROLE, + "cast member": Contributor.ACTOR_ROLE, + "collaborator": Contributor.COLLABORATOR_ROLE, + "compiler": Contributor.COMPILER_ROLE, + "composer": Contributor.COMPOSER_ROLE, + "copyright holder": Contributor.COPYRIGHT_HOLDER_ROLE, + "director": Contributor.DIRECTOR_ROLE, + "editor": Contributor.EDITOR_ROLE, + "engineer": Contributor.ENGINEER_ROLE, + "executive producer": Contributor.EXECUTIVE_PRODUCER_ROLE, + "illustrator": Contributor.ILLUSTRATOR_ROLE, + "musician": Contributor.MUSICIAN_ROLE, + "narrator": Contributor.NARRATOR_ROLE, + "other": Contributor.UNKNOWN_ROLE, + "performer": Contributor.PERFORMER_ROLE, + "producer": Contributor.PRODUCER_ROLE, + "translator": Contributor.TRANSLATOR_ROLE, + "photographer": Contributor.PHOTOGRAPHER_ROLE, + "lyricist": Contributor.LYRICIST_ROLE, + "transcriber": Contributor.TRANSCRIBER_ROLE, + "designer": Contributor.DESIGNER_ROLE, + } + + overdrive_medium_to_simplified_medium = { + "eBook": Edition.BOOK_MEDIUM, + "Video": Edition.VIDEO_MEDIUM, + "Audiobook": Edition.AUDIO_MEDIUM, + "Music": Edition.MUSIC_MEDIUM, + "Periodicals": Edition.PERIODICAL_MEDIUM, + } + + DATE_FORMAT = 
"%Y-%m-%d" + + @classmethod + def parse_roles(cls, id, rolestring): + rolestring = rolestring.lower() + roles = [x.strip() for x in rolestring.split(",")] + if " and " in roles[-1]: + roles = roles[:-1] + [x.strip() for x in roles[-1].split(" and ")] + processed = [] + for x in roles: + if x not in cls.overdrive_role_to_simplified_role: + cls.log.error("Could not process role %s for %s", x, id) + else: + processed.append(cls.overdrive_role_to_simplified_role[x]) + return processed + + def book_info_to_circulation(self, book): + """Note: The json data passed into this method is from a different file/stream + from the json data that goes into the book_info_to_metadata() method. + """ + # In Overdrive, 'reserved' books show up as books on + # hold. There is no separate notion of reserved books. + licenses_reserved = 0 + + licenses_owned = None + licenses_available = None + patrons_in_hold_queue = None + + # TODO: The only reason this works for a NotFound error is the + # circulation code sticks the known book ID into `book` ahead + # of time. That's a code smell indicating that this system + # needs to be refactored. + if "reserveId" in book and not "id" in book: + book["id"] = book["reserveId"] + if not "id" in book: + return None + overdrive_id = book["id"] + primary_identifier = IdentifierData(Identifier.OVERDRIVE_ID, overdrive_id) + # TODO: We might be able to use this information to avoid the + # need for explicit configuration of Advantage collections, or + # at least to keep Advantage collections more up-to-date than + # they would be otherwise, as a side effect of updating + # regular Overdrive collections. + + # TODO: this would be the place to handle simultaneous use + # titles -- these can be detected with + # availabilityType="AlwaysAvailable" and have their + # .licenses_owned set to LicensePool.UNLIMITED_ACCESS. 
+ # see http://developer.overdrive.com/apis/library-availability-new + + # TODO: Cost-per-circ titles + # (availabilityType="LimitedAvailablility") can be handled + # similarly, though those can abruptly become unavailable, so + # UNLIMITED_ACCESS is probably not appropriate. + + error_code = book.get("errorCode") + # TODO: It's not clear what other error codes there might be. + # The current behavior will respond to errors other than + # NotFound by leaving the book alone, but this might not be + # the right behavior. + if error_code == "NotFound": + licenses_owned = 0 + licenses_available = 0 + patrons_in_hold_queue = 0 + elif book.get("isOwnedByCollections") is not False: + # We own this book. + licenses_owned = 0 + licenses_available = 0 + + for account in self._get_applicable_accounts(book.get("accounts", [])): + licenses_owned += int(account.get("copiesOwned", 0)) + licenses_available += int(account.get("copiesAvailable", 0)) + + if "numberOfHolds" in book: + if patrons_in_hold_queue is None: + patrons_in_hold_queue = 0 + patrons_in_hold_queue += book["numberOfHolds"] + + return CirculationData( + data_source=DataSource.OVERDRIVE, + primary_identifier=primary_identifier, + licenses_owned=licenses_owned, + licenses_available=licenses_available, + licenses_reserved=licenses_reserved, + patrons_in_hold_queue=patrons_in_hold_queue, + ) + + def _get_applicable_accounts( + self, accounts: List[Dict[str, Any]] + ) -> List[Dict[str, Any]]: + """ + Returns those accounts from the accounts array that apply the + current overdrive collection context. + + If this is an overdrive parent collection, we want to return accounts + associated with the main OverDrive "library" and any non-main account + with sharing enabled. + + If this is a child OverDrive collection, then we return only the + account associated with that child's OverDrive Advantage "library". 
+ Additionally, we want to exclude the account if it is "shared" since + we will be counting it with the parent collection. + """ + + if self.library_id == OverdriveAPI.OVERDRIVE_MAIN_ACCOUNT_ID: + # this is a parent collection + filtered_result = filter( + lambda account: account.get("id") + == OverdriveAPI.OVERDRIVE_MAIN_ACCOUNT_ID + or account.get("shared", False), + accounts, + ) + else: + # this is child collection + filtered_result = filter( + lambda account: account.get("id") == self.library_id + and not account.get("shared", False), + accounts, + ) + + return list(filtered_result) + + @classmethod + def image_link_to_linkdata(cls, link, rel): + if not link or not "href" in link: + return None + href = link["href"] + if "00000000-0000-0000-0000" in href: + # This is a stand-in cover for preorders. It's better not + # to have a cover at all -- we might be able to get one + # later, or from another source. + return None + href = OverdriveAPI.make_link_safe(href) + media_type = link.get("type", None) + return LinkData(rel=rel, href=href, media_type=media_type) + + @classmethod + def book_info_to_metadata( + cls, book, include_bibliographic=True, include_formats=True + ): + """Turn Overdrive's JSON representation of a book into a Metadata + object. + + Note: The json data passed into this method is from a different file/stream + from the json data that goes into the book_info_to_circulation() method. + """ + if not "id" in book: + return None + overdrive_id = book["id"] + primary_identifier = IdentifierData(Identifier.OVERDRIVE_ID, overdrive_id) + + # If we trust classification data, we'll give it this weight. + # Otherwise we'll probably give it a fraction of this weight. 
+ trusted_weight = Classification.TRUSTED_DISTRIBUTOR_WEIGHT + + if include_bibliographic: + title = book.get("title", None) + sort_title = book.get("sortTitle") + subtitle = book.get("subtitle", None) + series = book.get("series", None) + publisher = book.get("publisher", None) + imprint = book.get("imprint", None) + + if "publishDate" in book: + published = strptime_utc(book["publishDate"][:10], cls.DATE_FORMAT) + else: + published = None + + languages = [l["code"] for l in book.get("languages", [])] + if "eng" in languages or not languages: + language = "eng" + else: + language = sorted(languages)[0] + + contributors = [] + for creator in book.get("creators", []): + sort_name = creator["fileAs"] + display_name = creator["name"] + role = creator["role"] + roles = cls.parse_roles(overdrive_id, role) or [ + Contributor.UNKNOWN_ROLE + ] + contributor = ContributorData( + sort_name=sort_name, + display_name=display_name, + roles=roles, + biography=creator.get("bioText", None), + ) + contributors.append(contributor) + + subjects = [] + for sub in book.get("subjects", []): + subject = SubjectData( + type=Subject.OVERDRIVE, + identifier=sub["value"], + weight=trusted_weight, + ) + subjects.append(subject) + + for sub in book.get("keywords", []): + subject = SubjectData( + type=Subject.TAG, + identifier=sub["value"], + # We don't use TRUSTED_DISTRIBUTOR_WEIGHT because + # we don't know where the tags come from -- + # probably Overdrive users -- and they're + # frequently wrong. + weight=1, + ) + subjects.append(subject) + + extra = dict() + if "grade_levels" in book: + # n.b. Grade levels are measurements of reading level, not + # age appropriateness. We can use them as a measure of age + # appropriateness in a pinch, but we weight them less + # heavily than TRUSTED_DISTRIBUTOR_WEIGHT. 
+ for i in book["grade_levels"]: + subject = SubjectData( + type=Subject.GRADE_LEVEL, + identifier=i["value"], + weight=trusted_weight / 10, + ) + subjects.append(subject) + + overdrive_medium = book.get("mediaType", None) + if ( + overdrive_medium + and overdrive_medium not in cls.overdrive_medium_to_simplified_medium + ): + cls.log.error( + "Could not process medium %s for %s", overdrive_medium, overdrive_id + ) + + medium = cls.overdrive_medium_to_simplified_medium.get( + overdrive_medium, Edition.BOOK_MEDIUM + ) + + measurements = [] + if "awards" in book: + extra["awards"] = book.get("awards", []) + num_awards = len(extra["awards"]) + measurements.append( + MeasurementData(Measurement.AWARDS, str(num_awards)) + ) + + for name, subject_type in ( + ("ATOS", Subject.ATOS_SCORE), + ("lexileScore", Subject.LEXILE_SCORE), + ("interestLevel", Subject.INTEREST_LEVEL), + ): + if not name in book: + continue + identifier = str(book[name]) + subjects.append( + SubjectData( + type=subject_type, identifier=identifier, weight=trusted_weight + ) + ) + + for grade_level_info in book.get("gradeLevels", []): + grade_level = grade_level_info.get("value") + subjects.append( + SubjectData( + type=Subject.GRADE_LEVEL, + identifier=grade_level, + weight=trusted_weight, + ) + ) + + identifiers = [] + links = [] + sample_hrefs = set() + for format in book.get("formats", []): + for new_id in format.get("identifiers", []): + t = new_id["type"] + v = new_id["value"] + orig_v = v + type_key = None + if t == "ASIN": + type_key = Identifier.ASIN + elif t == "ISBN": + type_key = Identifier.ISBN + if len(v) == 10: + v = isbnlib.to_isbn13(v) + if v is None or not isbnlib.is_isbn13(v): + # Overdrive sometimes uses invalid values + # like "n/a" as placeholders. Ignore such + # values to avoid a situation where hundreds of + # books appear to have the same ISBN. ISBNs + # which fail check digit checks or are invalid + # also can occur. Log them for review. 
+ cls.log.info("Bad ISBN value provided: %s", orig_v) + continue + elif t == "DOI": + type_key = Identifier.DOI + elif t == "UPC": + type_key = Identifier.UPC + elif t == "PublisherCatalogNumber": + continue + if type_key and v: + identifiers.append(IdentifierData(type_key, v, 1)) + + # Samples become links. + if "samples" in format: + for sample_info in format["samples"]: + href = sample_info["url"] + # Have we already parsed this sample? Overdrive repeats samples per format + if href in sample_hrefs: + continue + + # Every sample has its own format type + overdrive_format_name = sample_info.get("formatType") + if not overdrive_format_name: + # Malformed sample + continue + content_type = cls.sample_format_to_content_type.get( + overdrive_format_name + ) + if not content_type: + # Unusable by us. + cls.log.warning( + f"Did not find a sample format mapping for '{overdrive_format_name}': {href}" + ) + continue + + if Representation.is_media_type(content_type): + links.append( + LinkData( + rel=Hyperlink.SAMPLE, + href=href, + media_type=content_type, + ) + ) + sample_hrefs.add(href) + + # A cover and its thumbnail become a single LinkData. + if "images" in book: + images = book["images"] + image_data = cls.image_link_to_linkdata( + images.get("cover"), Hyperlink.IMAGE + ) + for name in ["cover300Wide", "cover150Wide", "thumbnail"]: + # Try to get a thumbnail that's as close as possible + # to the size we use. + image = images.get(name) + thumbnail_data = cls.image_link_to_linkdata( + image, Hyperlink.THUMBNAIL_IMAGE + ) + if not image_data: + image_data = cls.image_link_to_linkdata(image, Hyperlink.IMAGE) + if thumbnail_data: + break + + if image_data: + if thumbnail_data: + image_data.thumbnail = thumbnail_data + links.append(image_data) + + # Descriptions become links. 
+ short = book.get("shortDescription") + full = book.get("fullDescription") + if full: + links.append( + LinkData( + rel=Hyperlink.DESCRIPTION, + content=full, + media_type="text/html", + ) + ) + + if short and (not full or not full.startswith(short)): + links.append( + LinkData( + rel=Hyperlink.SHORT_DESCRIPTION, + content=short, + media_type="text/html", + ) + ) + + # Add measurements: rating and popularity + if book.get("starRating") is not None and book["starRating"] > 0: + measurements.append( + MeasurementData( + quantity_measured=Measurement.RATING, value=book["starRating"] + ) + ) + + if book.get("popularity"): + measurements.append( + MeasurementData( + quantity_measured=Measurement.POPULARITY, + value=book["popularity"], + ) + ) + + metadata = Metadata( + data_source=DataSource.OVERDRIVE, + title=title, + subtitle=subtitle, + sort_title=sort_title, + language=language, + medium=medium, + series=series, + publisher=publisher, + imprint=imprint, + published=published, + primary_identifier=primary_identifier, + identifiers=identifiers, + subjects=subjects, + contributors=contributors, + measurements=measurements, + links=links, + ) + else: + metadata = Metadata( + data_source=DataSource.OVERDRIVE, + primary_identifier=primary_identifier, + ) + + if include_formats: + formats = [] + for format in book.get("formats", []): + format_id = format["id"] + internal_formats = list(cls.internal_formats(format_id)) + if internal_formats: + for content_type, drm_scheme in internal_formats: + formats.append(FormatData(content_type, drm_scheme)) + elif format_id not in cls.ignorable_overdrive_formats: + cls.log.error( + "Could not process Overdrive format %s for %s", + format_id, + overdrive_id, + ) + + # Also make a CirculationData so we can write the formats, + circulationdata = CirculationData( + data_source=DataSource.OVERDRIVE, + primary_identifier=primary_identifier, + formats=formats, + ) + + metadata.circulation = circulationdata + + return metadata + + +class 
OverdriveAdvantageAccount: + """Holder and parser for data associated with Overdrive Advantage.""" + + def __init__(self, parent_library_id: str, library_id: str, name: str, token: str): + """Constructor. + + :param parent_library_id: The library ID of the parent Overdrive + account. + :param library_id: The library ID of the Overdrive Advantage account. + :param name: The name of the library whose Advantage account this is. + :param token: The collection token for this Advantage account + """ + self.parent_library_id = parent_library_id + self.library_id = library_id + self.name = name + self.token = token + + @classmethod + def from_representation(cls, content): + """Turn the representation of an advantageAccounts link into a list of + OverdriveAdvantageAccount objects. + + :param content: The data obtained by following an advantageAccounts + link. + :yield: A sequence of OverdriveAdvantageAccount objects. + """ + data = json.loads(content) + parent_id = str(data.get("id")) + accounts = data.get("advantageAccounts", {}) + for account in accounts: + name = account["name"] + products_link = account["links"]["products"]["href"] + library_id = str(account.get("id")) + name = account.get("name") + token = account.get("collectionToken") + yield cls( + parent_library_id=parent_id, + library_id=library_id, + name=name, + token=token, + ) + + def to_collection(self, _db): + """Find or create a Collection object for this Overdrive Advantage + account. + + :return: a 2-tuple of Collections (primary Overdrive + collection, Overdrive Advantage collection) + """ + # First find the parent Collection. + try: + parent = ( + Collection.by_protocol(_db, ExternalIntegration.OVERDRIVE) + .filter(Collection.external_account_id == self.parent_library_id) + .one() + ) + except NoResultFound as e: + # Without the parent's credentials we can't access the child. + raise ValueError( + "Cannot create a Collection whose parent does not already exist." 
+ ) + name = parent.name + " / " + self.name + child, is_new = get_one_or_create( + _db, + Collection, + parent_id=parent.id, + external_account_id=self.library_id, + create_method_kwargs=dict(name=name), + ) + if is_new: + # Make sure the child has its protocol set appropriately. + integration = child.create_external_integration( + ExternalIntegration.OVERDRIVE + ) + configuration = child.create_integration_configuration( + ExternalIntegration.OVERDRIVE + ) + + # Set or update the name of the collection to reflect the name of + # the library, just in case that name has changed. + child.name = name + return parent, child + + +class OverdriveBibliographicCoverageProvider(BibliographicCoverageProvider): + """Fill in bibliographic metadata for Overdrive records. + + This will occasionally fill in some availability information for a + single Collection, but we rely on Monitors to keep availability + information up to date for all Collections. + """ + + SERVICE_NAME = "Overdrive Bibliographic Coverage Provider" + DATA_SOURCE_NAME = DataSource.OVERDRIVE + PROTOCOL = ExternalIntegration.OVERDRIVE + INPUT_IDENTIFIER_TYPES = Identifier.OVERDRIVE_ID + + def __init__(self, collection, api_class=OverdriveAPI, **kwargs): + """Constructor. + + :param collection: Provide bibliographic coverage to all + Overdrive books in the given Collection. + :param api_class: Instantiate this class with the given Collection, + rather than instantiating OverdriveAPI. + """ + super().__init__(collection, **kwargs) + if isinstance(api_class, OverdriveAPI): + # Use a previously instantiated OverdriveAPI instance + # rather than creating a new one. + self.api = api_class + else: + # A web application should not use this option because it + # will put a non-scoped session in the mix. 
+ _db = Session.object_session(collection) + self.api = api_class(_db, collection) + + def process_item(self, identifier): + info = self.api.metadata_lookup(identifier) + error = None + if info.get("errorCode") == "NotFound": + error = "ID not recognized by Overdrive: %s" % identifier.identifier + elif info.get("errorCode") == "InvalidGuid": + error = "Invalid Overdrive ID: %s" % identifier.identifier + + if error: + return self.failure(identifier, error, transient=False) + + metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) + + if not metadata: + e = "Could not extract metadata from Overdrive data: %r" % info + return self.failure(identifier, e) + + self.metadata_pre_hook(metadata) + return self.set_metadata(identifier, metadata) + + def metadata_pre_hook(self, metadata): + """A hook method that allows subclasses to modify a Metadata + object derived from Overdrive before it's applied. + """ + return metadata + + +class GenerateOverdriveAdvantageAccountList(InputScript): + """Generates a CSV containing the following fields: + circulation manager + collection + client_key + external_account_id + library_token + advantage_name + advantage_id + advantage_token + already_configured + """ + + def __init__(self, _db=None, *args, **kwargs): + super().__init__(_db, *args, **kwargs) + self._data: List[List[str]] = list() + + def _create_overdrive_api(self, collection: Collection): + return OverdriveAPI(_db=self._db, collection=collection) + + def do_run(self, *args, **kwargs): + parsed = GenerateOverdriveAdvantageAccountList.parse_command_line( + _db=self._db, *args, **kwargs + ) + query: Query = Collection.by_protocol( + self._db, protocol=ExternalIntegration.OVERDRIVE + ) + for c in query.filter(Collection.parent_id == None): + collection: Collection = c + api = self._create_overdrive_api(collection=collection) + client_key = api.client_key().decode() + client_secret = api.client_secret().decode() + + try: + library_token = api.collection_token + 
advantage_accounts = api.get_advantage_accounts() + + for aa in advantage_accounts: + existing_child_collections = query.filter( + Collection.parent_id == collection.id + ) + already_configured_aa_libraries = [ + e.external_account_id for e in existing_child_collections + ] + self._data.append( + [ + collection.name, + collection.external_account_id, + client_key, + client_secret, + library_token, + aa.name, + aa.library_id, + aa.token, + aa.library_id in already_configured_aa_libraries, + ] + ) + except Exception as e: + logging.error( + f"Could not connect to collection {c.name}: reason: {str(e)}." + ) + + file_path = parsed.output_file_path[0] + circ_manager_name = parsed.circulation_manager_name[0] + self.write_csv(output_file_path=file_path, circ_manager_name=circ_manager_name) + + def write_csv(self, output_file_path: str, circ_manager_name: str): + with open(output_file_path, "w", newline="") as csvfile: + writer = csv.writer(csvfile) + writer.writerow( + [ + "cm", + "collection", + "overdrive_library_id", + "client_key", + "client_secret", + "library_token", + "advantage_name", + "advantage_id", + "advantage_token", + "already_configured", + ] + ) + for i in self._data: + i.insert(0, circ_manager_name) + writer.writerow(i) + + @classmethod + def arg_parser(cls): + parser = argparse.ArgumentParser() + parser.add_argument( + "--output-file-path", + help="The path of an output file", + metavar="o", + nargs=1, + ) + + parser.add_argument( + "--circulation-manager-name", + help="The name of the circulation-manager", + metavar="c", + nargs=1, + required=True, + ) + + parser.add_argument( + "--file-format", + help="The file format of the output file", + metavar="f", + nargs=1, + default="csv", + ) + + return parser + + class OverdriveAdvantageAccountListScript(Script): def run(self): """Explain every Overdrive collection and, for each one, all of its diff --git a/bin/informational/overdrive-advantage-accounts b/bin/informational/overdrive-advantage-accounts index 
269167f4a6..dff66cf3b6 100755 --- a/bin/informational/overdrive-advantage-accounts +++ b/bin/informational/overdrive-advantage-accounts @@ -6,6 +6,6 @@ bin_dir = os.path.split(__file__)[0] package_dir = os.path.join(bin_dir, "..", "..") sys.path.append(os.path.abspath(package_dir)) -from core.scripts import GenerateOverdriveAdvantageAccountList +from api.overdrive import GenerateOverdriveAdvantageAccountList GenerateOverdriveAdvantageAccountList().run() diff --git a/bin/informational/overdrive-advantage-list b/bin/informational/overdrive-advantage-list index f4bc9b92c6..4f7964c099 100755 --- a/bin/informational/overdrive-advantage-list +++ b/bin/informational/overdrive-advantage-list @@ -6,6 +6,6 @@ bin_dir = os.path.split(__file__)[0] package_dir = os.path.join(bin_dir, "..", "..") sys.path.append(os.path.abspath(package_dir)) -from api.overdrive import OverdriveAdvantageAccountListScript # noqa: E402 +from api.overdrive import OverdriveAdvantageAccountListScript OverdriveAdvantageAccountListScript().run() diff --git a/bin/repair/overdrive_bibliographic_coverage b/bin/repair/overdrive_bibliographic_coverage index 9c4218134e..9ac3242ae6 100755 --- a/bin/repair/overdrive_bibliographic_coverage +++ b/bin/repair/overdrive_bibliographic_coverage @@ -6,7 +6,7 @@ import sys bin_dir = os.path.split(__file__)[0] package_dir = os.path.join(bin_dir, "..", "..") sys.path.append(os.path.abspath(package_dir)) -from core.overdrive import OverdriveBibliographicCoverageProvider +from api.overdrive import OverdriveBibliographicCoverageProvider from core.scripts import RunCoverageProviderScript RunCoverageProviderScript(OverdriveBibliographicCoverageProvider).run() diff --git a/core/overdrive.py b/core/overdrive.py deleted file mode 100644 index 5513bcb11e..0000000000 --- a/core/overdrive.py +++ /dev/null @@ -1,1502 +0,0 @@ -from __future__ import annotations - -import datetime -import json -import logging -from threading import RLock -from typing import TYPE_CHECKING, Any, Dict, 
List, Optional, Set, Tuple, cast -from urllib.parse import quote, urlsplit, urlunsplit - -import isbnlib -from flask_babel import lazy_gettext as _ -from requests.adapters import CaseInsensitiveDict, Response -from sqlalchemy.orm.exc import NoResultFound -from sqlalchemy.orm.session import Session - -from api.circulation import CirculationConfigurationMixin -from api.circulation_exceptions import CannotFulfill -from core.config import CannotLoadConfiguration, Configuration -from core.coverage import BibliographicCoverageProvider -from core.importers import BaseImporterSettings -from core.integration.settings import ( - ConfigurationFormItem, - ConfigurationFormItemType, - FormField, -) -from core.metadata_layer import ( - CirculationData, - ContributorData, - FormatData, - IdentifierData, - LinkData, - MeasurementData, - Metadata, - SubjectData, -) -from core.model import ( - Classification, - Collection, - Contributor, - Credential, - DataSource, - DeliveryMechanism, - Edition, - ExternalIntegration, - Hyperlink, - Identifier, - Measurement, - MediaTypes, - Representation, - Subject, - get_one_or_create, -) -from core.model.configuration import HasExternalIntegration -from core.util.datetime_helpers import strptime_utc, utc_now -from core.util.http import HTTP, BadResponseException -from core.util.string_helpers import base64 - -if TYPE_CHECKING: - pass - - -class OverdriveConstants: - OVERDRIVE_CLIENT_KEY = "overdrive_client_key" - OVERDRIVE_CLIENT_SECRET = "overdrive_client_secret" - OVERDRIVE_SERVER_NICKNAME = "overdrive_server_nickname" - OVERDRIVE_WEBSITE_ID = "overdrive_website_id" - - # Note that the library ID is not included here because it is not Overdrive-specific - OVERDRIVE_CONFIGURATION_KEYS = { - OVERDRIVE_CLIENT_KEY, - OVERDRIVE_CLIENT_SECRET, - OVERDRIVE_SERVER_NICKNAME, - OVERDRIVE_WEBSITE_ID, - } - - PRODUCTION_SERVERS = "production" - TESTING_SERVERS = "testing" - - -class OverdriveSettings(BaseImporterSettings): - """The basic Overdrive 
configuration""" - - external_account_id: Optional[str] = FormField( - form=ConfigurationFormItem( - label=_("Library ID"), - type=ConfigurationFormItemType.TEXT, - description="The library identifier.", - required=True, - ), - ) - overdrive_website_id: str = FormField( - form=ConfigurationFormItem( - label=_("Website ID"), - type=ConfigurationFormItemType.TEXT, - description="The web site identifier.", - required=True, - ) - ) - overdrive_client_key: str = FormField( - form=ConfigurationFormItem( - label=_("Client Key"), - type=ConfigurationFormItemType.TEXT, - description="The Overdrive client key.", - required=True, - ) - ) - overdrive_client_secret: str = FormField( - form=ConfigurationFormItem( - label=_("Client Secret"), - type=ConfigurationFormItemType.TEXT, - description="The Overdrive client secret.", - required=True, - ) - ) - - overdrive_server_nickname: str = FormField( - default=OverdriveConstants.PRODUCTION_SERVERS, - form=ConfigurationFormItem( - label=_("Server family"), - type=ConfigurationFormItemType.SELECT, - required=False, - description="Unless you hear otherwise from Overdrive, your integration should use their production servers.", - options={ - OverdriveConstants.PRODUCTION_SERVERS: ("Production"), - OverdriveConstants.TESTING_SERVERS: _("Testing"), - }, - ), - ) - - -class OverdriveData: - overdrive_client_key: str - overdrive_client_secret: str - overdrive_website_id: str - overdrive_server_nickname: str = OverdriveConstants.PRODUCTION_SERVERS - max_retry_count: int = 0 - - -class OverdriveCoreAPI( - HasExternalIntegration, - CirculationConfigurationMixin, -): - # An OverDrive defined constant indicating the "main" or parent account - # associated with an OverDrive collection. - OVERDRIVE_MAIN_ACCOUNT_ID = -1 - - log = logging.getLogger("Overdrive API") - - # A lock for threaded usage. - lock = RLock() - - # Production and testing have different host names for some of the - # API endpoints. This is configurable on the collection level. 
- HOSTS = { - OverdriveConstants.PRODUCTION_SERVERS: dict( - host="https://api.overdrive.com", - patron_host="https://patron.api.overdrive.com", - ), - OverdriveConstants.TESTING_SERVERS: dict( - host="https://integration.api.overdrive.com", - patron_host="https://integration-patron.api.overdrive.com", - ), - } - - # Production and testing setups use the same URLs for Client - # Authentication and Patron Authentication, but we use the same - # system as for other hostnames to give a consistent look to the - # templates. - for host in list(HOSTS.values()): - host["oauth_patron_host"] = "https://oauth-patron.overdrive.com" - host["oauth_host"] = "https://oauth.overdrive.com" - - # Each of these endpoint URLs has a slot to plug in one of the - # appropriate servers. This will be filled in either by a call to - # the endpoint() method (if there are other variables in the - # template), or by the _do_get or _do_post methods (if there are - # no other variables). - TOKEN_ENDPOINT = "%(oauth_host)s/token" - PATRON_TOKEN_ENDPOINT = "%(oauth_patron_host)s/patrontoken" - - LIBRARY_ENDPOINT = "%(host)s/v1/libraries/%(library_id)s" - ADVANTAGE_LIBRARY_ENDPOINT = ( - "%(host)s/v1/libraries/%(parent_library_id)s/advantageAccounts/%(library_id)s" - ) - ALL_PRODUCTS_ENDPOINT = ( - "%(host)s/v1/collections/%(collection_token)s/products?sort=%(sort)s" - ) - METADATA_ENDPOINT = ( - "%(host)s/v1/collections/%(collection_token)s/products/%(item_id)s/metadata" - ) - EVENTS_ENDPOINT = "%(host)s/v1/collections/%(collection_token)s/products?lastUpdateTime=%(lastupdatetime)s&sort=%(sort)s&limit=%(limit)s" - AVAILABILITY_ENDPOINT = "%(host)s/v2/collections/%(collection_token)s/products/%(product_id)s/availability" - - PATRON_INFORMATION_ENDPOINT = "%(patron_host)s/v1/patrons/me" - CHECKOUTS_ENDPOINT = "%(patron_host)s/v1/patrons/me/checkouts" - CHECKOUT_ENDPOINT = "%(patron_host)s/v1/patrons/me/checkouts/%(overdrive_id)s" - FORMATS_ENDPOINT = ( - 
"%(patron_host)s/v1/patrons/me/checkouts/%(overdrive_id)s/formats" - ) - HOLDS_ENDPOINT = "%(patron_host)s/v1/patrons/me/holds" - HOLD_ENDPOINT = "%(patron_host)s/v1/patrons/me/holds/%(product_id)s" - ME_ENDPOINT = "%(patron_host)s/v1/patrons/me" - - MAX_CREDENTIAL_AGE = 50 * 60 - - PAGE_SIZE_LIMIT = 300 - EVENT_SOURCE = "Overdrive" - - EVENT_DELAY = datetime.timedelta(minutes=120) - - # The formats we care about. - FORMATS = "ebook-epub-open,ebook-epub-adobe,ebook-pdf-adobe,ebook-pdf-open,audiobook-overdrive".split( - "," - ) - - # The formats that can be read by the default Library Simplified reader. - DEFAULT_READABLE_FORMATS = { - "ebook-epub-open", - "ebook-epub-adobe", - "ebook-pdf-open", - "audiobook-overdrive", - } - - # The formats that indicate the book has been fulfilled on an - # incompatible platform and just can't be fulfilled on Simplified - # in any format. - INCOMPATIBLE_PLATFORM_FORMATS = {"ebook-kindle"} - - OVERDRIVE_READ_FORMAT = "ebook-overdrive" - - TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" - - # When associating an Overdrive account with a library, it's - # necessary to also specify an "ILS name" obtained from - # Overdrive. Components that don't authenticate patrons (such as - # the metadata wrangler) don't need to set this value. - ILS_NAME_KEY = "ils_name" - ILS_NAME_DEFAULT = "default" - - _external_integration: ExternalIntegration - _db: Session - _hosts: Dict[str, str] - _library_id: str - _collection_id: int - - def label(self): - return "Overdrive Core API" - - def description(self): - return "" - - @classmethod - def library_settings_class(cls): - raise NotImplementedError() - - @classmethod - def settings_class(cls): - return OverdriveSettings - - def __init__(self, _db: Session, collection: Collection): - if collection.protocol != ExternalIntegration.OVERDRIVE: - raise ValueError( - "Collection protocol is %s, but passed into OverdriveAPI!" 
- % collection.protocol - ) - - _library_id = collection.external_account_id - if not _library_id: - raise ValueError( - "Collection %s must have an external account ID" % collection.id - ) - else: - self._library_id = _library_id - - self._db = _db - self._external_integration = collection.external_integration - if collection.id is None: - raise ValueError( - "Collection passed into OverdriveAPI must have an ID, but %s does not" - % collection.name - ) - self._collection_id = collection.id - - # Initialize configuration information. - self._integration_configuration_id = cast( - int, collection.integration_configuration.id - ) - self._configuration = OverdriveData() - - if collection.parent: - # This is an Overdrive Advantage account. - self.parent_library_id = collection.parent.external_account_id - - # We're going to inherit all of the Overdrive credentials - # from the parent (the main Overdrive account), except for the - # library ID, which we already set. - parent_integration = collection.parent.integration_configuration - parent_config = self.settings_class()(**parent_integration.settings_dict) - for key in OverdriveConstants.OVERDRIVE_CONFIGURATION_KEYS: - parent_value = getattr(parent_config, key, None) - setattr(self._configuration, key, parent_value) - else: - self.parent_library_id = None - - # Self settings should override parent settings where available - settings = collection.integration_configuration.settings_dict - for name, schema in self.settings_class().schema()["properties"].items(): - if name in settings or not hasattr(self._configuration, name): - setattr( - self._configuration, name, settings.get(name, schema.get("default")) - ) - - if not self._configuration.overdrive_client_key: - raise CannotLoadConfiguration("Overdrive client key is not configured") - if not self._configuration.overdrive_client_secret: - raise CannotLoadConfiguration( - "Overdrive client password/secret is not configured" - ) - if not 
self._configuration.overdrive_website_id: - raise CannotLoadConfiguration("Overdrive website ID is not configured") - - self._server_nickname = self._configuration.overdrive_server_nickname - - self._hosts = self._determine_hosts(server_nickname=self._server_nickname) - - # This is set by an access to .token, or by a call to - # check_creds() or refresh_creds(). - self._token = None - - # This is set by an access to .collection_token - self._collection_token = None - - def configuration(self): - """Overdrive has a different implementation for configuration""" - return self._configuration - - def _determine_hosts(self, *, server_nickname: str) -> Dict[str, str]: - # Figure out which hostnames we'll be using when constructing - # endpoint URLs. - if server_nickname not in self.HOSTS: - server_nickname = OverdriveConstants.PRODUCTION_SERVERS - - return dict(self.HOSTS[server_nickname]) - - def external_integration(self, db: Session) -> ExternalIntegration: - return self._external_integration - - def endpoint(self, url: str, **kwargs) -> str: - """Create the URL to an Overdrive API endpoint. - - :param url: A template for the URL. - :param kwargs: Arguments to be interpolated into the template. - The server hostname will be interpolated automatically; you - don't have to pass it in. - """ - if not "%(" in url: - # Nothing to interpolate. - return url - kwargs.update(self._hosts) - return url % kwargs - - @property - def token(self): - if not self._token: - self.check_creds() - return self._token - - @property - def collection_token(self): - """Get the token representing this particular Overdrive collection. - - As a side effect, this will verify that the Overdrive - credentials are working. 
- """ - if not self._collection_token: - self.check_creds() - library = self.get_library() - error = library.get("errorCode") - if error: - message = library.get("message") - raise CannotLoadConfiguration( - "Overdrive credentials are valid but could not fetch library: %s" - % message - ) - self._collection_token = library["collectionToken"] - return self._collection_token - - @property - def collection(self) -> Optional[Collection]: - return Collection.by_id(self._db, id=self._collection_id) - - @property - def source(self): - return DataSource.lookup(self._db, DataSource.OVERDRIVE) - - def ils_name(self, library): - """Determine the ILS name to use for the given Library.""" - config = self.integration_configuration().for_library(library.id) - if not config: - return self.ILS_NAME_DEFAULT - return config.settings_dict.get(self.ILS_NAME_KEY, self.ILS_NAME_DEFAULT) - - @property - def advantage_library_id(self): - """The library ID for this library, as we should look for it in - certain API documents served by Overdrive. - - For ordinary collections (ie non-Advantage) with or without associated - Advantage (ie child) collections shared among libraries, this will be - equal to the OVERDRIVE_MAIN_ACCOUNT_ID. - - For Overdrive Advantage accounts, this will be the numeric - value of the Overdrive library ID. - """ - if self.parent_library_id is None: - # This is not an Overdrive Advantage collection. - # - # Instead of looking for the library ID itself in these - # documents, we should look for the constant main account id. 
- return self.OVERDRIVE_MAIN_ACCOUNT_ID - return int(self._library_id) - - def check_creds(self, force_refresh=False): - """If the Bearer Token has expired, update it.""" - with self.lock: - refresh_on_lookup = self.refresh_creds - if force_refresh: - refresh_on_lookup = lambda x: x - - credential = self.credential_object(refresh_on_lookup) - if force_refresh: - self.refresh_creds(credential) - self._token = credential.credential - - def credential_object(self, refresh): - """Look up the Credential object that allows us to use - the Overdrive API. - """ - return Credential.lookup( - self._db, - DataSource.OVERDRIVE, - None, - None, - refresh, - collection=self.collection, - ) - - def refresh_creds(self, credential): - """Fetch a new Bearer Token and update the given Credential object.""" - response = self.token_post( - self.TOKEN_ENDPOINT, - dict(grant_type="client_credentials"), - allowed_response_codes=[200], - ) - data = response.json() - self._update_credential(credential, data) - self._token = credential.credential - - def get( - self, url: str, extra_headers={}, exception_on_401=False - ) -> Tuple[int, CaseInsensitiveDict, bytes]: - """Make an HTTP GET request using the active Bearer Token.""" - request_headers = dict(Authorization="Bearer %s" % self.token) - request_headers.update(extra_headers) - - response: Response = self._do_get( - url, request_headers, allowed_response_codes=["2xx", "3xx", "401", "404"] - ) - status_code: int = response.status_code - headers: CaseInsensitiveDict = response.headers - content: bytes = response.content - - if status_code == 401: - if exception_on_401: - # This is our second try. Give up. - raise BadResponseException.from_response( - url, - "Something's wrong with the Overdrive OAuth Bearer Token!", - (status_code, headers, content), - ) - else: - # Refresh the token and try again. 
- self.check_creds(True) - return self.get(url, extra_headers, True) - else: - return status_code, headers, content - - @property - def token_authorization_header(self) -> str: - s = b"%s:%s" % (self.client_key(), self.client_secret()) - return "Basic " + base64.standard_b64encode(s).strip() - - @property - def fulfillment_authorization_header(self) -> str: - is_test_mode = ( - True - if self._server_nickname == OverdriveConstants.TESTING_SERVERS - else False - ) - try: - client_credentials = Configuration.overdrive_fulfillment_keys( - testing=is_test_mode - ) - except CannotLoadConfiguration as e: - raise CannotFulfill(*e.args) - - s = b"%s:%s" % ( - client_credentials["key"].encode(), - client_credentials["secret"].encode(), - ) - return "Basic " + base64.standard_b64encode(s).strip() - - def token_post( - self, - url: str, - payload: Dict[str, str], - is_fulfillment=False, - headers={}, - **kwargs, - ) -> Response: - """Make an HTTP POST request for purposes of getting an OAuth token.""" - headers = dict(headers) - headers["Authorization"] = ( - self.token_authorization_header - if not is_fulfillment - else self.fulfillment_authorization_header - ) - return self._do_post(url, payload, headers, **kwargs) - - @staticmethod - def _update_credential(credential, overdrive_data): - """Copy Overdrive OAuth data into a Credential object.""" - credential.credential = overdrive_data["access_token"] - expires_in = overdrive_data["expires_in"] * 0.9 - credential.expires = utc_now() + datetime.timedelta(seconds=expires_in) - - @property - def _library_endpoint(self) -> str: - """Which URL should we go to to get information about this collection? - - If this is an ordinary Overdrive account, we get information - from LIBRARY_ENDPOINT. - - If this is an Overdrive Advantage account, we get information - from LIBRARY_ADVANTAGE_ENDPOINT. - """ - args = dict(library_id=self._library_id) - if self.parent_library_id: - # This is an Overdrive advantage account. 
- args["parent_library_id"] = self.parent_library_id - endpoint = self.ADVANTAGE_LIBRARY_ENDPOINT - else: - endpoint = self.LIBRARY_ENDPOINT - return self.endpoint(endpoint, **args) - - def get_library(self): - """Get basic information about the collection, including - a link to the titles in the collection. - """ - url = self._library_endpoint - with self.lock: - representation, cached = Representation.get( - self._db, - url, - self.get, - exception_handler=Representation.reraise_exception, - ) - return json.loads(representation.content) - - def get_advantage_accounts(self): - """Find all the Overdrive Advantage accounts managed by this library. - - :yield: A sequence of OverdriveAdvantageAccount objects. - """ - library = self.get_library() - links = library.get("links", {}) - advantage = links.get("advantageAccounts") - if not advantage: - return [] - if advantage: - # This library has Overdrive Advantage accounts, or at - # least a link where some may be found. - advantage_url = advantage.get("href") - if not advantage_url: - return - representation, cached = Representation.get( - self._db, - advantage_url, - self.get, - exception_handler=Representation.reraise_exception, - ) - return OverdriveAdvantageAccount.from_representation(representation.content) - - def all_ids(self): - """Get IDs for every book in the system, with the most recently added - ones at the front. - """ - next_link = self._all_products_link - while next_link: - page_inventory, next_link = self._get_book_list_page(next_link, "next") - - yield from page_inventory - - @property - def _all_products_link(self) -> str: - url = self.endpoint( - self.ALL_PRODUCTS_ENDPOINT, - collection_token=self.collection_token, - sort="dateAdded:desc", - ) - return self.make_link_safe(url) - - def _get_book_list_page(self, link, rel_to_follow="next", extractor_class=None): - """Process a page of inventory whose circulation we need to check. - - Returns a 2-tuple: (availability_info, next_link). 
- `availability_info` is a list of dictionaries, each containing - basic availability and bibliographic information about - one book. - `next_link` is a link to the next page of results. - """ - extractor_class = extractor_class or OverdriveRepresentationExtractor - # We don't cache this because it changes constantly. - status_code, headers, content = self.get(link, {}) - if isinstance(content, (bytes, str)): - content = json.loads(content) - - # Find the link to the next page of results, if any. - next_link = extractor_class.link(content, rel_to_follow) - - # Prepare to get availability information for all the books on - # this page. - availability_queue = extractor_class.availability_link_list(content) - return availability_queue, next_link - - def recently_changed_ids(self, start, cutoff): - """Get IDs of books whose status has changed between the start time - and now. - """ - # `cutoff` is not supported by Overdrive, so we ignore it. All - # we can do is get events between the start time and now. - - last_update_time = start - self.EVENT_DELAY - self.log.info("Asking for circulation changes since %s", last_update_time) - last_update = last_update_time.strftime(self.TIME_FORMAT) - - next_link = self.endpoint( - self.EVENTS_ENDPOINT, - lastupdatetime=last_update, - sort="popularity:desc", - limit=self.PAGE_SIZE_LIMIT, - collection_token=self.collection_token, - ) - next_link = self.make_link_safe(next_link) - while next_link: - page_inventory, next_link = self._get_book_list_page(next_link) - # We won't be sending out any events for these books yet, - # because we don't know if anything changed, but we will - # be putting them on the list of inventory items to - # refresh. At that point we will send out events. 
- yield from page_inventory - - def metadata_lookup(self, identifier): - """Look up metadata for an Overdrive identifier.""" - url = self.endpoint( - self.METADATA_ENDPOINT, - collection_token=self.collection_token, - item_id=identifier.identifier, - ) - status_code, headers, content = self.get(url, {}) - if isinstance(content, (bytes, str)): - content = json.loads(content) - return content - - def metadata_lookup_obj(self, identifier): - url = self.endpoint( - self.METADATA_ENDPOINT, - collection_token=self.collection_token, - item_id=identifier, - ) - status_code, headers, content = self.get(url, {}) - if isinstance(content, (bytes, str)): - content = json.loads(content) - return OverdriveRepresentationExtractor.book_info_to_metadata(content) - - @classmethod - def make_link_safe(cls, url: str) -> str: - """Turn a server-provided link into a link the server will accept! - - The {} part is completely obnoxious and I have complained about it to - Overdrive. - - The availability part is to make sure we always use v2 of the - availability API, even if Overdrive sent us a link to v1. 
- """ - parts = list(urlsplit(url)) - parts[2] = quote(parts[2]) - endings = ("/availability", "/availability/") - if parts[2].startswith("/v1/collections/") and any( - parts[2].endswith(x) for x in endings - ): - parts[2] = parts[2].replace("/v1/collections/", "/v2/collections/", 1) - query_string = parts[3] - query_string = query_string.replace("+", "%2B") - query_string = query_string.replace(":", "%3A") - query_string = query_string.replace("{", "%7B") - query_string = query_string.replace("}", "%7D") - parts[3] = query_string - return urlunsplit(tuple(parts)) - - def _do_get(self, url: str, headers, **kwargs) -> Response: - """This method is overridden in MockOverdriveAPI.""" - url = self.endpoint(url) - kwargs["max_retry_count"] = int(self._configuration.max_retry_count) - kwargs["timeout"] = 120 - return HTTP.get_with_timeout(url, headers=headers, **kwargs) - - def _do_post(self, url: str, payload, headers, **kwargs) -> Response: - """This method is overridden in MockOverdriveAPI.""" - url = self.endpoint(url) - kwargs["max_retry_count"] = int(self._configuration.max_retry_count) - kwargs["timeout"] = 120 - return HTTP.post_with_timeout(url, payload, headers=headers, **kwargs) - - def website_id(self) -> bytes: - return self._configuration.overdrive_website_id.encode("utf-8") - - def client_key(self) -> bytes: - return self._configuration.overdrive_client_key.encode("utf-8") - - def client_secret(self) -> bytes: - return self._configuration.overdrive_client_secret.encode("utf-8") - - def library_id(self) -> str: - return self._library_id - - def hosts(self) -> Dict[str, str]: - return dict(self._hosts) - - -class OverdriveRepresentationExtractor: - """Extract useful information from Overdrive's JSON representations.""" - - log = logging.getLogger("Overdrive representation extractor") - - def __init__(self, api): - """Constructor. - - :param api: An OverdriveAPI object. 
This will be used when deciding - which portions of a JSON representation are relevant to the active - Overdrive collection. - """ - self.library_id = api.advantage_library_id - - @classmethod - def availability_link_list(cls, book_list): - """:return: A list of dictionaries with keys `id`, `title`, `availability_link`.""" - l = [] - if not "products" in book_list: - return [] - - products = book_list["products"] - for product in products: - if not "id" in product: - cls.log.warning("No ID found in %r", product) - continue - book_id = product["id"] - data = dict( - id=book_id, - title=product.get("title"), - author_name=None, - date_added=product.get("dateAdded"), - ) - if "primaryCreator" in product: - creator = product["primaryCreator"] - if creator.get("role") == "Author": - data["author_name"] = creator.get("name") - links = product.get("links", []) - if "availability" in links: - link = links["availability"]["href"] - data["availability_link"] = OverdriveCoreAPI.make_link_safe(link) - else: - logging.getLogger("Overdrive API").warning( - "No availability link for %s", book_id - ) - l.append(data) - return l - - @classmethod - def link(self, page, rel): - if "links" in page and rel in page["links"]: - raw_link = page["links"][rel]["href"] - link = OverdriveCoreAPI.make_link_safe(raw_link) - else: - link = None - return link - - format_data_for_overdrive_format = { - "ebook-pdf-adobe": (Representation.PDF_MEDIA_TYPE, DeliveryMechanism.ADOBE_DRM), - "ebook-pdf-open": (Representation.PDF_MEDIA_TYPE, DeliveryMechanism.NO_DRM), - "ebook-epub-adobe": ( - Representation.EPUB_MEDIA_TYPE, - DeliveryMechanism.ADOBE_DRM, - ), - "ebook-epub-open": (Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM), - "audiobook-mp3": ("application/x-od-media", DeliveryMechanism.OVERDRIVE_DRM), - "music-mp3": ("application/x-od-media", DeliveryMechanism.OVERDRIVE_DRM), - "ebook-overdrive": [ - ( - MediaTypes.OVERDRIVE_EBOOK_MANIFEST_MEDIA_TYPE, - DeliveryMechanism.LIBBY_DRM, - ), - 
( - DeliveryMechanism.STREAMING_TEXT_CONTENT_TYPE, - DeliveryMechanism.STREAMING_DRM, - ), - ], - "audiobook-overdrive": [ - ( - MediaTypes.OVERDRIVE_AUDIOBOOK_MANIFEST_MEDIA_TYPE, - DeliveryMechanism.LIBBY_DRM, - ), - ( - DeliveryMechanism.STREAMING_AUDIO_CONTENT_TYPE, - DeliveryMechanism.STREAMING_DRM, - ), - ], - "video-streaming": ( - DeliveryMechanism.STREAMING_VIDEO_CONTENT_TYPE, - DeliveryMechanism.STREAMING_DRM, - ), - "ebook-kindle": ( - DeliveryMechanism.KINDLE_CONTENT_TYPE, - DeliveryMechanism.KINDLE_DRM, - ), - "periodicals-nook": ( - DeliveryMechanism.NOOK_CONTENT_TYPE, - DeliveryMechanism.NOOK_DRM, - ), - } - - # A mapping of the overdrive format name to end sample content type - # Overdrive samples are not DRM protected so the links should be - # stored as the end sample content type - sample_format_to_content_type = { - "ebook-overdrive": "text/html", - "audiobook-wma": "audio/x-ms-wma", - "audiobook-mp3": "audio/mpeg", - "audiobook-overdrive": "text/html", - "ebook-epub-adobe": "application/epub+zip", - "magazine-overdrive": "text/html", - } - - @classmethod - def internal_formats(cls, overdrive_format): - """Yield all internal formats for the given Overdrive format. - - Some Overdrive formats become multiple internal formats. 
- - :yield: A sequence of (content type, DRM system) 2-tuples - """ - result = cls.format_data_for_overdrive_format.get(overdrive_format) - if not result: - return - if isinstance(result, list): - yield from result - else: - yield result - - ignorable_overdrive_formats: Set[str] = set() - - overdrive_role_to_simplified_role = { - "actor": Contributor.ACTOR_ROLE, - "artist": Contributor.ARTIST_ROLE, - "book producer": Contributor.PRODUCER_ROLE, - "associated name": Contributor.ASSOCIATED_ROLE, - "author": Contributor.AUTHOR_ROLE, - "author of introduction": Contributor.INTRODUCTION_ROLE, - "author of foreword": Contributor.FOREWORD_ROLE, - "author of afterword": Contributor.AFTERWORD_ROLE, - "contributor": Contributor.CONTRIBUTOR_ROLE, - "colophon": Contributor.COLOPHON_ROLE, - "adapter": Contributor.ADAPTER_ROLE, - "etc.": Contributor.UNKNOWN_ROLE, - "cast member": Contributor.ACTOR_ROLE, - "collaborator": Contributor.COLLABORATOR_ROLE, - "compiler": Contributor.COMPILER_ROLE, - "composer": Contributor.COMPOSER_ROLE, - "copyright holder": Contributor.COPYRIGHT_HOLDER_ROLE, - "director": Contributor.DIRECTOR_ROLE, - "editor": Contributor.EDITOR_ROLE, - "engineer": Contributor.ENGINEER_ROLE, - "executive producer": Contributor.EXECUTIVE_PRODUCER_ROLE, - "illustrator": Contributor.ILLUSTRATOR_ROLE, - "musician": Contributor.MUSICIAN_ROLE, - "narrator": Contributor.NARRATOR_ROLE, - "other": Contributor.UNKNOWN_ROLE, - "performer": Contributor.PERFORMER_ROLE, - "producer": Contributor.PRODUCER_ROLE, - "translator": Contributor.TRANSLATOR_ROLE, - "photographer": Contributor.PHOTOGRAPHER_ROLE, - "lyricist": Contributor.LYRICIST_ROLE, - "transcriber": Contributor.TRANSCRIBER_ROLE, - "designer": Contributor.DESIGNER_ROLE, - } - - overdrive_medium_to_simplified_medium = { - "eBook": Edition.BOOK_MEDIUM, - "Video": Edition.VIDEO_MEDIUM, - "Audiobook": Edition.AUDIO_MEDIUM, - "Music": Edition.MUSIC_MEDIUM, - "Periodicals": Edition.PERIODICAL_MEDIUM, - } - - DATE_FORMAT = 
"%Y-%m-%d" - - @classmethod - def parse_roles(cls, id, rolestring): - rolestring = rolestring.lower() - roles = [x.strip() for x in rolestring.split(",")] - if " and " in roles[-1]: - roles = roles[:-1] + [x.strip() for x in roles[-1].split(" and ")] - processed = [] - for x in roles: - if x not in cls.overdrive_role_to_simplified_role: - cls.log.error("Could not process role %s for %s", x, id) - else: - processed.append(cls.overdrive_role_to_simplified_role[x]) - return processed - - def book_info_to_circulation(self, book): - """Note: The json data passed into this method is from a different file/stream - from the json data that goes into the book_info_to_metadata() method. - """ - # In Overdrive, 'reserved' books show up as books on - # hold. There is no separate notion of reserved books. - licenses_reserved = 0 - - licenses_owned = None - licenses_available = None - patrons_in_hold_queue = None - - # TODO: The only reason this works for a NotFound error is the - # circulation code sticks the known book ID into `book` ahead - # of time. That's a code smell indicating that this system - # needs to be refactored. - if "reserveId" in book and not "id" in book: - book["id"] = book["reserveId"] - if not "id" in book: - return None - overdrive_id = book["id"] - primary_identifier = IdentifierData(Identifier.OVERDRIVE_ID, overdrive_id) - # TODO: We might be able to use this information to avoid the - # need for explicit configuration of Advantage collections, or - # at least to keep Advantage collections more up-to-date than - # they would be otherwise, as a side effect of updating - # regular Overdrive collections. - - # TODO: this would be the place to handle simultaneous use - # titles -- these can be detected with - # availabilityType="AlwaysAvailable" and have their - # .licenses_owned set to LicensePool.UNLIMITED_ACCESS. 
- # see http://developer.overdrive.com/apis/library-availability-new - - # TODO: Cost-per-circ titles - # (availabilityType="LimitedAvailablility") can be handled - # similarly, though those can abruptly become unavailable, so - # UNLIMITED_ACCESS is probably not appropriate. - - error_code = book.get("errorCode") - # TODO: It's not clear what other error codes there might be. - # The current behavior will respond to errors other than - # NotFound by leaving the book alone, but this might not be - # the right behavior. - if error_code == "NotFound": - licenses_owned = 0 - licenses_available = 0 - patrons_in_hold_queue = 0 - elif book.get("isOwnedByCollections") is not False: - # We own this book. - licenses_owned = 0 - licenses_available = 0 - - for account in self._get_applicable_accounts(book.get("accounts", [])): - licenses_owned += int(account.get("copiesOwned", 0)) - licenses_available += int(account.get("copiesAvailable", 0)) - - if "numberOfHolds" in book: - if patrons_in_hold_queue is None: - patrons_in_hold_queue = 0 - patrons_in_hold_queue += book["numberOfHolds"] - - return CirculationData( - data_source=DataSource.OVERDRIVE, - primary_identifier=primary_identifier, - licenses_owned=licenses_owned, - licenses_available=licenses_available, - licenses_reserved=licenses_reserved, - patrons_in_hold_queue=patrons_in_hold_queue, - ) - - def _get_applicable_accounts( - self, accounts: List[Dict[str, Any]] - ) -> List[Dict[str, Any]]: - """ - Returns those accounts from the accounts array that apply the - current overdrive collection context. - - If this is an overdrive parent collection, we want to return accounts - associated with the main OverDrive "library" and any non-main account - with sharing enabled. - - If this is a child OverDrive collection, then we return only the - account associated with that child's OverDrive Advantage "library". 
- Additionally, we want to exclude the account if it is "shared" since - we will be counting it with the parent collection. - """ - - if self.library_id == OverdriveCoreAPI.OVERDRIVE_MAIN_ACCOUNT_ID: - # this is a parent collection - filtered_result = filter( - lambda account: account.get("id") - == OverdriveCoreAPI.OVERDRIVE_MAIN_ACCOUNT_ID - or account.get("shared", False), - accounts, - ) - else: - # this is child collection - filtered_result = filter( - lambda account: account.get("id") == self.library_id - and not account.get("shared", False), - accounts, - ) - - return list(filtered_result) - - @classmethod - def image_link_to_linkdata(cls, link, rel): - if not link or not "href" in link: - return None - href = link["href"] - if "00000000-0000-0000-0000" in href: - # This is a stand-in cover for preorders. It's better not - # to have a cover at all -- we might be able to get one - # later, or from another source. - return None - href = OverdriveCoreAPI.make_link_safe(href) - media_type = link.get("type", None) - return LinkData(rel=rel, href=href, media_type=media_type) - - @classmethod - def book_info_to_metadata( - cls, book, include_bibliographic=True, include_formats=True - ): - """Turn Overdrive's JSON representation of a book into a Metadata - object. - - Note: The json data passed into this method is from a different file/stream - from the json data that goes into the book_info_to_circulation() method. - """ - if not "id" in book: - return None - overdrive_id = book["id"] - primary_identifier = IdentifierData(Identifier.OVERDRIVE_ID, overdrive_id) - - # If we trust classification data, we'll give it this weight. - # Otherwise we'll probably give it a fraction of this weight. 
- trusted_weight = Classification.TRUSTED_DISTRIBUTOR_WEIGHT - - if include_bibliographic: - title = book.get("title", None) - sort_title = book.get("sortTitle") - subtitle = book.get("subtitle", None) - series = book.get("series", None) - publisher = book.get("publisher", None) - imprint = book.get("imprint", None) - - if "publishDate" in book: - published = strptime_utc(book["publishDate"][:10], cls.DATE_FORMAT) - else: - published = None - - languages = [l["code"] for l in book.get("languages", [])] - if "eng" in languages or not languages: - language = "eng" - else: - language = sorted(languages)[0] - - contributors = [] - for creator in book.get("creators", []): - sort_name = creator["fileAs"] - display_name = creator["name"] - role = creator["role"] - roles = cls.parse_roles(overdrive_id, role) or [ - Contributor.UNKNOWN_ROLE - ] - contributor = ContributorData( - sort_name=sort_name, - display_name=display_name, - roles=roles, - biography=creator.get("bioText", None), - ) - contributors.append(contributor) - - subjects = [] - for sub in book.get("subjects", []): - subject = SubjectData( - type=Subject.OVERDRIVE, - identifier=sub["value"], - weight=trusted_weight, - ) - subjects.append(subject) - - for sub in book.get("keywords", []): - subject = SubjectData( - type=Subject.TAG, - identifier=sub["value"], - # We don't use TRUSTED_DISTRIBUTOR_WEIGHT because - # we don't know where the tags come from -- - # probably Overdrive users -- and they're - # frequently wrong. - weight=1, - ) - subjects.append(subject) - - extra = dict() - if "grade_levels" in book: - # n.b. Grade levels are measurements of reading level, not - # age appropriateness. We can use them as a measure of age - # appropriateness in a pinch, but we weight them less - # heavily than TRUSTED_DISTRIBUTOR_WEIGHT. 
- for i in book["grade_levels"]: - subject = SubjectData( - type=Subject.GRADE_LEVEL, - identifier=i["value"], - weight=trusted_weight / 10, - ) - subjects.append(subject) - - overdrive_medium = book.get("mediaType", None) - if ( - overdrive_medium - and overdrive_medium not in cls.overdrive_medium_to_simplified_medium - ): - cls.log.error( - "Could not process medium %s for %s", overdrive_medium, overdrive_id - ) - - medium = cls.overdrive_medium_to_simplified_medium.get( - overdrive_medium, Edition.BOOK_MEDIUM - ) - - measurements = [] - if "awards" in book: - extra["awards"] = book.get("awards", []) - num_awards = len(extra["awards"]) - measurements.append( - MeasurementData(Measurement.AWARDS, str(num_awards)) - ) - - for name, subject_type in ( - ("ATOS", Subject.ATOS_SCORE), - ("lexileScore", Subject.LEXILE_SCORE), - ("interestLevel", Subject.INTEREST_LEVEL), - ): - if not name in book: - continue - identifier = str(book[name]) - subjects.append( - SubjectData( - type=subject_type, identifier=identifier, weight=trusted_weight - ) - ) - - for grade_level_info in book.get("gradeLevels", []): - grade_level = grade_level_info.get("value") - subjects.append( - SubjectData( - type=Subject.GRADE_LEVEL, - identifier=grade_level, - weight=trusted_weight, - ) - ) - - identifiers = [] - links = [] - sample_hrefs = set() - for format in book.get("formats", []): - for new_id in format.get("identifiers", []): - t = new_id["type"] - v = new_id["value"] - orig_v = v - type_key = None - if t == "ASIN": - type_key = Identifier.ASIN - elif t == "ISBN": - type_key = Identifier.ISBN - if len(v) == 10: - v = isbnlib.to_isbn13(v) - if v is None or not isbnlib.is_isbn13(v): - # Overdrive sometimes uses invalid values - # like "n/a" as placeholders. Ignore such - # values to avoid a situation where hundreds of - # books appear to have the same ISBN. ISBNs - # which fail check digit checks or are invalid - # also can occur. Log them for review. 
- cls.log.info("Bad ISBN value provided: %s", orig_v) - continue - elif t == "DOI": - type_key = Identifier.DOI - elif t == "UPC": - type_key = Identifier.UPC - elif t == "PublisherCatalogNumber": - continue - if type_key and v: - identifiers.append(IdentifierData(type_key, v, 1)) - - # Samples become links. - if "samples" in format: - for sample_info in format["samples"]: - href = sample_info["url"] - # Have we already parsed this sample? Overdrive repeats samples per format - if href in sample_hrefs: - continue - - # Every sample has its own format type - overdrive_format_name = sample_info.get("formatType") - if not overdrive_format_name: - # Malformed sample - continue - content_type = cls.sample_format_to_content_type.get( - overdrive_format_name - ) - if not content_type: - # Unusable by us. - cls.log.warning( - f"Did not find a sample format mapping for '{overdrive_format_name}': {href}" - ) - continue - - if Representation.is_media_type(content_type): - links.append( - LinkData( - rel=Hyperlink.SAMPLE, - href=href, - media_type=content_type, - ) - ) - sample_hrefs.add(href) - - # A cover and its thumbnail become a single LinkData. - if "images" in book: - images = book["images"] - image_data = cls.image_link_to_linkdata( - images.get("cover"), Hyperlink.IMAGE - ) - for name in ["cover300Wide", "cover150Wide", "thumbnail"]: - # Try to get a thumbnail that's as close as possible - # to the size we use. - image = images.get(name) - thumbnail_data = cls.image_link_to_linkdata( - image, Hyperlink.THUMBNAIL_IMAGE - ) - if not image_data: - image_data = cls.image_link_to_linkdata(image, Hyperlink.IMAGE) - if thumbnail_data: - break - - if image_data: - if thumbnail_data: - image_data.thumbnail = thumbnail_data - links.append(image_data) - - # Descriptions become links. 
- short = book.get("shortDescription") - full = book.get("fullDescription") - if full: - links.append( - LinkData( - rel=Hyperlink.DESCRIPTION, - content=full, - media_type="text/html", - ) - ) - - if short and (not full or not full.startswith(short)): - links.append( - LinkData( - rel=Hyperlink.SHORT_DESCRIPTION, - content=short, - media_type="text/html", - ) - ) - - # Add measurements: rating and popularity - if book.get("starRating") is not None and book["starRating"] > 0: - measurements.append( - MeasurementData( - quantity_measured=Measurement.RATING, value=book["starRating"] - ) - ) - - if book.get("popularity"): - measurements.append( - MeasurementData( - quantity_measured=Measurement.POPULARITY, - value=book["popularity"], - ) - ) - - metadata = Metadata( - data_source=DataSource.OVERDRIVE, - title=title, - subtitle=subtitle, - sort_title=sort_title, - language=language, - medium=medium, - series=series, - publisher=publisher, - imprint=imprint, - published=published, - primary_identifier=primary_identifier, - identifiers=identifiers, - subjects=subjects, - contributors=contributors, - measurements=measurements, - links=links, - ) - else: - metadata = Metadata( - data_source=DataSource.OVERDRIVE, - primary_identifier=primary_identifier, - ) - - if include_formats: - formats = [] - for format in book.get("formats", []): - format_id = format["id"] - internal_formats = list(cls.internal_formats(format_id)) - if internal_formats: - for content_type, drm_scheme in internal_formats: - formats.append(FormatData(content_type, drm_scheme)) - elif format_id not in cls.ignorable_overdrive_formats: - cls.log.error( - "Could not process Overdrive format %s for %s", - format_id, - overdrive_id, - ) - - # Also make a CirculationData so we can write the formats, - circulationdata = CirculationData( - data_source=DataSource.OVERDRIVE, - primary_identifier=primary_identifier, - formats=formats, - ) - - metadata.circulation = circulationdata - - return metadata - - -class 
OverdriveAdvantageAccount: - """Holder and parser for data associated with Overdrive Advantage.""" - - def __init__(self, parent_library_id: str, library_id: str, name: str, token: str): - """Constructor. - - :param parent_library_id: The library ID of the parent Overdrive - account. - :param library_id: The library ID of the Overdrive Advantage account. - :param name: The name of the library whose Advantage account this is. - :param token: The collection token for this Advantage account - """ - self.parent_library_id = parent_library_id - self.library_id = library_id - self.name = name - self.token = token - - @classmethod - def from_representation(cls, content): - """Turn the representation of an advantageAccounts link into a list of - OverdriveAdvantageAccount objects. - - :param content: The data obtained by following an advantageAccounts - link. - :yield: A sequence of OverdriveAdvantageAccount objects. - """ - data = json.loads(content) - parent_id = str(data.get("id")) - accounts = data.get("advantageAccounts", {}) - for account in accounts: - name = account["name"] - products_link = account["links"]["products"]["href"] - library_id = str(account.get("id")) - name = account.get("name") - token = account.get("collectionToken") - yield cls( - parent_library_id=parent_id, - library_id=library_id, - name=name, - token=token, - ) - - def to_collection(self, _db): - """Find or create a Collection object for this Overdrive Advantage - account. - - :return: a 2-tuple of Collections (primary Overdrive - collection, Overdrive Advantage collection) - """ - # First find the parent Collection. - try: - parent = ( - Collection.by_protocol(_db, ExternalIntegration.OVERDRIVE) - .filter(Collection.external_account_id == self.parent_library_id) - .one() - ) - except NoResultFound as e: - # Without the parent's credentials we can't access the child. - raise ValueError( - "Cannot create a Collection whose parent does not already exist." 
- ) - name = parent.name + " / " + self.name - child, is_new = get_one_or_create( - _db, - Collection, - parent_id=parent.id, - external_account_id=self.library_id, - create_method_kwargs=dict(name=name), - ) - if is_new: - # Make sure the child has its protocol set appropriately. - integration = child.create_external_integration( - ExternalIntegration.OVERDRIVE - ) - configuration = child.create_integration_configuration( - ExternalIntegration.OVERDRIVE - ) - - # Set or update the name of the collection to reflect the name of - # the library, just in case that name has changed. - child.name = name - return parent, child - - -class OverdriveBibliographicCoverageProvider(BibliographicCoverageProvider): - """Fill in bibliographic metadata for Overdrive records. - - This will occasionally fill in some availability information for a - single Collection, but we rely on Monitors to keep availability - information up to date for all Collections. - """ - - SERVICE_NAME = "Overdrive Bibliographic Coverage Provider" - DATA_SOURCE_NAME = DataSource.OVERDRIVE - PROTOCOL = ExternalIntegration.OVERDRIVE - INPUT_IDENTIFIER_TYPES = Identifier.OVERDRIVE_ID - - def __init__(self, collection, api_class=OverdriveCoreAPI, **kwargs): - """Constructor. - - :param collection: Provide bibliographic coverage to all - Overdrive books in the given Collection. - :param api_class: Instantiate this class with the given Collection, - rather than instantiating OverdriveAPI. - """ - super().__init__(collection, **kwargs) - if isinstance(api_class, OverdriveCoreAPI): - # Use a previously instantiated OverdriveAPI instance - # rather than creating a new one. - self.api = api_class - else: - # A web application should not use this option because it - # will put a non-scoped session in the mix. 
- _db = Session.object_session(collection) - self.api = api_class(_db, collection) - - def process_item(self, identifier): - info = self.api.metadata_lookup(identifier) - error = None - if info.get("errorCode") == "NotFound": - error = "ID not recognized by Overdrive: %s" % identifier.identifier - elif info.get("errorCode") == "InvalidGuid": - error = "Invalid Overdrive ID: %s" % identifier.identifier - - if error: - return self.failure(identifier, error, transient=False) - - metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) - - if not metadata: - e = "Could not extract metadata from Overdrive data: %r" % info - return self.failure(identifier, e) - - self.metadata_pre_hook(metadata) - return self.set_metadata(identifier, metadata) - - def metadata_pre_hook(self, metadata): - """A hook method that allows subclasses to modify a Metadata - object derived from Overdrive before it's applied. - """ - return metadata diff --git a/core/scripts.py b/core/scripts.py index 0c39c190c5..7e55eee156 100644 --- a/core/scripts.py +++ b/core/scripts.py @@ -1,5 +1,4 @@ import argparse -import csv import datetime import json import logging @@ -10,7 +9,7 @@ import unicodedata import uuid from enum import Enum -from typing import Generator, List, Optional, Type +from typing import Generator, Optional, Type from sqlalchemy import and_, exists, tuple_ from sqlalchemy.orm import Query, Session, defer @@ -57,7 +56,6 @@ from core.model.patron import Loan from core.monitor import CollectionMonitor, ReaperMonitor from core.opds_import import OPDSImporter, OPDSImportMonitor -from core.overdrive import OverdriveCoreAPI from core.query.customlist import CustomListQueries from core.search.coverage_remover import RemovesSearchCoverage from core.service.container import Services, container_instance @@ -2718,122 +2716,6 @@ def do_run(self): ) -class GenerateOverdriveAdvantageAccountList(InputScript): - """Generates a CSV containing the following fields: - circulation manager - 
collection - client_key - external_account_id - library_token - advantage_name - advantage_id - advantage_token - already_configured - """ - - def __init__(self, _db=None, *args, **kwargs): - super().__init__(_db, *args, **kwargs) - self._data: List[List[str]] = list() - - def _create_overdrive_api(self, collection: Collection): - return OverdriveCoreAPI(_db=self._db, collection=collection) - - def do_run(self, *args, **kwargs): - parsed = GenerateOverdriveAdvantageAccountList.parse_command_line( - _db=self._db, *args, **kwargs - ) - query: Query = Collection.by_protocol( - self._db, protocol=ExternalIntegration.OVERDRIVE - ) - for c in query.filter(Collection.parent_id == None): - collection: Collection = c - api = self._create_overdrive_api(collection=collection) - client_key = api.client_key().decode() - client_secret = api.client_secret().decode() - - try: - library_token = api.collection_token - advantage_accounts = api.get_advantage_accounts() - - for aa in advantage_accounts: - existing_child_collections = query.filter( - Collection.parent_id == collection.id - ) - already_configured_aa_libraries = [ - e.external_account_id for e in existing_child_collections - ] - self._data.append( - [ - collection.name, - collection.external_account_id, - client_key, - client_secret, - library_token, - aa.name, - aa.library_id, - aa.token, - aa.library_id in already_configured_aa_libraries, - ] - ) - except Exception as e: - logging.error( - f"Could not connect to collection {c.name}: reason: {str(e)}." 
- ) - - file_path = parsed.output_file_path[0] - circ_manager_name = parsed.circulation_manager_name[0] - self.write_csv(output_file_path=file_path, circ_manager_name=circ_manager_name) - - def write_csv(self, output_file_path: str, circ_manager_name: str): - with open(output_file_path, "w", newline="") as csvfile: - writer = csv.writer(csvfile) - writer.writerow( - [ - "cm", - "collection", - "overdrive_library_id", - "client_key", - "client_secret", - "library_token", - "advantage_name", - "advantage_id", - "advantage_token", - "already_configured", - ] - ) - for i in self._data: - i.insert(0, circ_manager_name) - writer.writerow(i) - - @classmethod - def arg_parser(cls): - parser = argparse.ArgumentParser() - parser.add_argument( - "--output-file-path", - help="The path of an output file", - metavar="o", - nargs=1, - ) - - parser.add_argument( - "--circulation-manager-name", - help="The name of the circulation-manager", - metavar="c", - nargs=1, - required=True, - ) - - parser.add_argument( - "--file-format", - help="The file format of the output file", - metavar="f", - nargs=1, - default="csv", - ) - - return parser - - class CustomListUpdateEntriesScript(CustomListSweeperScript): """Traverse all entries and update lists if they have auto_update_enabled""" diff --git a/tests/api/conftest.py b/tests/api/conftest.py index 3122398d97..7ced547870 100644 --- a/tests/api/conftest.py +++ b/tests/api/conftest.py @@ -33,7 +33,6 @@ "tests.fixtures.odl", "tests.fixtures.opds2_files", "tests.fixtures.opds_files", - "tests.fixtures.overdrive", "tests.fixtures.sample_covers", "tests.fixtures.search", "tests.fixtures.time", diff --git a/tests/core/files/overdrive/advantage_accounts.json b/tests/api/files/overdrive/advantage_accounts.json similarity index 100% rename from tests/core/files/overdrive/advantage_accounts.json rename to tests/api/files/overdrive/advantage_accounts.json diff --git a/tests/core/files/overdrive/audiobook.json b/tests/api/files/overdrive/audiobook.json 
similarity index 100% rename from tests/core/files/overdrive/audiobook.json rename to tests/api/files/overdrive/audiobook.json diff --git a/tests/core/files/overdrive/has_awards.json b/tests/api/files/overdrive/has_awards.json similarity index 100% rename from tests/core/files/overdrive/has_awards.json rename to tests/api/files/overdrive/has_awards.json diff --git a/tests/core/files/overdrive/has_grade_levels.json b/tests/api/files/overdrive/has_grade_levels.json similarity index 100% rename from tests/core/files/overdrive/has_grade_levels.json rename to tests/api/files/overdrive/has_grade_levels.json diff --git a/tests/core/files/overdrive/has_sample.json b/tests/api/files/overdrive/has_sample.json similarity index 100% rename from tests/core/files/overdrive/has_sample.json rename to tests/api/files/overdrive/has_sample.json diff --git a/tests/core/files/overdrive/overdrive_availability_advantage.json b/tests/api/files/overdrive/overdrive_availability_advantage.json similarity index 100% rename from tests/core/files/overdrive/overdrive_availability_advantage.json rename to tests/api/files/overdrive/overdrive_availability_advantage.json diff --git a/tests/core/files/overdrive/overdrive_availability_information.json b/tests/api/files/overdrive/overdrive_availability_information_2.json similarity index 100% rename from tests/core/files/overdrive/overdrive_availability_information.json rename to tests/api/files/overdrive/overdrive_availability_information_2.json diff --git a/tests/core/files/overdrive/overdrive_book_list.json b/tests/api/files/overdrive/overdrive_book_list.json similarity index 100% rename from tests/core/files/overdrive/overdrive_book_list.json rename to tests/api/files/overdrive/overdrive_book_list.json diff --git a/tests/core/files/overdrive/overdrive_book_list_missing_data.json b/tests/api/files/overdrive/overdrive_book_list_missing_data.json similarity index 100% rename from tests/core/files/overdrive/overdrive_book_list_missing_data.json rename 
to tests/api/files/overdrive/overdrive_book_list_missing_data.json diff --git a/tests/core/files/overdrive/overdrive_metadata.json b/tests/api/files/overdrive/overdrive_metadata.json similarity index 100% rename from tests/core/files/overdrive/overdrive_metadata.json rename to tests/api/files/overdrive/overdrive_metadata.json diff --git a/tests/api/mockapi/overdrive.py b/tests/api/mockapi/overdrive.py index ae7aa11192..d7ea7d7f0f 100644 --- a/tests/api/mockapi/overdrive.py +++ b/tests/api/mockapi/overdrive.py @@ -2,20 +2,45 @@ from sqlalchemy.orm import Session -from api.overdrive import OverdriveAPI +from api.overdrive import OverdriveAPI, OverdriveConstants from core.model import Library, get_one_or_create from core.model.collection import Collection from core.model.configuration import ExternalIntegration -from core.overdrive import OverdriveConstants, OverdriveCoreAPI from core.util.http import HTTP from tests.core.mock import MockRequestsResponse from tests.fixtures.database import DatabaseTransactionFixture -class MockOverdriveCoreAPI(OverdriveCoreAPI): +class MockOverdriveResponse: + def __init__(self, status_code, headers, content): + self.status_code = status_code + self.headers = headers + self.content = content + + def json(self): + return json.loads(self.content) + + +class MockOverdriveAPI(OverdriveAPI): + library_data = '{"id":1810,"name":"My Public Library (MA)","type":"Library","collectionToken":"1a09d9203","links":{"self":{"href":"http://api.overdrive.com/v1/libraries/1810","type":"application/vnd.overdrive.api+json"},"products":{"href":"http://api.overdrive.com/v1/collections/1a09d9203/products","type":"application/vnd.overdrive.api+json"},"dlrHomepage":{"href":"http://ebooks.nypl.org","type":"text/html"}},"formats":[{"id":"audiobook-wma","name":"OverDrive WMA Audiobook"},{"id":"ebook-pdf-adobe","name":"Adobe PDF eBook"},{"id":"ebook-mediado","name":"MediaDo eBook"},{"id":"ebook-epub-adobe","name":"Adobe EPUB 
eBook"},{"id":"ebook-kindle","name":"Kindle Book"},{"id":"audiobook-mp3","name":"OverDrive MP3 Audiobook"},{"id":"ebook-pdf-open","name":"Open PDF eBook"},{"id":"ebook-overdrive","name":"OverDrive Read"},{"id":"video-streaming","name":"Streaming Video"},{"id":"ebook-epub-open","name":"Open EPUB eBook"}]}' + + token_data = '{"access_token":"foo","token_type":"bearer","expires_in":3600,"scope":"LIB META AVAIL SRCH"}' + + def __init__(self, _db, collection): + self.access_token_requests = [] + self.requests = [] + self.responses = [] + + # Almost all tests will try to request the access token, so + # set the response that will be returned if an attempt is + # made. + self.access_token_response = self.mock_access_token_response("bearer token") + super().__init__(_db, collection) + self._collection_token = "fake token" + @classmethod def mock_collection( - self, + cls, _db: Session, library: Library, name: str = "Test Overdrive Collection", @@ -49,17 +74,6 @@ def mock_collection( _db.refresh(config) return collection - def __init__(self, _db, collection, *args, **kwargs): - self.access_token_requests = [] - self.requests = [] - self.responses = [] - - # Almost all tests will try to request the access token, so - # set the response that will be returned if an attempt is - # made. - self.access_token_response = self.mock_access_token_response("bearer token") - super().__init__(_db, collection, *args, **kwargs) - def queue_collection_token(self): # Many tests immediately try to access the # collection token. 
This is a helper method to make it easy to @@ -111,24 +125,6 @@ def _make_request(self, url, *args, **kwargs): kwargs.get("disallowed_response_codes"), ) - -class MockOverdriveResponse: - def __init__(self, status_code, headers, content): - self.status_code = status_code - self.headers = headers - self.content = content - - def json(self): - return json.loads(self.content) - - -class MockOverdriveAPI(MockOverdriveCoreAPI, OverdriveAPI): - library_data = '{"id":1810,"name":"My Public Library (MA)","type":"Library","collectionToken":"1a09d9203","links":{"self":{"href":"http://api.overdrive.com/v1/libraries/1810","type":"application/vnd.overdrive.api+json"},"products":{"href":"http://api.overdrive.com/v1/collections/1a09d9203/products","type":"application/vnd.overdrive.api+json"},"dlrHomepage":{"href":"http://ebooks.nypl.org","type":"text/html"}},"formats":[{"id":"audiobook-wma","name":"OverDrive WMA Audiobook"},{"id":"ebook-pdf-adobe","name":"Adobe PDF eBook"},{"id":"ebook-mediado","name":"MediaDo eBook"},{"id":"ebook-epub-adobe","name":"Adobe EPUB eBook"},{"id":"ebook-kindle","name":"Kindle Book"},{"id":"audiobook-mp3","name":"OverDrive MP3 Audiobook"},{"id":"ebook-pdf-open","name":"Open PDF eBook"},{"id":"ebook-overdrive","name":"OverDrive Read"},{"id":"video-streaming","name":"Streaming Video"},{"id":"ebook-epub-open","name":"Open EPUB eBook"}]}' - - token_data = '{"access_token":"foo","token_type":"bearer","expires_in":3600,"scope":"LIB META AVAIL SRCH"}' - - collection_token = "fake token" - def patron_request(self, patron, pin, *args, **kwargs): response = self._make_request(*args, **kwargs) diff --git a/tests/api/test_overdrive.py b/tests/api/test_overdrive.py index c54acd9f19..86ba258711 100644 --- a/tests/api/test_overdrive.py +++ b/tests/api/test_overdrive.py @@ -1,12 +1,14 @@ from __future__ import annotations import base64 +import csv import json +import logging import os import random from datetime import timedelta from typing import TYPE_CHECKING, Any, 
Dict -from unittest.mock import MagicMock, create_autospec +from unittest.mock import MagicMock, PropertyMock, create_autospec, patch import pytest from requests import Response @@ -16,33 +18,46 @@ from api.circulation_exceptions import * from api.config import Configuration from api.overdrive import ( + GenerateOverdriveAdvantageAccountList, NewTitlesOverdriveCollectionMonitor, + OverdriveAdvantageAccount, OverdriveAPI, + OverdriveBibliographicCoverageProvider, OverdriveCirculationMonitor, OverdriveCollectionReaper, + OverdriveConstants, OverdriveFormatSweep, OverdriveManifestFulfillmentInfo, + OverdriveRepresentationExtractor, RecentOverdriveCollectionMonitor, ) from core.config import CannotLoadConfiguration +from core.coverage import CoverageFailure from core.integration.goals import Goals from core.integration.registry import IntegrationRegistry -from core.metadata_layer import TimestampData +from core.metadata_layer import LinkData, TimestampData from core.model import ( + Collection, + Contributor, DataSource, DeliveryMechanism, Edition, ExternalIntegration, + Hyperlink, Identifier, LicensePool, + Measurement, MediaTypes, Representation, RightsStatus, + Subject, ) -from core.overdrive import OverdriveConstants +from core.scripts import RunCollectionCoverageProviderScript from core.util.datetime_helpers import datetime_utc, utc_now +from core.util.http import BadResponseException from tests.api.mockapi.overdrive import MockOverdriveAPI from tests.core.mock import DummyHTTPClient, MockRequestsResponse +from tests.core.util.test_mock_web_server import MockAPIServer, MockAPIServerResponse from tests.fixtures.database import DatabaseTransactionFixture from tests.fixtures.library import LibraryFixture @@ -52,6 +67,19 @@ from tests.fixtures.time import Time +@pytest.fixture +def mock_web_server(): + """A test fixture that yields a usable mock web server for the lifetime of the test.""" + _server = MockAPIServer("127.0.0.1", 10256) + _server.start() + 
logging.info(f"starting mock web server on {_server.address()}:{_server.port()}") + yield _server + logging.info( + f"shutting down mock web server on {_server.address()}:{_server.port()}" + ) + _server.stop() + + class OverdriveAPIFixture: def __init__(self, db: DatabaseTransactionFixture, data: OverdriveAPIFilesFixture): self.db = db @@ -98,6 +126,446 @@ def overdrive_api_fixture( class TestOverdriveAPI: + def test_errors_not_retried( + self, + overdrive_api_fixture: OverdriveAPIFixture, + mock_web_server: MockAPIServer, + ): + session = overdrive_api_fixture.db.session + library = overdrive_api_fixture.db.default_library() + collection = MockOverdriveAPI.mock_collection(session, library) + + # Enqueue a response for the request that the server will make for a token. + _r = MockAPIServerResponse() + _r.status_code = 200 + _r.set_content( + b"""{ + "access_token": "x", + "expires_in": 23 + } + """ + ) + mock_web_server.enqueue_response("POST", "/oauth/token", _r) + + api = OverdriveAPI(session, collection) + api._hosts["oauth_host"] = mock_web_server.url("/oauth") + + # Try a get() call for each error code + for code in [404]: + _r = MockAPIServerResponse() + _r.status_code = code + mock_web_server.enqueue_response("GET", "/a/b/c", _r) + _status, _, _ = api.get(mock_web_server.url("/a/b/c")) + assert _status == code + + for code in [400, 403, 500, 501, 502, 503]: + _r = MockAPIServerResponse() + _r.status_code = code + + # The default is to retry 5 times, so enqueue 5 responses. 
+ for i in range(0, 6): + mock_web_server.enqueue_response("GET", "/a/b/c", _r) + try: + api.get(mock_web_server.url("/a/b/c")) + except BadResponseException: + pass + + # Exactly one request was made for each error code, plus one for a token + assert len(mock_web_server.requests()) == 8 + + def test_constructor_makes_no_requests( + self, + overdrive_api_fixture: OverdriveAPIFixture, + ): + session = overdrive_api_fixture.db.session + library = overdrive_api_fixture.db.default_library() + # Invoking the OverdriveAPI constructor does not, by itself, + # make any HTTP requests. + collection = MockOverdriveAPI.mock_collection(session, library) + + class NoRequests(OverdriveAPI): + MSG = "This is a unit test, you can't make HTTP requests!" + + def no_requests(self, *args, **kwargs): + raise Exception(self.MSG) + + _do_get = no_requests + _do_post = no_requests + _make_request = no_requests + + api = NoRequests(session, collection) + + # Attempting to access .token or .collection_token _will_ + # try to make an HTTP request. + for field in "token", "collection_token": + with pytest.raises(Exception) as excinfo: + getattr(api, field) + assert api.MSG in str(excinfo.value) + + def test_ils_name(self, overdrive_api_fixture: OverdriveAPIFixture): + fixture = overdrive_api_fixture + transaction = overdrive_api_fixture.db + + """The 'ils_name' setting (defined in + MockOverdriveAPI.mock_collection) is available through + OverdriveAPI.ils_name(). + """ + assert "e" == fixture.api.ils_name(transaction.default_library()) + + # The value must be explicitly set for a given library, or + # else the default will be used. + l2 = transaction.library() + assert "default" == fixture.api.ils_name(l2) + + def test_make_link_safe(self): + # Unsafe characters are escaped. + assert "http://foo.com?q=%2B%3A%7B%7D" == OverdriveAPI.make_link_safe( + "http://foo.com?q=+:{}" + ) + + # Links to version 1 of the availability API are converted + # to links to version 2. 
+ v1 = "https://qa.api.overdrive.com/v1/collections/abcde/products/12345/availability" + v2 = "https://qa.api.overdrive.com/v2/collections/abcde/products/12345/availability" + assert v2 == OverdriveAPI.make_link_safe(v1) + + # We also handle the case of a trailing slash, just in case Overdrive + # starts serving links with trailing slashes. + v1 = v1 + "/" + v2 = v2 + "/" + assert v2 == OverdriveAPI.make_link_safe(v1) + + # Links to other endpoints are not converted + leave_alone = "https://qa.api.overdrive.com/v1/collections/abcde/products/12345" + assert leave_alone == OverdriveAPI.make_link_safe(leave_alone) + + def test_hosts(self, overdrive_api_fixture: OverdriveAPIFixture): + fixture = overdrive_api_fixture + session = overdrive_api_fixture.db.session + c = OverdriveAPI + + # By default, OverdriveAPI is initialized with the production + # set of hostnames. + assert fixture.api.hosts() == c.HOSTS[OverdriveConstants.PRODUCTION_SERVERS] + + # You can instead initialize it to use the testing set of + # hostnames. + def api_with_setting(x): + config = fixture.collection.integration_configuration + DatabaseTransactionFixture.set_settings(config, overdrive_server_nickname=x) + return c(session, fixture.collection) + + testing = api_with_setting(OverdriveConstants.TESTING_SERVERS) + assert testing.hosts() == c.HOSTS[OverdriveConstants.TESTING_SERVERS] + + # If the setting doesn't make sense, we default to production + # hostnames. + bad = api_with_setting("nonsensical") + assert bad.hosts() == c.HOSTS[OverdriveConstants.PRODUCTION_SERVERS] + + def test_endpoint(self, overdrive_api_fixture: OverdriveAPIFixture): + fixture = overdrive_api_fixture + + # The .endpoint() method performs string interpolation, including + # the names of servers. 
+ template = ( + "%(host)s %(patron_host)s %(oauth_host)s %(oauth_patron_host)s %(extra)s" + ) + result = fixture.api.endpoint(template, extra="val") + + # The host names and the 'extra' argument have been used to + # fill in the string interpolations. + expect_args = dict(fixture.api.hosts()) + expect_args["extra"] = "val" + assert result == template % expect_args + + # The string has been completely interpolated. + assert "%" not in result + + # Once interpolation has happened, doing it again has no effect. + assert result == fixture.api.endpoint(result, extra="something else") + + # This is important because an interpolated URL may superficially + # appear to contain extra formatting characters. + assert result + "%3A" == fixture.api.endpoint( + result + "%3A", extra="something else" + ) + + def test_token_authorization_header( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + fixture = overdrive_api_fixture + + # Verify that the Authorization header needed to get an access + # token for a given collection is encoded properly. 
+ assert fixture.api.token_authorization_header == "Basic YTpi" + assert ( + fixture.api.token_authorization_header + == "Basic " + + base64.standard_b64encode( + b"%s:%s" % (fixture.api.client_key(), fixture.api.client_secret()) + ).decode("utf8") + ) + + def test_token_post_success(self, overdrive_api_fixture: OverdriveAPIFixture): + fixture = overdrive_api_fixture + transaction = fixture.db + + fixture.api.queue_response(200, content="some content") + response = fixture.api.token_post(transaction.fresh_url(), "the payload") + assert 200 == response.status_code + assert fixture.api.access_token_response.content == response.content + + def test_get_success(self, overdrive_api_fixture: OverdriveAPIFixture): + fixture = overdrive_api_fixture + transaction = fixture.db + + fixture.api.queue_response(200, content="some content") + status_code, headers, content = fixture.api.get(transaction.fresh_url(), {}) + assert 200 == status_code + assert b"some content" == content + + def test_failure_to_get_library_is_fatal( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + fixture = overdrive_api_fixture + + fixture.api.queue_response(500) + with pytest.raises(BadResponseException) as excinfo: + fixture.api.get_library() + assert "Got status code 500" in str(excinfo.value) + + def test_error_getting_library(self, overdrive_api_fixture: OverdriveAPIFixture): + fixture = overdrive_api_fixture + session = fixture.db.session + + class MisconfiguredOverdriveAPI(MockOverdriveAPI): + """This Overdrive client has valid credentials but the library + can't be found -- probably because the library ID is wrong.""" + + def get_library(self): + return { + "errorCode": "Some error", + "message": "Some message.", + "token": "abc-def-ghi", + } + + # Just instantiating the API doesn't cause this error. + api = MisconfiguredOverdriveAPI(session, fixture.collection) + api._collection_token = None + + # But trying to access the collection token will cause it. 
+ with pytest.raises(CannotLoadConfiguration) as excinfo: + api.collection_token() + assert ( + "Overdrive credentials are valid but could not fetch library: Some message." + in str(excinfo.value) + ) + + def test_401_on_get_refreshes_bearer_token( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + fixture = overdrive_api_fixture + transaction = fixture.db + + # We have a token. + assert "bearer token" == fixture.api.token + + # But then we try to GET, and receive a 401. + fixture.api.queue_response(401) + + # We refresh the bearer token. (This happens in + # MockOverdriveAPI.token_post, so we don't mock the response + # in the normal way.) + fixture.api.access_token_response = fixture.api.mock_access_token_response( + "new bearer token" + ) + + # Then we retry the GET and it succeeds this time. + fixture.api.queue_response(200, content="at last, the content") + + status_code, headers, content = fixture.api.get(transaction.fresh_url(), {}) + + assert 200 == status_code + assert b"at last, the content" == content + + # The bearer token has been updated. + assert "new bearer token" == fixture.api.token + + def test_credential_refresh_success( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + fixture = overdrive_api_fixture + + """Verify the process of refreshing the Overdrive bearer token.""" + # Perform the initial credential check. + fixture.api.check_creds() + credential = fixture.api.credential_object(lambda x: x) + assert "bearer token" == credential.credential + assert fixture.api.token == credential.credential + + fixture.api.access_token_response = fixture.api.mock_access_token_response( + "new bearer token" + ) + + # Refresh the credentials and the token will change to + # the mocked value. 
+ fixture.api.refresh_creds(credential) + assert "new bearer token" == credential.credential + assert fixture.api.token == credential.credential + + def test_401_after_token_refresh_raises_error( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + fixture = overdrive_api_fixture + + assert "bearer token" == fixture.api.token + + # We try to GET and receive a 401. + fixture.api.queue_response(401) + + # We refresh the bearer token. + fixture.api.access_token_response = fixture.api.mock_access_token_response( + "new bearer token" + ) + + # Then we retry the GET but we get another 401. + fixture.api.queue_response(401) + + credential = fixture.api.credential_object(lambda x: x) + fixture.api.refresh_creds(credential) + + # That raises a BadResponseException + with pytest.raises(BadResponseException) as excinfo: + fixture.api.get_library() + assert "Bad response from" in str(excinfo.value) + assert "Something's wrong with the Overdrive OAuth Bearer Token!" in str( + excinfo.value + ) + + def test_401_during_refresh_raises_error( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + fixture = overdrive_api_fixture + + """If we fail to refresh the OAuth bearer token, an exception is + raised. + """ + fixture.api.access_token_response = MockRequestsResponse(401, {}, "") + with pytest.raises(BadResponseException) as excinfo: + fixture.api.refresh_creds(None) + assert "Got status code 401" in str(excinfo.value) + assert "can only continue on: 200." in str(excinfo.value) + + def test_advantage_differences(self, overdrive_api_fixture: OverdriveAPIFixture): + transaction = overdrive_api_fixture.db + session = transaction.session + + # Test the differences between Advantage collections and + # regular Overdrive collections. + + # Here's a regular Overdrive collection. 
+ main = transaction.collection( + protocol=ExternalIntegration.OVERDRIVE, + external_account_id="1", + ) + DatabaseTransactionFixture.set_settings( + main.integration_configuration, "overdrive_client_key", "user" + ) + DatabaseTransactionFixture.set_settings( + main.integration_configuration, "overdrive_client_secret", "password" + ) + DatabaseTransactionFixture.set_settings( + main.integration_configuration, "overdrive_website_id", "100" + ) + DatabaseTransactionFixture.set_settings( + main.integration_configuration, "ils_name", "default" + ) + + # Here's an Overdrive API client for that collection. + overdrive_main = MockOverdriveAPI(session, main) + + # Note the "library" endpoint. + assert ( + "https://api.overdrive.com/v1/libraries/1" + == overdrive_main._library_endpoint + ) + + # The advantage_library_id of a non-Advantage Overdrive account + # is always -1. + assert "1" == overdrive_main.library_id() + assert -1 == overdrive_main.advantage_library_id + + # Here's an Overdrive Advantage collection associated with the + # main Overdrive collection. + child = transaction.collection( + protocol=ExternalIntegration.OVERDRIVE, + external_account_id="2", + ) + child.parent = main + overdrive_child = MockOverdriveAPI(session, child) + + # In URL-space, the "library" endpoint for the Advantage + # collection is beneath the parent collection's "library" + # endpoint. + assert ( + "https://api.overdrive.com/v1/libraries/1/advantageAccounts/2" + == overdrive_child._library_endpoint + ) + + # The advantage_library_id of an Advantage collection is the + # numeric value of its external_account_id. + assert "2" == overdrive_child.library_id() + assert 2 == overdrive_child.advantage_library_id + + def test__get_book_list_page(self, overdrive_api_fixture: OverdriveAPIFixture): + fixture = overdrive_api_fixture + + # Test the internal method that retrieves a list of books and + # preprocesses it.
+ + class MockExtractor: + def link(self, content, rel_to_follow): + self.link_called_with = (content, rel_to_follow) + return "http://next-page/" + + def availability_link_list(self, content): + self.availability_link_list_called_with = content + return ["an availability queue"] + + original_data = {"key": "value"} + for content in ( + original_data, + json.dumps(original_data), + json.dumps(original_data).encode("utf8"), + ): + extractor = MockExtractor() + fixture.api.queue_response(200, content=content) + result = fixture.api._get_book_list_page( + "http://first-page/", "some-rel", extractor + ) + + # A single request was made to the requested page. + (url, headers, body) = fixture.api.requests.pop() + assert len(fixture.api.requests) == 0 + assert url == "http://first-page/" + + # The extractor was used to extract a link to the page + # with rel="some-rel". + # + # Note that the Python data structure (`original_data`) is passed in, + # regardless of whether the mock response body is a Python + # data structure, a bytestring, or a Unicode string. + assert extractor.link_called_with == (original_data, "some-rel") + + # The data structure was also passed into the extractor's + # availability_link_list() method. + assert extractor.availability_link_list_called_with == original_data + + # The final result is a queue of availability data (from + # this page) and a link to the next page. 
+ assert result == (["an availability queue"], "http://next-page/") + def test_external_integration(self, overdrive_api_fixture: OverdriveAPIFixture): assert ( overdrive_api_fixture.collection.external_integration @@ -2820,3 +3288,735 @@ def test_instantiate(self, overdrive_api_fixture: OverdriveAPIFixture): monitor = OverdriveCollectionReaper( db.session, overdrive_api_fixture.collection, api_class=MockOverdriveAPI ) + + +class TestOverdriveRepresentationExtractor: + def test_availability_info(self, overdrive_api_fixture: OverdriveAPIFixture): + data, raw = overdrive_api_fixture.sample_json("overdrive_book_list.json") + availability = OverdriveRepresentationExtractor.availability_link_list(raw) + # Every item in the list has a few important values. + for item in availability: + for key in "availability_link", "author_name", "id", "title", "date_added": + assert key in item + + # Also run a spot check on the actual values. + spot = availability[0] + assert "210bdcad-29b7-445f-8d05-cdbb40abc03a" == spot["id"] + assert "King and Maxwell" == spot["title"] + assert "David Baldacci" == spot["author_name"] + assert "2013-11-12T14:13:00-05:00" == spot["date_added"] + + def test_availability_info_missing_data( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + # overdrive_book_list_missing_data.json has two products. One + # only has a title, the other only has an ID. + data, raw = overdrive_api_fixture.sample_json( + "overdrive_book_list_missing_data.json" + ) + [item] = OverdriveRepresentationExtractor.availability_link_list(raw) + + # We got a data structure -- full of missing data -- for the + # item that has an ID. + assert "i only have an id" == item["id"] + assert None == item["title"] + assert None == item["author_name"] + assert None == item["date_added"] + + # We did not get a data structure for the item that only has a + # title, because an ID is required -- otherwise we don't know + # what book we're talking about. 
+ + def test_link(self, overdrive_api_fixture: OverdriveAPIFixture): + data, raw = overdrive_api_fixture.sample_json("overdrive_book_list.json") + expect = OverdriveAPI.make_link_safe( + "http://api.overdrive.com/v1/collections/collection-id/products?limit=300&offset=0&lastupdatetime=2014-04-28%2009:25:09&sort=popularity:desc&formats=ebook-epub-open,ebook-epub-adobe,ebook-pdf-adobe,ebook-pdf-open" + ) + assert expect == OverdriveRepresentationExtractor.link(raw, "first") + + def test_book_info_to_circulation(self, overdrive_api_fixture: OverdriveAPIFixture): + # Tests that can convert an overdrive json block into a CirculationData object. + fixture = overdrive_api_fixture + session = overdrive_api_fixture.db.session + + raw, info = fixture.sample_json("overdrive_availability_information_2.json") + extractor = OverdriveRepresentationExtractor(fixture.api) + circulationdata = extractor.book_info_to_circulation(info) + + # NOTE: It's not realistic for licenses_available and + # patrons_in_hold_queue to both be nonzero; this is just to + # verify that the test picks up whatever data is in the + # document. + assert 3 == circulationdata.licenses_owned + assert 1 == circulationdata.licenses_available + assert 10 == circulationdata.patrons_in_hold_queue + + # Related IDs. + identifier = circulationdata.primary_identifier(session) + assert (Identifier.OVERDRIVE_ID, "2a005d55-a417-4053-b90d-7a38ca6d2065") == ( + identifier.type, + identifier.identifier, + ) + + def test_book_info_to_circulation_advantage( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + # Overdrive Advantage accounts (a.k.a. "child" or "sub" accounts derive + # different information from the same API responses as "main" Overdrive + # accounts. 
+ fixture = overdrive_api_fixture + raw, info = fixture.sample_json("overdrive_availability_advantage.json") + + extractor = OverdriveRepresentationExtractor(fixture.api) + # Calling in the context of a main account should return a count of + # the main account and any shared sub account owned and available. + consortial_data = extractor.book_info_to_circulation(info) + assert 10 == consortial_data.licenses_owned + assert 10 == consortial_data.licenses_available + + class MockAPI: + # Pretend to be an API for an Overdrive Advantage collection with + # library ID 61. + advantage_library_id = 61 + + extractor = OverdriveRepresentationExtractor(MockAPI()) + advantage_data = extractor.book_info_to_circulation(info) + assert 1 == advantage_data.licenses_owned + assert 1 == advantage_data.licenses_available + + # Both collections have the same information about active + # holds, because that information is not split out by + # collection. + assert 0 == advantage_data.patrons_in_hold_queue + assert 0 == consortial_data.patrons_in_hold_queue + + # If for whatever reason Overdrive doesn't mention the + # relevant collection at all, no collection-specific + # information is gleaned. + # + # TODO: It would probably be better not to return a + # CirculationData object at all, but this shouldn't happen in + # a real scenario. + class MockAPI2: + # Pretend to be an API for an Overdrive Advantage collection with + # library ID 62. + advantage_library_id = 62 + + extractor = OverdriveRepresentationExtractor(MockAPI2()) + advantage_data = extractor.book_info_to_circulation(info) + assert 0 == advantage_data.licenses_owned + assert 0 == advantage_data.licenses_available + + class MockAPI3: + # Pretend to be an API for an Overdrive Advantage collection with + # library ID 63 which contains shared copies. 
+ advantage_library_id = 63 + + extractor = OverdriveRepresentationExtractor(MockAPI3()) + advantage_data = extractor.book_info_to_circulation(info) + # since these copies are shared and counted as part of the main + # context we do not count them here. + assert 0 == advantage_data.licenses_owned + assert 0 == advantage_data.licenses_available + + def test_not_found_error_to_circulationdata( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + fixture = overdrive_api_fixture + transaction = fixture.db + raw, info = fixture.sample_json("overdrive_availability_not_found.json") + + # By default, a "NotFound" error can't be converted to a + # CirculationData object, because we don't know _which_ book it + # was that wasn't found. + extractor = OverdriveRepresentationExtractor(fixture.api) + m = extractor.book_info_to_circulation + assert None == m(info) + + # However, if an ID was added to `info` ahead of time (as the + # circulation code does), we do know, and we can create a + # CirculationData. + identifier = transaction.identifier(identifier_type=Identifier.OVERDRIVE_ID) + info["id"] = identifier.identifier + data = m(info) + assert identifier == data.primary_identifier(transaction.session) + assert 0 == data.licenses_owned + assert 0 == data.licenses_available + assert 0 == data.patrons_in_hold_queue + + def test_book_info_with_metadata(self, overdrive_api_fixture: OverdriveAPIFixture): + # Tests that can convert an overdrive json block into a Metadata object. 
+ + raw, info = overdrive_api_fixture.sample_json("overdrive_metadata.json") + metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) + + assert "Agile Documentation" == metadata.title + assert ( + "Agile Documentation A Pattern Guide to Producing Lightweight Documents for Software Projects" + == metadata.sort_title + ) + assert ( + "A Pattern Guide to Producing Lightweight Documents for Software Projects" + == metadata.subtitle + ) + assert Edition.BOOK_MEDIUM == metadata.medium + assert "Wiley Software Patterns" == metadata.series + assert "eng" == metadata.language + assert "Wiley" == metadata.publisher + assert "John Wiley & Sons, Inc." == metadata.imprint + assert 2005 == metadata.published.year + assert 1 == metadata.published.month + assert 31 == metadata.published.day + + [author] = metadata.contributors + assert "RĂ¼ping, Andreas" == author.sort_name + assert "Andreas Rüping" == author.display_name + assert [Contributor.AUTHOR_ROLE] == author.roles + + subjects = sorted(metadata.subjects, key=lambda x: x.identifier) + + assert [ + ("Computer Technology", Subject.OVERDRIVE, 100), + ("Nonfiction", Subject.OVERDRIVE, 100), + ("Object Technologies - Miscellaneous", "tag", 1), + ] == [(x.identifier, x.type, x.weight) for x in subjects] + + # Related IDs. + assert (Identifier.OVERDRIVE_ID, "3896665d-9d81-4cac-bd43-ffc5066de1f5") == ( + metadata.primary_identifier.type, + metadata.primary_identifier.identifier, + ) + + ids = [(x.type, x.identifier) for x in metadata.identifiers] + + # The original data contains an actual ASIN and ISBN, plus a blank + # ASIN and three invalid ISBNs: one which is common placeholder + # text, one which is mis-typed and has a bad check digit, and one + # which has an invalid character; the bad identifiers do not show + # up here. 
+ assert [ + (Identifier.ASIN, "B000VI88N2"), + (Identifier.ISBN, "9780470856246"), + (Identifier.OVERDRIVE_ID, "3896665d-9d81-4cac-bd43-ffc5066de1f5"), + ] == sorted(ids) + + # Available formats. + [kindle, pdf] = sorted( + metadata.circulation.formats, key=lambda x: x.content_type + ) + assert DeliveryMechanism.KINDLE_CONTENT_TYPE == kindle.content_type + assert DeliveryMechanism.KINDLE_DRM == kindle.drm_scheme + + assert Representation.PDF_MEDIA_TYPE == pdf.content_type + assert DeliveryMechanism.ADOBE_DRM == pdf.drm_scheme + + # Links to various resources. + shortd, image, longd = sorted(metadata.links, key=lambda x: x.rel) + + assert Hyperlink.DESCRIPTION == longd.rel + assert longd.content.startswith("
Software documentation") + + assert Hyperlink.SHORT_DESCRIPTION == shortd.rel + assert shortd.content.startswith("
Software documentation") + assert len(shortd.content) < len(longd.content) + + assert Hyperlink.IMAGE == image.rel + assert ( + "http://images.contentreserve.com/ImageType-100/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg100.jpg" + == image.href + ) + + thumbnail = image.thumbnail + + assert Hyperlink.THUMBNAIL_IMAGE == thumbnail.rel + assert ( + "http://images.contentreserve.com/ImageType-200/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg200.jpg" + == thumbnail.href + ) + + # Measurements associated with the book. + + measurements = metadata.measurements + popularity = [ + x for x in measurements if x.quantity_measured == Measurement.POPULARITY + ][0] + assert 2 == popularity.value + + rating = [x for x in measurements if x.quantity_measured == Measurement.RATING][ + 0 + ] + assert 1 == rating.value + + # Request only the bibliographic information. + metadata = OverdriveRepresentationExtractor.book_info_to_metadata( + info, include_bibliographic=True, include_formats=False + ) + + assert "Agile Documentation" == metadata.title + assert None == metadata.circulation + + # Request only the format information. + metadata = OverdriveRepresentationExtractor.book_info_to_metadata( + info, include_bibliographic=False, include_formats=True + ) + + assert None == metadata.title + + [kindle, pdf] = sorted( + metadata.circulation.formats, key=lambda x: x.content_type + ) + assert DeliveryMechanism.KINDLE_CONTENT_TYPE == kindle.content_type + assert DeliveryMechanism.KINDLE_DRM == kindle.drm_scheme + + assert Representation.PDF_MEDIA_TYPE == pdf.content_type + assert DeliveryMechanism.ADOBE_DRM == pdf.drm_scheme + + def test_audiobook_info(self, overdrive_api_fixture: OverdriveAPIFixture): + # This book will be available in three formats: a link to the + # Overdrive Read website, a manifest file that SimplyE can + # download, and the legacy format used by the mobile app + # called 'Overdrive'. 
+ raw, info = overdrive_api_fixture.sample_json("audiobook.json") + metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) + streaming, manifest, legacy = sorted( + metadata.circulation.formats, key=lambda x: x.content_type + ) + assert DeliveryMechanism.STREAMING_AUDIO_CONTENT_TYPE == streaming.content_type + assert ( + MediaTypes.OVERDRIVE_AUDIOBOOK_MANIFEST_MEDIA_TYPE == manifest.content_type + ) + assert "application/x-od-media" == legacy.content_type + + def test_book_info_with_sample(self, overdrive_api_fixture: OverdriveAPIFixture): + # This book has two samples; one available as a direct download and + # one available through a manifest file. + raw, info = overdrive_api_fixture.sample_json("has_sample.json") + metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) + samples = [x for x in metadata.links if x.rel == Hyperlink.SAMPLE] + epub_sample, manifest_sample = sorted(samples, key=lambda x: x.media_type) + + # Here's the direct download. + assert ( + "http://excerpts.contentreserve.com/FormatType-410/1071-1/9BD/24F/82/BridesofConvenienceBundle9781426803697.epub" + == epub_sample.href + ) + assert MediaTypes.EPUB_MEDIA_TYPE == epub_sample.media_type + + # Here's the manifest. 
+ assert ( + "https://samples.overdrive.com/?crid=9BD24F82-35C0-4E0A-B5E7-BCFED07835CF&.epub-sample.overdrive.com" + == manifest_sample.href + ) + # Assert we have the end content type of the sample, no DRM formats + assert "text/html" == manifest_sample.media_type + + def test_book_info_with_unknown_sample( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + raw, info = overdrive_api_fixture.sample_json("has_sample.json") + + # Just use one format, and change a sample type to unknown + # Only one (known sample) should be extracted then + info["formats"] = [info["formats"][1]] + info["formats"][0]["samples"][1]["formatType"] = "overdrive-unknown" + metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) + samples = [x for x in metadata.links if x.rel == Hyperlink.SAMPLE] + + assert 1 == len(samples) + assert samples[0].media_type == MediaTypes.EPUB_MEDIA_TYPE + + def test_book_info_with_grade_levels( + self, overdrive_api_fixture: OverdriveAPIFixture + ): + raw, info = overdrive_api_fixture.sample_json("has_grade_levels.json") + metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) + + grade_levels = sorted( + x.identifier for x in metadata.subjects if x.type == Subject.GRADE_LEVEL + ) + assert ["Grade 4", "Grade 5", "Grade 6", "Grade 7", "Grade 8"] == grade_levels + + def test_book_info_with_awards(self, overdrive_api_fixture: OverdriveAPIFixture): + raw, info = overdrive_api_fixture.sample_json("has_awards.json") + metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) + + [awards] = [ + x + for x in metadata.measurements + if Measurement.AWARDS == x.quantity_measured + ] + assert 1 == awards.value + assert 1 == awards.weight + + def test_image_link_to_linkdata(self): + def m(link): + return OverdriveRepresentationExtractor.image_link_to_linkdata(link, "rel") + + # Test missing data. + assert None == m(None) + assert None == m(dict()) + + # Test an ordinary success case. 
+ url = "http://images.overdrive.com/image.png" + type = "image/type" + data = m(dict(href=url, type=type)) + assert isinstance(data, LinkData) + assert url == data.href + assert type == data.media_type + + # Test a case where no media type is provided. + data = m(dict(href=url)) + assert None == data.media_type + + # Verify that invalid URLs are made link-safe. + data = m(dict(href="http://api.overdrive.com/v1/foo:bar")) + assert "http://api.overdrive.com/v1/foo%3Abar" == data.href + + # Stand-in cover images are detected and filtered out. + data = m( + dict( + href="https://img1.od-cdn.com/ImageType-100/0293-1/{00000000-0000-0000-0000-000000000002}Img100.jpg" + ) + ) + assert None == data + + def test_internal_formats(self): + # Overdrive's internal format names may correspond to one or more + # delivery mechanisms. + def assert_formats(overdrive_name, *expect): + actual = OverdriveRepresentationExtractor.internal_formats(overdrive_name) + assert list(expect) == list(actual) + + # Most formats correspond to one delivery mechanism. + assert_formats( + "ebook-pdf-adobe", (MediaTypes.PDF_MEDIA_TYPE, DeliveryMechanism.ADOBE_DRM) + ) + + assert_formats( + "ebook-epub-open", (MediaTypes.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM) + ) + + # ebook-overdrive and audiobook-overdrive each correspond to + # two delivery mechanisms. + assert_formats( + "ebook-overdrive", + ( + MediaTypes.OVERDRIVE_EBOOK_MANIFEST_MEDIA_TYPE, + DeliveryMechanism.LIBBY_DRM, + ), + ( + DeliveryMechanism.STREAMING_TEXT_CONTENT_TYPE, + DeliveryMechanism.STREAMING_DRM, + ), + ) + + assert_formats( + "audiobook-overdrive", + ( + MediaTypes.OVERDRIVE_AUDIOBOOK_MANIFEST_MEDIA_TYPE, + DeliveryMechanism.LIBBY_DRM, + ), + ( + DeliveryMechanism.STREAMING_AUDIO_CONTENT_TYPE, + DeliveryMechanism.STREAMING_DRM, + ), + ) + + # An unrecognized format does not correspond to any delivery + # mechanisms. 
+ assert_formats("no-such-format") + + +class TestOverdriveAdvantageAccount: + def test_no_advantage_accounts(self, overdrive_api_fixture: OverdriveAPIFixture): + """When there are no Advantage accounts, get_advantage_accounts() + returns an empty list. + """ + fixture = overdrive_api_fixture + fixture.api.queue_collection_token() + assert [] == fixture.api.get_advantage_accounts() + + def test_from_representation(self, overdrive_api_fixture: OverdriveAPIFixture): + """Test the creation of OverdriveAdvantageAccount objects + from Overdrive's representation of a list of accounts. + """ + fixture = overdrive_api_fixture + raw, data = fixture.sample_json("advantage_accounts.json") + [ac1, ac2] = OverdriveAdvantageAccount.from_representation(raw) + + # The two Advantage accounts have the same parent library ID. + assert "1225" == ac1.parent_library_id + assert "1225" == ac2.parent_library_id + + # But they have different names and library IDs. + assert "3" == ac1.library_id + assert "The Other Side of Town Library" == ac1.name + + assert "9" == ac2.library_id + assert "The Common Community Library" == ac2.name + + def test_to_collection(self, overdrive_api_fixture: OverdriveAPIFixture): + # Test that we can turn an OverdriveAdvantageAccount object into + # a Collection object. + fixture = overdrive_api_fixture + transaction, session = ( + fixture.db, + fixture.db.session, + ) + + account = OverdriveAdvantageAccount( + "parent_id", + "child_id", + "Library Name", + "token value", + ) + + # We can't just create a Collection object for this object because + # the parent doesn't exist. + with pytest.raises(ValueError) as excinfo: + account.to_collection(session) + assert "Cannot create a Collection whose parent does not already exist." in str( + excinfo.value + ) + + # So, create a Collection to be the parent. + parent = transaction.collection( + name="Parent", + protocol=ExternalIntegration.OVERDRIVE, + external_account_id="parent_id", + ) + + # Now it works. 
+ p, collection = account.to_collection(session) + assert p == parent + assert parent == collection.parent + assert collection.external_account_id == account.library_id + assert ExternalIntegration.LICENSE_GOAL == collection.external_integration.goal + assert ExternalIntegration.OVERDRIVE == collection.protocol + assert Goals.LICENSE_GOAL == collection.integration_configuration.goal + assert ExternalIntegration.OVERDRIVE == collection.protocol + + # To ensure uniqueness, the collection was named after its + # parent. + assert f"{parent.name} / {account.name}" == collection.name + + +class OverdriveBibliographicCoverageProviderFixture: + overdrive: OverdriveAPIFixture + provider: OverdriveBibliographicCoverageProvider + api: MockOverdriveAPI + + +@pytest.fixture +def overdrive_biblio_provider_fixture( + overdrive_api_fixture: OverdriveAPIFixture, +) -> OverdriveBibliographicCoverageProviderFixture: + fix = OverdriveBibliographicCoverageProviderFixture() + fix.overdrive = overdrive_api_fixture + fix.provider = OverdriveBibliographicCoverageProvider( + overdrive_api_fixture.collection, api_class=MockOverdriveAPI + ) + fix.api = fix.provider.api + return fix + + +class TestOverdriveBibliographicCoverageProvider: + """Test the code that looks up bibliographic information from Overdrive.""" + + def test_script_instantiation( + self, + overdrive_biblio_provider_fixture: OverdriveBibliographicCoverageProviderFixture, + ): + """Test that RunCoverageProviderScript can instantiate + the coverage provider. 
+ """ + + fixture = overdrive_biblio_provider_fixture + db = fixture.overdrive.db + + script = RunCollectionCoverageProviderScript( + OverdriveBibliographicCoverageProvider, + db.session, + api_class=MockOverdriveAPI, + ) + [provider] = script.providers + assert isinstance(provider, OverdriveBibliographicCoverageProvider) + assert isinstance(provider.api, MockOverdriveAPI) + assert fixture.overdrive.collection == provider.collection + + def test_invalid_or_unrecognized_guid( + self, + overdrive_biblio_provider_fixture: OverdriveBibliographicCoverageProviderFixture, + ): + """A bad or malformed GUID can't get coverage.""" + fixture = overdrive_biblio_provider_fixture + db = fixture.overdrive.db + + identifier = db.identifier() + identifier.identifier = "bad guid" + + error = '{"errorCode": "InvalidGuid", "message": "An invalid guid was given.", "token": "7aebce0e-2e88-41b3-b6d3-82bf15f8e1a2"}' + fixture.api.queue_response(200, content=error) + + failure = fixture.provider.process_item(identifier) + assert isinstance(failure, CoverageFailure) + assert False == failure.transient + assert "Invalid Overdrive ID: bad guid" == failure.exception + + # This is for when the GUID is well-formed but doesn't + # correspond to any real Overdrive book. + error = '{"errorCode": "NotFound", "message": "Not found in Overdrive collection.", "token": "7aebce0e-2e88-41b3-b6d3-82bf15f8e1a2"}' + fixture.api.queue_response(200, content=error) + + failure = fixture.provider.process_item(identifier) + assert isinstance(failure, CoverageFailure) + assert False == failure.transient + assert "ID not recognized by Overdrive: bad guid" == failure.exception + + def test_process_item_creates_presentation_ready_work( + self, + overdrive_biblio_provider_fixture: OverdriveBibliographicCoverageProviderFixture, + ): + """Test the normal workflow where we ask Overdrive for data, + Overdrive provides it, and we create a presentation-ready work. 
+ """ + fixture = overdrive_biblio_provider_fixture + db = fixture.overdrive.db + + # Here's the book mentioned in overdrive_metadata.json. + identifier = db.identifier(identifier_type=Identifier.OVERDRIVE_ID) + identifier.identifier = "3896665d-9d81-4cac-bd43-ffc5066de1f5" + + # This book has no LicensePool. + assert [] == identifier.licensed_through + + # Run it through the OverdriveBibliographicCoverageProvider + raw, info = fixture.overdrive.sample_json("overdrive_metadata.json") + fixture.api.queue_response(200, content=raw) + + [result] = fixture.provider.process_batch([identifier]) + assert identifier == result + + # A LicensePool was created, not because we know anything + # about how we've licensed this book, but to have a place to + # store the information about what formats the book is + # available in. + [pool] = identifier.licensed_through + assert 0 == pool.licenses_owned + [lpdm1, lpdm2] = pool.delivery_mechanisms + names = [x.delivery_mechanism.name for x in pool.delivery_mechanisms] + assert sorted( + [ + "application/pdf (application/vnd.adobe.adept+xml)", + "Kindle via Amazon (Kindle DRM)", + ] + ) == sorted(names) + + # A Work was created and made presentation ready. 
+ assert "Agile Documentation" == pool.work.title + assert True == pool.work.presentation_ready + + +class TestGenerateOverdriveAdvantageAccountList: + def test_generate_od_advantage_account_list(self, db: DatabaseTransactionFixture): + output_file_path = "test-output.csv" + circ_manager_name = "circ_man_name" + parent_library_name = "Parent" + parent_od_library_id = "parent_id" + child1_library_name = "child1" + child1_advantage_library_id = "1" + child1_token = "token1" + child2_library_name = "child2" + child2_advantage_library_id = "2" + child2_token = "token2" + client_key = "ck" + client_secret = "cs" + library_token = "lt" + + parent: Collection = db.collection( + name=parent_library_name, + protocol=ExternalIntegration.OVERDRIVE, + external_account_id=parent_od_library_id, + ) + child1: Collection = db.collection( + name=child1_library_name, + protocol=ExternalIntegration.OVERDRIVE, + external_account_id=child1_advantage_library_id, + ) + child1.parent = parent + overdrive_api = MagicMock() + overdrive_api.get_advantage_accounts.return_value = [ + OverdriveAdvantageAccount( + parent_od_library_id, + child1_advantage_library_id, + child1_library_name, + child1_token, + ), + OverdriveAdvantageAccount( + parent_od_library_id, + child2_advantage_library_id, + child2_library_name, + child2_token, + ), + ] + + overdrive_api.client_key.return_value = bytes(client_key, "utf-8") + overdrive_api.client_secret.return_value = bytes(client_secret, "utf-8") + type(overdrive_api).collection_token = PropertyMock(return_value=library_token) + + with patch( + "api.overdrive.GenerateOverdriveAdvantageAccountList._create_overdrive_api" + ) as create_od_api: + create_od_api.return_value = overdrive_api + GenerateOverdriveAdvantageAccountList(db.session).do_run( + cmd_args=[ + "--output-file-path", + output_file_path, + "--circulation-manager-name", + circ_manager_name, + ] + ) + + with open(output_file_path, newline="") as csv_file: + csvreader = csv.reader(csv_file) + for 
index, row in enumerate(csvreader): + if index == 0: + assert "cm" == row[0] + assert "collection" == row[1] + assert "overdrive_library_id" == row[2] + assert "client_key" == row[3] + assert "client_secret" == row[4] + assert "library_token" == row[5] + assert "advantage_name" == row[6] + assert "advantage_id" == row[7] + assert "advantage_token" == row[8] + assert "already_configured" == row[9] + elif index == 1: + assert circ_manager_name == row[0] + assert parent_library_name == row[1] + assert parent_od_library_id == row[2] + assert client_key == row[3] + assert client_secret == row[4] + assert library_token == row[5] + assert child1_library_name == row[6] + assert child1_advantage_library_id == row[7] + assert child1_token == row[8] + assert "True" == row[9] + else: + assert circ_manager_name == row[0] + assert parent_library_name == row[1] + assert parent_od_library_id == row[2] + assert client_key == row[3] + assert client_secret == row[4] + assert library_token == row[5] + assert child2_library_name == row[6] + assert child2_advantage_library_id == row[7] + assert child2_token == row[8] + assert "False" == row[9] + last_index = index + + os.remove(output_file_path) + assert last_index == 2 + overdrive_api.client_key.assert_called_once() + overdrive_api.client_secret.assert_called_once() + overdrive_api.get_advantage_accounts.assert_called_once() diff --git a/tests/core/conftest.py b/tests/core/conftest.py index 9ea0933198..15b69bb34a 100644 --- a/tests/core/conftest.py +++ b/tests/core/conftest.py @@ -5,7 +5,6 @@ "tests.fixtures.library", "tests.fixtures.opds2_files", "tests.fixtures.opds_files", - "tests.fixtures.overdrive", "tests.fixtures.s3", "tests.fixtures.sample_covers", "tests.fixtures.search", diff --git a/tests/core/files/overdrive/overdrive_availability_not_found.json b/tests/core/files/overdrive/overdrive_availability_not_found.json deleted file mode 100644 index 99a61e65f1..0000000000 --- 
a/tests/core/files/overdrive/overdrive_availability_not_found.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "errorCode": "NotFound", - "message": "The requested resource could not be found.", - "token": "60a18218-0d25-42b8-80c3-0bf9df782f1b" -} diff --git a/tests/core/test_overdrive.py b/tests/core/test_overdrive.py deleted file mode 100644 index eb1cc1e5cc..0000000000 --- a/tests/core/test_overdrive.py +++ /dev/null @@ -1,1127 +0,0 @@ -import json -import logging - -import pytest - -from core.config import CannotLoadConfiguration -from core.coverage import CoverageFailure -from core.integration.goals import Goals -from core.metadata_layer import LinkData -from core.model import ( - Contributor, - DeliveryMechanism, - Edition, - ExternalIntegration, - Hyperlink, - Identifier, - Measurement, - MediaTypes, - Representation, - Subject, -) -from core.overdrive import ( - OverdriveAdvantageAccount, - OverdriveBibliographicCoverageProvider, - OverdriveConstants, - OverdriveCoreAPI, - OverdriveRepresentationExtractor, -) -from core.scripts import RunCollectionCoverageProviderScript -from core.util.http import BadResponseException -from core.util.string_helpers import base64 -from tests.api.mockapi.overdrive import MockOverdriveCoreAPI -from tests.core.mock import MockRequestsResponse -from tests.core.util.test_mock_web_server import MockAPIServer, MockAPIServerResponse -from tests.fixtures.database import DatabaseTransactionFixture -from tests.fixtures.overdrive import OverdriveFixture, OverdriveWithAPIFixture - - -@pytest.fixture -def mock_web_server(): - """A test fixture that yields a usable mock web server for the lifetime of the test.""" - _server = MockAPIServer("127.0.0.1", 10256) - _server.start() - logging.info(f"starting mock web server on {_server.address()}:{_server.port()}") - yield _server - logging.info( - f"shutting down mock web server on {_server.address()}:{_server.port()}" - ) - _server.stop() - - -class TestOverdriveCoreAPI: - def test_errors_not_retried( - 
self, - overdrive_with_api_fixture: OverdriveWithAPIFixture, - mock_web_server: MockAPIServer, - ): - session = overdrive_with_api_fixture.overdrive.transaction.session - library = overdrive_with_api_fixture.overdrive.transaction.default_library() - collection = MockOverdriveCoreAPI.mock_collection(session, library) - - # Enqueue a response for the request that the server will make for a token. - _r = MockAPIServerResponse() - _r.status_code = 200 - _r.set_content( - b"""{ - "access_token": "x", - "expires_in": 23 - } - """ - ) - mock_web_server.enqueue_response("POST", "/oauth/token", _r) - - api = OverdriveCoreAPI(session, collection) - api._hosts["oauth_host"] = mock_web_server.url("/oauth") - - # Try a get() call for each error code - for code in [404]: - _r = MockAPIServerResponse() - _r.status_code = code - mock_web_server.enqueue_response("GET", "/a/b/c", _r) - _status, _, _ = api.get(mock_web_server.url("/a/b/c")) - assert _status == code - - for code in [400, 403, 500, 501, 502, 503]: - _r = MockAPIServerResponse() - _r.status_code = code - - # The default is to retry 5 times, so enqueue 5 responses. - for i in range(0, 6): - mock_web_server.enqueue_response("GET", "/a/b/c", _r) - try: - api.get(mock_web_server.url("/a/b/c")) - except BadResponseException: - pass - - # Exactly one request was made for each error code, plus one for a token - assert len(mock_web_server.requests()) == 8 - - def test_constructor_makes_no_requests( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - session = overdrive_with_api_fixture.overdrive.transaction.session - library = overdrive_with_api_fixture.overdrive.transaction.default_library() - # Invoking the OverdriveCoreAPI constructor does not, by itself, - # make any HTTP requests. - collection = MockOverdriveCoreAPI.mock_collection(session, library) - - class NoRequests(OverdriveCoreAPI): - MSG = "This is a unit test, you can't make HTTP requests!" 
- - def no_requests(self, *args, **kwargs): - raise Exception(self.MSG) - - _do_get = no_requests - _do_post = no_requests - _make_request = no_requests - - api = NoRequests(session, collection) - - # Attempting to access .token or .collection_token _will_ - # try to make an HTTP request. - for field in "token", "collection_token": - with pytest.raises(Exception) as excinfo: - getattr(api, field) - assert api.MSG in str(excinfo.value) - - def test_ils_name(self, overdrive_with_api_fixture: OverdriveWithAPIFixture): - fixture = overdrive_with_api_fixture - transaction = fixture.overdrive.transaction - - """The 'ils_name' setting (defined in - MockOverdriveCoreAPI.mock_collection) is available through - OverdriveCoreAPI.ils_name(). - """ - assert "e" == fixture.api.ils_name(transaction.default_library()) - - # The value must be explicitly set for a given library, or - # else the default will be used. - l2 = transaction.library() - assert "default" == fixture.api.ils_name(l2) - - def test_make_link_safe(self): - # Unsafe characters are escaped. - assert "http://foo.com?q=%2B%3A%7B%7D" == OverdriveCoreAPI.make_link_safe( - "http://foo.com?q=+:{}" - ) - - # Links to version 1 of the availability API are converted - # to links to version 2. - v1 = "https://qa.api.overdrive.com/v1/collections/abcde/products/12345/availability" - v2 = "https://qa.api.overdrive.com/v2/collections/abcde/products/12345/availability" - assert v2 == OverdriveCoreAPI.make_link_safe(v1) - - # We also handle the case of a trailing slash, just in case Overdrive - # starts serving links with trailing slashes. 
- v1 = v1 + "/" - v2 = v2 + "/" - assert v2 == OverdriveCoreAPI.make_link_safe(v1) - - # Links to other endpoints are not converted - leave_alone = "https://qa.api.overdrive.com/v1/collections/abcde/products/12345" - assert leave_alone == OverdriveCoreAPI.make_link_safe(leave_alone) - - def test_hosts(self, overdrive_with_api_fixture: OverdriveWithAPIFixture): - fixture = overdrive_with_api_fixture - session = fixture.overdrive.transaction.session - c = OverdriveCoreAPI - - # By default, OverdriveCoreAPI is initialized with the production - # set of hostnames. - assert fixture.api.hosts() == c.HOSTS[OverdriveConstants.PRODUCTION_SERVERS] - - # You can instead initialize it to use the testing set of - # hostnames. - def api_with_setting(x): - config = fixture.overdrive.collection.integration_configuration - DatabaseTransactionFixture.set_settings(config, overdrive_server_nickname=x) - return c(session, fixture.overdrive.collection) - - testing = api_with_setting(OverdriveConstants.TESTING_SERVERS) - assert testing.hosts() == c.HOSTS[OverdriveConstants.TESTING_SERVERS] - - # If the setting doesn't make sense, we default to production - # hostnames. - bad = api_with_setting("nonsensical") - assert bad.hosts() == c.HOSTS[OverdriveConstants.PRODUCTION_SERVERS] - - def test_endpoint(self, overdrive_with_api_fixture: OverdriveWithAPIFixture): - fixture = overdrive_with_api_fixture - - # The .endpoint() method performs string interpolation, including - # the names of servers. - template = ( - "%(host)s %(patron_host)s %(oauth_host)s %(oauth_patron_host)s %(extra)s" - ) - result = fixture.api.endpoint(template, extra="val") - - # The host names and the 'extra' argument have been used to - # fill in the string interpolations. - expect_args = dict(fixture.api.hosts()) - expect_args["extra"] = "val" - assert result == template % expect_args - - # The string has been completely interpolated. 
- assert "%" not in result - - # Once interpolation has happened, doing it again has no effect. - assert result == fixture.api.endpoint(result, extra="something else") - - # This is important because an interpolated URL may superficially - # appear to contain extra formatting characters. - assert result + "%3A" == fixture.api.endpoint( - result + "%3A", extra="something else" - ) - - def test_token_authorization_header( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - - # Verify that the Authorization header needed to get an access - # token for a given collection is encoded properly. - assert fixture.api.token_authorization_header == "Basic YTpi" - assert ( - fixture.api.token_authorization_header - == "Basic " - + base64.standard_b64encode( - b"%s:%s" % (fixture.api.client_key(), fixture.api.client_secret()) - ) - ) - - def test_token_post_success( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - transaction = fixture.overdrive.transaction - - fixture.api.queue_response(200, content="some content") - response = fixture.api.token_post(transaction.fresh_url(), "the payload") - assert 200 == response.status_code - assert fixture.api.access_token_response.content == response.content - - def test_get_success(self, overdrive_with_api_fixture: OverdriveWithAPIFixture): - fixture = overdrive_with_api_fixture - transaction = fixture.overdrive.transaction - - fixture.api.queue_response(200, content="some content") - status_code, headers, content = fixture.api.get(transaction.fresh_url(), {}) - assert 200 == status_code - assert b"some content" == content - - def test_failure_to_get_library_is_fatal( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - - fixture.api.queue_response(500) - with pytest.raises(BadResponseException) as excinfo: - fixture.api.get_library() - assert "Got status code 500" in 
str(excinfo.value) - - def test_error_getting_library( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - session = fixture.overdrive.transaction.session - - class MisconfiguredOverdriveCoreAPI(MockOverdriveCoreAPI): - """This Overdrive client has valid credentials but the library - can't be found -- probably because the library ID is wrong.""" - - def get_library(self): - return { - "errorCode": "Some error", - "message": "Some message.", - "token": "abc-def-ghi", - } - - # Just instantiating the API doesn't cause this error. - api = MisconfiguredOverdriveCoreAPI(session, fixture.overdrive.collection) - - # But trying to access the collection token will cause it. - with pytest.raises(CannotLoadConfiguration) as excinfo: - api.collection_token() - assert ( - "Overdrive credentials are valid but could not fetch library: Some message." - in str(excinfo.value) - ) - - def test_401_on_get_refreshes_bearer_token( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - transaction = fixture.overdrive.transaction - - # We have a token. - assert "bearer token" == fixture.api.token - - # But then we try to GET, and receive a 401. - fixture.api.queue_response(401) - - # We refresh the bearer token. (This happens in - # MockOverdriveCoreAPI.token_post, so we don't mock the response - # in the normal way.) - fixture.api.access_token_response = fixture.api.mock_access_token_response( - "new bearer token" - ) - - # Then we retry the GET and it succeeds this time. - fixture.api.queue_response(200, content="at last, the content") - - status_code, headers, content = fixture.api.get(transaction.fresh_url(), {}) - - assert 200 == status_code - assert b"at last, the content" == content - - # The bearer token has been updated. 
- assert "new bearer token" == fixture.api.token - - def test_credential_refresh_success( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - - """Verify the process of refreshing the Overdrive bearer token.""" - # Perform the initial credential check. - fixture.api.check_creds() - credential = fixture.api.credential_object(lambda x: x) - assert "bearer token" == credential.credential - assert fixture.api.token == credential.credential - - fixture.api.access_token_response = fixture.api.mock_access_token_response( - "new bearer token" - ) - - # Refresh the credentials and the token will change to - # the mocked value. - fixture.api.refresh_creds(credential) - assert "new bearer token" == credential.credential - assert fixture.api.token == credential.credential - - def test_401_after_token_refresh_raises_error( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - - assert "bearer token" == fixture.api.token - - # We try to GET and receive a 401. - fixture.api.queue_response(401) - - # We refresh the bearer token. - fixture.api.access_token_response = fixture.api.mock_access_token_response( - "new bearer token" - ) - - # Then we retry the GET but we get another 401. - fixture.api.queue_response(401) - - credential = fixture.api.credential_object(lambda x: x) - fixture.api.refresh_creds(credential) - - # That raises a BadResponseException - with pytest.raises(BadResponseException) as excinfo: - fixture.api.get_library() - assert "Bad response from" in str(excinfo.value) - assert "Something's wrong with the Overdrive OAuth Bearer Token!" in str( - excinfo.value - ) - - def test_401_during_refresh_raises_error( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - - """If we fail to refresh the OAuth bearer token, an exception is - raised. 
- """ - fixture.api.access_token_response = MockRequestsResponse(401, {}, "") - with pytest.raises(BadResponseException) as excinfo: - fixture.api.refresh_creds(None) - assert "Got status code 401" in str(excinfo.value) - assert "can only continue on: 200." in str(excinfo.value) - - def test_advantage_differences( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - transaction = overdrive_with_api_fixture.overdrive.transaction - session = transaction.session - - # Test the differences between Advantage collections and - # regular Overdrive collections. - - # Here's a regular Overdrive collection. - main = transaction.collection( - protocol=ExternalIntegration.OVERDRIVE, - external_account_id="1", - ) - DatabaseTransactionFixture.set_settings( - main.integration_configuration, "overdrive_client_key", "user" - ) - DatabaseTransactionFixture.set_settings( - main.integration_configuration, "overdrive_client_secret", "password" - ) - DatabaseTransactionFixture.set_settings( - main.integration_configuration, "overdrive_website_id", "100" - ) - DatabaseTransactionFixture.set_settings( - main.integration_configuration, "ils_name", "default" - ) - - # Here's an Overdrive API client for that collection. - overdrive_main = MockOverdriveCoreAPI(session, main) - - # Note the "library" endpoint. - assert ( - "https://api.overdrive.com/v1/libraries/1" - == overdrive_main._library_endpoint - ) - - # The advantage_library_id of a non-Advantage Overdrive account - # is always -1. - assert "1" == overdrive_main.library_id() - assert -1 == overdrive_main.advantage_library_id - - # Here's an Overdrive Advantage collection associated with the - # main Overdrive collection. 
- child = transaction.collection( - protocol=ExternalIntegration.OVERDRIVE, - external_account_id="2", - ) - child.parent = main - overdrive_child = MockOverdriveCoreAPI(session, child) - - # In URL-space, the "library" endpoint for the Advantage - # collection is beneath the the parent collection's "library" - # endpoint. - assert ( - "https://api.overdrive.com/v1/libraries/1/advantageAccounts/2" - == overdrive_child._library_endpoint - ) - - # The advantage_library_id of an Advantage collection is the - # numeric value of its external_account_id. - assert "2" == overdrive_child.library_id() - assert 2 == overdrive_child.advantage_library_id - - def test__get_book_list_page( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - fixture = overdrive_with_api_fixture - - # Test the internal method that retrieves a list of books and - # preprocesses it. - - class MockExtractor: - def link(self, content, rel_to_follow): - self.link_called_with = (content, rel_to_follow) - return "http://next-page/" - - def availability_link_list(self, content): - self.availability_link_list_called_with = content - return ["an availability queue"] - - original_data = {"key": "value"} - for content in ( - original_data, - json.dumps(original_data), - json.dumps(original_data).encode("utf8"), - ): - extractor = MockExtractor() - fixture.api.queue_response(200, content=content) - result = fixture.api._get_book_list_page( - "http://first-page/", "some-rel", extractor - ) - - # A single request was made to the requested page. - (url, headers, body) = fixture.api.requests.pop() - assert len(fixture.api.requests) == 0 - assert url == "http://first-page/" - - # The extractor was used to extract a link to the page - # with rel="some-rel". - # - # Note that the Python data structure (`original_data`) is passed in, - # regardless of whether the mock response body is a Python - # data structure, a bytestring, or a Unicode string. 
- assert extractor.link_called_with == (original_data, "some-rel") - - # The data structure was also passed into the extractor's - # availability_link_list() method. - assert extractor.availability_link_list_called_with == original_data - - # The final result is a queue of availability data (from - # this page) and a link to the next page. - assert result == (["an availability queue"], "http://next-page/") - - -class TestOverdriveRepresentationExtractor: - def test_availability_info(self, overdrive_fixture: OverdriveFixture): - data, raw = overdrive_fixture.sample_json("overdrive_book_list.json") - availability = OverdriveRepresentationExtractor.availability_link_list(raw) - # Every item in the list has a few important values. - for item in availability: - for key in "availability_link", "author_name", "id", "title", "date_added": - assert key in item - - # Also run a spot check on the actual values. - spot = availability[0] - assert "210bdcad-29b7-445f-8d05-cdbb40abc03a" == spot["id"] - assert "King and Maxwell" == spot["title"] - assert "David Baldacci" == spot["author_name"] - assert "2013-11-12T14:13:00-05:00" == spot["date_added"] - - def test_availability_info_missing_data(self, overdrive_fixture: OverdriveFixture): - # overdrive_book_list_missing_data.json has two products. One - # only has a title, the other only has an ID. - data, raw = overdrive_fixture.sample_json( - "overdrive_book_list_missing_data.json" - ) - [item] = OverdriveRepresentationExtractor.availability_link_list(raw) - - # We got a data structure -- full of missing data -- for the - # item that has an ID. - assert "i only have an id" == item["id"] - assert None == item["title"] - assert None == item["author_name"] - assert None == item["date_added"] - - # We did not get a data structure for the item that only has a - # title, because an ID is required -- otherwise we don't know - # what book we're talking about. 
- - def test_link(self, overdrive_fixture: OverdriveFixture): - data, raw = overdrive_fixture.sample_json("overdrive_book_list.json") - expect = OverdriveCoreAPI.make_link_safe( - "http://api.overdrive.com/v1/collections/collection-id/products?limit=300&offset=0&lastupdatetime=2014-04-28%2009:25:09&sort=popularity:desc&formats=ebook-epub-open,ebook-epub-adobe,ebook-pdf-adobe,ebook-pdf-open" - ) - assert expect == OverdriveRepresentationExtractor.link(raw, "first") - - def test_book_info_to_circulation( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - # Tests that can convert an overdrive json block into a CirculationData object. - overdrive = overdrive_with_api_fixture.overdrive - session = overdrive.transaction.session - - raw, info = overdrive.sample_json("overdrive_availability_information.json") - extractor = OverdriveRepresentationExtractor(overdrive_with_api_fixture.api) - circulationdata = extractor.book_info_to_circulation(info) - - # NOTE: It's not realistic for licenses_available and - # patrons_in_hold_queue to both be nonzero; this is just to - # verify that the test picks up whatever data is in the - # document. - assert 3 == circulationdata.licenses_owned - assert 1 == circulationdata.licenses_available - assert 10 == circulationdata.patrons_in_hold_queue - - # Related IDs. - identifier = circulationdata.primary_identifier(session) - assert (Identifier.OVERDRIVE_ID, "2a005d55-a417-4053-b90d-7a38ca6d2065") == ( - identifier.type, - identifier.identifier, - ) - - def test_book_info_to_circulation_advantage( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - # Overdrive Advantage accounts (a.k.a. "child" or "sub" accounts derive - # different information from the same API responses as "main" Overdrive - # accounts. 
- overdrive = overdrive_with_api_fixture.overdrive - raw, info = overdrive.sample_json("overdrive_availability_advantage.json") - - extractor = OverdriveRepresentationExtractor(overdrive_with_api_fixture.api) - # Calling in the context of a main account should return a count of - # the main account and any shared sub account owned and available. - consortial_data = extractor.book_info_to_circulation(info) - assert 10 == consortial_data.licenses_owned - assert 10 == consortial_data.licenses_available - - class MockAPI: - # Pretend to be an API for an Overdrive Advantage collection with - # library ID 61. - advantage_library_id = 61 - - extractor = OverdriveRepresentationExtractor(MockAPI()) - advantage_data = extractor.book_info_to_circulation(info) - assert 1 == advantage_data.licenses_owned - assert 1 == advantage_data.licenses_available - - # Both collections have the same information about active - # holds, because that information is not split out by - # collection. - assert 0 == advantage_data.patrons_in_hold_queue - assert 0 == consortial_data.patrons_in_hold_queue - - # If for whatever reason Overdrive doesn't mention the - # relevant collection at all, no collection-specific - # information is gleaned. - # - # TODO: It would probably be better not to return a - # CirculationData object at all, but this shouldn't happen in - # a real scenario. - class MockAPI2: - # Pretend to be an API for an Overdrive Advantage collection with - # library ID 62. - advantage_library_id = 62 - - extractor = OverdriveRepresentationExtractor(MockAPI2()) - advantage_data = extractor.book_info_to_circulation(info) - assert 0 == advantage_data.licenses_owned - assert 0 == advantage_data.licenses_available - - class MockAPI3: - # Pretend to be an API for an Overdrive Advantage collection with - # library ID 63 which contains shared copies. 
- advantage_library_id = 63 - - extractor = OverdriveRepresentationExtractor(MockAPI3()) - advantage_data = extractor.book_info_to_circulation(info) - # since these copies are shared and counted as part of the main - # context we do not count them here. - assert 0 == advantage_data.licenses_owned - assert 0 == advantage_data.licenses_available - - def test_not_found_error_to_circulationdata( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - overdrive = overdrive_with_api_fixture.overdrive - transaction = overdrive.transaction - raw, info = overdrive.sample_json("overdrive_availability_not_found.json") - - # By default, a "NotFound" error can't be converted to a - # CirculationData object, because we don't know _which_ book it - # was that wasn't found. - extractor = OverdriveRepresentationExtractor(overdrive_with_api_fixture.api) - m = extractor.book_info_to_circulation - assert None == m(info) - - # However, if an ID was added to `info` ahead of time (as the - # circulation code does), we do know, and we can create a - # CirculationData. - identifier = transaction.identifier(identifier_type=Identifier.OVERDRIVE_ID) - info["id"] = identifier.identifier - data = m(info) - assert identifier == data.primary_identifier(transaction.session) - assert 0 == data.licenses_owned - assert 0 == data.licenses_available - assert 0 == data.patrons_in_hold_queue - - def test_book_info_with_metadata(self, overdrive_fixture: OverdriveFixture): - # Tests that can convert an overdrive json block into a Metadata object. 
- - raw, info = overdrive_fixture.sample_json("overdrive_metadata.json") - metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) - - assert "Agile Documentation" == metadata.title - assert ( - "Agile Documentation A Pattern Guide to Producing Lightweight Documents for Software Projects" - == metadata.sort_title - ) - assert ( - "A Pattern Guide to Producing Lightweight Documents for Software Projects" - == metadata.subtitle - ) - assert Edition.BOOK_MEDIUM == metadata.medium - assert "Wiley Software Patterns" == metadata.series - assert "eng" == metadata.language - assert "Wiley" == metadata.publisher - assert "John Wiley & Sons, Inc." == metadata.imprint - assert 2005 == metadata.published.year - assert 1 == metadata.published.month - assert 31 == metadata.published.day - - [author] = metadata.contributors - assert "RĂ¼ping, Andreas" == author.sort_name - assert "Andreas Rüping" == author.display_name - assert [Contributor.AUTHOR_ROLE] == author.roles - - subjects = sorted(metadata.subjects, key=lambda x: x.identifier) - - assert [ - ("Computer Technology", Subject.OVERDRIVE, 100), - ("Nonfiction", Subject.OVERDRIVE, 100), - ("Object Technologies - Miscellaneous", "tag", 1), - ] == [(x.identifier, x.type, x.weight) for x in subjects] - - # Related IDs. - assert (Identifier.OVERDRIVE_ID, "3896665d-9d81-4cac-bd43-ffc5066de1f5") == ( - metadata.primary_identifier.type, - metadata.primary_identifier.identifier, - ) - - ids = [(x.type, x.identifier) for x in metadata.identifiers] - - # The original data contains an actual ASIN and ISBN, plus a blank - # ASIN and three invalid ISBNs: one which is common placeholder - # text, one which is mis-typed and has a bad check digit, and one - # which has an invalid character; the bad identifiers do not show - # up here. 
- assert [ - (Identifier.ASIN, "B000VI88N2"), - (Identifier.ISBN, "9780470856246"), - (Identifier.OVERDRIVE_ID, "3896665d-9d81-4cac-bd43-ffc5066de1f5"), - ] == sorted(ids) - - # Available formats. - [kindle, pdf] = sorted( - metadata.circulation.formats, key=lambda x: x.content_type - ) - assert DeliveryMechanism.KINDLE_CONTENT_TYPE == kindle.content_type - assert DeliveryMechanism.KINDLE_DRM == kindle.drm_scheme - - assert Representation.PDF_MEDIA_TYPE == pdf.content_type - assert DeliveryMechanism.ADOBE_DRM == pdf.drm_scheme - - # Links to various resources. - shortd, image, longd = sorted(metadata.links, key=lambda x: x.rel) - - assert Hyperlink.DESCRIPTION == longd.rel - assert longd.content.startswith("
Software documentation") - - assert Hyperlink.SHORT_DESCRIPTION == shortd.rel - assert shortd.content.startswith("
Software documentation") - assert len(shortd.content) < len(longd.content) - - assert Hyperlink.IMAGE == image.rel - assert ( - "http://images.contentreserve.com/ImageType-100/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg100.jpg" - == image.href - ) - - thumbnail = image.thumbnail - - assert Hyperlink.THUMBNAIL_IMAGE == thumbnail.rel - assert ( - "http://images.contentreserve.com/ImageType-200/0128-1/%7B3896665D-9D81-4CAC-BD43-FFC5066DE1F5%7DImg200.jpg" - == thumbnail.href - ) - - # Measurements associated with the book. - - measurements = metadata.measurements - popularity = [ - x for x in measurements if x.quantity_measured == Measurement.POPULARITY - ][0] - assert 2 == popularity.value - - rating = [x for x in measurements if x.quantity_measured == Measurement.RATING][ - 0 - ] - assert 1 == rating.value - - # Request only the bibliographic information. - metadata = OverdriveRepresentationExtractor.book_info_to_metadata( - info, include_bibliographic=True, include_formats=False - ) - - assert "Agile Documentation" == metadata.title - assert None == metadata.circulation - - # Request only the format information. - metadata = OverdriveRepresentationExtractor.book_info_to_metadata( - info, include_bibliographic=False, include_formats=True - ) - - assert None == metadata.title - - [kindle, pdf] = sorted( - metadata.circulation.formats, key=lambda x: x.content_type - ) - assert DeliveryMechanism.KINDLE_CONTENT_TYPE == kindle.content_type - assert DeliveryMechanism.KINDLE_DRM == kindle.drm_scheme - - assert Representation.PDF_MEDIA_TYPE == pdf.content_type - assert DeliveryMechanism.ADOBE_DRM == pdf.drm_scheme - - def test_audiobook_info(self, overdrive_fixture: OverdriveFixture): - # This book will be available in three formats: a link to the - # Overdrive Read website, a manifest file that SimplyE can - # download, and the legacy format used by the mobile app - # called 'Overdrive'. 
- raw, info = overdrive_fixture.sample_json("audiobook.json") - metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) - streaming, manifest, legacy = sorted( - metadata.circulation.formats, key=lambda x: x.content_type - ) - assert DeliveryMechanism.STREAMING_AUDIO_CONTENT_TYPE == streaming.content_type - assert ( - MediaTypes.OVERDRIVE_AUDIOBOOK_MANIFEST_MEDIA_TYPE == manifest.content_type - ) - assert "application/x-od-media" == legacy.content_type - - def test_book_info_with_sample(self, overdrive_fixture: OverdriveFixture): - # This book has two samples; one available as a direct download and - # one available through a manifest file. - raw, info = overdrive_fixture.sample_json("has_sample.json") - metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) - samples = [x for x in metadata.links if x.rel == Hyperlink.SAMPLE] - epub_sample, manifest_sample = sorted(samples, key=lambda x: x.media_type) - - # Here's the direct download. - assert ( - "http://excerpts.contentreserve.com/FormatType-410/1071-1/9BD/24F/82/BridesofConvenienceBundle9781426803697.epub" - == epub_sample.href - ) - assert MediaTypes.EPUB_MEDIA_TYPE == epub_sample.media_type - - # Here's the manifest. 
- assert ( - "https://samples.overdrive.com/?crid=9BD24F82-35C0-4E0A-B5E7-BCFED07835CF&.epub-sample.overdrive.com" - == manifest_sample.href - ) - # Assert we have the end content type of the sample, no DRM formats - assert "text/html" == manifest_sample.media_type - - def test_book_info_with_unknown_sample(self, overdrive_fixture: OverdriveFixture): - raw, info = overdrive_fixture.sample_json("has_sample.json") - - # Just use one format, and change a sample type to unknown - # Only one (known sample) should be extracted then - info["formats"] = [info["formats"][1]] - info["formats"][0]["samples"][1]["formatType"] = "overdrive-unknown" - metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) - samples = [x for x in metadata.links if x.rel == Hyperlink.SAMPLE] - - assert 1 == len(samples) - assert samples[0].media_type == MediaTypes.EPUB_MEDIA_TYPE - - def test_book_info_with_grade_levels(self, overdrive_fixture: OverdriveFixture): - raw, info = overdrive_fixture.sample_json("has_grade_levels.json") - metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) - - grade_levels = sorted( - x.identifier for x in metadata.subjects if x.type == Subject.GRADE_LEVEL - ) - assert ["Grade 4", "Grade 5", "Grade 6", "Grade 7", "Grade 8"] == grade_levels - - def test_book_info_with_awards(self, overdrive_fixture: OverdriveFixture): - raw, info = overdrive_fixture.sample_json("has_awards.json") - metadata = OverdriveRepresentationExtractor.book_info_to_metadata(info) - - [awards] = [ - x - for x in metadata.measurements - if Measurement.AWARDS == x.quantity_measured - ] - assert 1 == awards.value - assert 1 == awards.weight - - def test_image_link_to_linkdata(self): - def m(link): - return OverdriveRepresentationExtractor.image_link_to_linkdata(link, "rel") - - # Test missing data. - assert None == m(None) - assert None == m(dict()) - - # Test an ordinary success case. 
- url = "http://images.overdrive.com/image.png" - type = "image/type" - data = m(dict(href=url, type=type)) - assert isinstance(data, LinkData) - assert url == data.href - assert type == data.media_type - - # Test a case where no media type is provided. - data = m(dict(href=url)) - assert None == data.media_type - - # Verify that invalid URLs are made link-safe. - data = m(dict(href="http://api.overdrive.com/v1/foo:bar")) - assert "http://api.overdrive.com/v1/foo%3Abar" == data.href - - # Stand-in cover images are detected and filtered out. - data = m( - dict( - href="https://img1.od-cdn.com/ImageType-100/0293-1/{00000000-0000-0000-0000-000000000002}Img100.jpg" - ) - ) - assert None == data - - def test_internal_formats(self): - # Overdrive's internal format names may correspond to one or more - # delivery mechanisms. - def assert_formats(overdrive_name, *expect): - actual = OverdriveRepresentationExtractor.internal_formats(overdrive_name) - assert list(expect) == list(actual) - - # Most formats correspond to one delivery mechanism. - assert_formats( - "ebook-pdf-adobe", (MediaTypes.PDF_MEDIA_TYPE, DeliveryMechanism.ADOBE_DRM) - ) - - assert_formats( - "ebook-epub-open", (MediaTypes.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM) - ) - - # ebook-overdrive and audiobook-overdrive each correspond to - # two delivery mechanisms. - assert_formats( - "ebook-overdrive", - ( - MediaTypes.OVERDRIVE_EBOOK_MANIFEST_MEDIA_TYPE, - DeliveryMechanism.LIBBY_DRM, - ), - ( - DeliveryMechanism.STREAMING_TEXT_CONTENT_TYPE, - DeliveryMechanism.STREAMING_DRM, - ), - ) - - assert_formats( - "audiobook-overdrive", - ( - MediaTypes.OVERDRIVE_AUDIOBOOK_MANIFEST_MEDIA_TYPE, - DeliveryMechanism.LIBBY_DRM, - ), - ( - DeliveryMechanism.STREAMING_AUDIO_CONTENT_TYPE, - DeliveryMechanism.STREAMING_DRM, - ), - ) - - # An unrecognized format does not correspond to any delivery - # mechanisms. 
- assert_formats("no-such-format") - - -class TestOverdriveAdvantageAccount: - def test_no_advantage_accounts( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - """When there are no Advantage accounts, get_advantage_accounts() - returns an empty list. - """ - fixture = overdrive_with_api_fixture - fixture.api.queue_collection_token() - assert [] == fixture.api.get_advantage_accounts() - - def test_from_representation( - self, overdrive_with_api_fixture: OverdriveWithAPIFixture - ): - """Test the creation of OverdriveAdvantageAccount objects - from Overdrive's representation of a list of accounts. - """ - fixture = overdrive_with_api_fixture - raw, data = fixture.overdrive.sample_json("advantage_accounts.json") - [ac1, ac2] = OverdriveAdvantageAccount.from_representation(raw) - - # The two Advantage accounts have the same parent library ID. - assert "1225" == ac1.parent_library_id - assert "1225" == ac2.parent_library_id - - # But they have different names and library IDs. - assert "3" == ac1.library_id - assert "The Other Side of Town Library" == ac1.name - - assert "9" == ac2.library_id - assert "The Common Community Library" == ac2.name - - def test_to_collection(self, overdrive_with_api_fixture: OverdriveWithAPIFixture): - # Test that we can turn an OverdriveAdvantageAccount object into - # a Collection object. - fixture = overdrive_with_api_fixture - transaction, session = ( - fixture.overdrive.transaction, - fixture.overdrive.transaction.session, - ) - - account = OverdriveAdvantageAccount( - "parent_id", - "child_id", - "Library Name", - "token value", - ) - - # We can't just create a Collection object for this object because - # the parent doesn't exist. - with pytest.raises(ValueError) as excinfo: - account.to_collection(session) - assert "Cannot create a Collection whose parent does not already exist." in str( - excinfo.value - ) - - # So, create a Collection to be the parent. 
- parent = transaction.collection( - name="Parent", - protocol=ExternalIntegration.OVERDRIVE, - external_account_id="parent_id", - ) - - # Now it works. - p, collection = account.to_collection(session) - assert p == parent - assert parent == collection.parent - assert collection.external_account_id == account.library_id - assert ExternalIntegration.LICENSE_GOAL == collection.external_integration.goal - assert ExternalIntegration.OVERDRIVE == collection.protocol - assert Goals.LICENSE_GOAL == collection.integration_configuration.goal - assert ExternalIntegration.OVERDRIVE == collection.protocol - - # To ensure uniqueness, the collection was named after its - # parent. - assert f"{parent.name} / {account.name}" == collection.name - - -class OverdriveBibliographicCoverageProviderFixture: - overdrive: OverdriveFixture - provider: OverdriveBibliographicCoverageProvider - api: MockOverdriveCoreAPI - - -@pytest.fixture -def overdrive_biblio_provider_fixture( - overdrive_fixture: OverdriveFixture, -) -> OverdriveBibliographicCoverageProviderFixture: - fix = OverdriveBibliographicCoverageProviderFixture() - fix.overdrive = overdrive_fixture - fix.provider = OverdriveBibliographicCoverageProvider( - overdrive_fixture.collection, api_class=MockOverdriveCoreAPI - ) - fix.api = fix.provider.api - return fix - - -class TestOverdriveBibliographicCoverageProvider: - """Test the code that looks up bibliographic information from Overdrive.""" - - def test_script_instantiation( - self, - overdrive_biblio_provider_fixture: OverdriveBibliographicCoverageProviderFixture, - ): - """Test that RunCoverageProviderScript can instantiate - the coverage provider. 
- """ - - fixture = overdrive_biblio_provider_fixture - transaction = fixture.overdrive.transaction - - script = RunCollectionCoverageProviderScript( - OverdriveBibliographicCoverageProvider, - transaction.session, - api_class=MockOverdriveCoreAPI, - ) - [provider] = script.providers - assert isinstance(provider, OverdriveBibliographicCoverageProvider) - assert isinstance(provider.api, MockOverdriveCoreAPI) - assert fixture.overdrive.collection == provider.collection - - def test_invalid_or_unrecognized_guid( - self, - overdrive_biblio_provider_fixture: OverdriveBibliographicCoverageProviderFixture, - ): - """A bad or malformed GUID can't get coverage.""" - fixture = overdrive_biblio_provider_fixture - transaction = fixture.overdrive.transaction - - identifier = transaction.identifier() - identifier.identifier = "bad guid" - fixture.api.queue_collection_token() - - error = '{"errorCode": "InvalidGuid", "message": "An invalid guid was given.", "token": "7aebce0e-2e88-41b3-b6d3-82bf15f8e1a2"}' - fixture.api.queue_response(200, content=error) - - failure = fixture.provider.process_item(identifier) - assert isinstance(failure, CoverageFailure) - assert False == failure.transient - assert "Invalid Overdrive ID: bad guid" == failure.exception - - # This is for when the GUID is well-formed but doesn't - # correspond to any real Overdrive book. 
- error = '{"errorCode": "NotFound", "message": "Not found in Overdrive collection.", "token": "7aebce0e-2e88-41b3-b6d3-82bf15f8e1a2"}' - fixture.api.queue_response(200, content=error) - - failure = fixture.provider.process_item(identifier) - assert isinstance(failure, CoverageFailure) - assert False == failure.transient - assert "ID not recognized by Overdrive: bad guid" == failure.exception - - def test_process_item_creates_presentation_ready_work( - self, - overdrive_biblio_provider_fixture: OverdriveBibliographicCoverageProviderFixture, - ): - """Test the normal workflow where we ask Overdrive for data, - Overdrive provides it, and we create a presentation-ready work. - """ - fixture = overdrive_biblio_provider_fixture - transaction = fixture.overdrive.transaction - - fixture.api.queue_collection_token() - - # Here's the book mentioned in overdrive_metadata.json. - identifier = transaction.identifier(identifier_type=Identifier.OVERDRIVE_ID) - identifier.identifier = "3896665d-9d81-4cac-bd43-ffc5066de1f5" - - # This book has no LicensePool. - assert [] == identifier.licensed_through - - # Run it through the OverdriveBibliographicCoverageProvider - raw, info = fixture.overdrive.sample_json("overdrive_metadata.json") - fixture.api.queue_response(200, content=raw) - - [result] = fixture.provider.process_batch([identifier]) - assert identifier == result - - # A LicensePool was created, not because we know anything - # about how we've licensed this book, but to have a place to - # store the information about what formats the book is - # available in. - [pool] = identifier.licensed_through - assert 0 == pool.licenses_owned - [lpdm1, lpdm2] = pool.delivery_mechanisms - names = [x.delivery_mechanism.name for x in pool.delivery_mechanisms] - assert sorted( - [ - "application/pdf (application/vnd.adobe.adept+xml)", - "Kindle via Amazon (Kindle DRM)", - ] - ) == sorted(names) - - # A Work was created and made presentation ready. 
- assert "Agile Documentation" == pool.work.title - assert True == pool.work.presentation_ready diff --git a/tests/core/test_scripts.py b/tests/core/test_scripts.py index bba5afac41..38154fb1c2 100644 --- a/tests/core/test_scripts.py +++ b/tests/core/test_scripts.py @@ -1,12 +1,10 @@ from __future__ import annotations -import csv import datetime import json -import os import random from io import StringIO -from unittest.mock import MagicMock, PropertyMock, call, patch +from unittest.mock import MagicMock, call, patch import pytest from freezegun import freeze_time @@ -42,7 +40,6 @@ from core.model.patron import Patron from core.monitor import CollectionMonitor, Monitor, ReaperMonitor from core.opds_import import OPDSImportMonitor -from core.overdrive import OverdriveAdvantageAccount from core.scripts import ( AddClassificationScript, CheckContributorNamesInDB, @@ -56,7 +53,6 @@ CustomListUpdateEntriesScript, DeleteInvisibleLanesScript, Explain, - GenerateOverdriveAdvantageAccountList, IdentifierInputScript, LaneSweeperScript, LibraryInputScript, @@ -2568,111 +2564,6 @@ def test_do_run(self, db: DatabaseTransactionFixture): assert self.script.process_loan.call_count == 0 -class TestGenerateOverdriveAdvantageAccountList: - def test_generate_od_advantage_account_list(self, db: DatabaseTransactionFixture): - output_file_path = "test-output.csv" - circ_manager_name = "circ_man_name" - parent_library_name = "Parent" - parent_od_library_id = "parent_id" - child1_library_name = "child1" - child1_advantage_library_id = "1" - child1_token = "token1" - child2_library_name = "child2" - child2_advantage_library_id = "2" - child2_token = "token2" - client_key = "ck" - client_secret = "cs" - library_token = "lt" - - parent: Collection = db.collection( - name=parent_library_name, - protocol=ExternalIntegration.OVERDRIVE, - external_account_id=parent_od_library_id, - ) - child1: Collection = db.collection( - name=child1_library_name, - protocol=ExternalIntegration.OVERDRIVE, - 
external_account_id=child1_advantage_library_id, - ) - child1.parent = parent - overdrive_api = MagicMock() - overdrive_api.get_advantage_accounts.return_value = [ - OverdriveAdvantageAccount( - parent_od_library_id, - child1_advantage_library_id, - child1_library_name, - child1_token, - ), - OverdriveAdvantageAccount( - parent_od_library_id, - child2_advantage_library_id, - child2_library_name, - child2_token, - ), - ] - - overdrive_api.client_key.return_value = bytes(client_key, "utf-8") - overdrive_api.client_secret.return_value = bytes(client_secret, "utf-8") - type(overdrive_api).collection_token = PropertyMock(return_value=library_token) - - with patch( - "core.scripts.GenerateOverdriveAdvantageAccountList._create_overdrive_api" - ) as create_od_api: - create_od_api.return_value = overdrive_api - GenerateOverdriveAdvantageAccountList(db.session).do_run( - cmd_args=[ - "--output-file-path", - output_file_path, - "--circulation-manager-name", - circ_manager_name, - ] - ) - - with open(output_file_path, newline="") as csv_file: - csvreader = csv.reader(csv_file) - for index, row in enumerate(csvreader): - if index == 0: - assert "cm" == row[0] - assert "collection" == row[1] - assert "overdrive_library_id" == row[2] - assert "client_key" == row[3] - assert "client_secret" == row[4] - assert "library_token" == row[5] - assert "advantage_name" == row[6] - assert "advantage_id" == row[7] - assert "advantage_token" == row[8] - assert "already_configured" == row[9] - elif index == 1: - assert circ_manager_name == row[0] - assert parent_library_name == row[1] - assert parent_od_library_id == row[2] - assert client_key == row[3] - assert client_secret == row[4] - assert library_token == row[5] - assert child1_library_name == row[6] - assert child1_advantage_library_id == row[7] - assert child1_token == row[8] - assert "True" == row[9] - else: - assert circ_manager_name == row[0] - assert parent_library_name == row[1] - assert parent_od_library_id == row[2] - assert 
client_key == row[3] - assert client_secret == row[4] - assert library_token == row[5] - assert child2_library_name == row[6] - assert child2_advantage_library_id == row[7] - assert child2_token == row[8] - assert "False" == row[9] - last_index = index - - os.remove(output_file_path) - assert last_index == 2 - overdrive_api.client_key.assert_called_once() - overdrive_api.client_secret.assert_called_once() - overdrive_api.get_advantage_accounts.assert_called_once() - - class TestWorkConsolidationScript: """TODO""" diff --git a/tests/fixtures/overdrive.py b/tests/fixtures/overdrive.py deleted file mode 100644 index 1aa8c98163..0000000000 --- a/tests/fixtures/overdrive.py +++ /dev/null @@ -1,73 +0,0 @@ -import json -import os -from pathlib import Path - -import pytest - -from core.model import Collection -from tests.api.mockapi.overdrive import MockOverdriveCoreAPI -from tests.fixtures.database import DatabaseTransactionFixture - - -class OverdriveFixture: - """A basic fixture for Overdrive tests.""" - - transaction: DatabaseTransactionFixture - collection: Collection - _resource_path: str - _base_path: str - - @classmethod - def create(cls, transaction: DatabaseTransactionFixture) -> "OverdriveFixture": - fix = OverdriveFixture() - fix._base_path = str(Path(__file__).parent.parent) - fix._resource_path = os.path.join(fix._base_path, "core", "files", "overdrive") - fix.transaction = transaction - fix.collection = MockOverdriveCoreAPI.mock_collection( - transaction.session, transaction.default_library() - ) - return fix - - def sample_json(self, filename): - path = os.path.join(self._resource_path, filename) - data = open(path).read() - return data, json.loads(data) - - -@pytest.fixture() -def overdrive_fixture( - db, -) -> OverdriveFixture: - """A basic fixture for Overdrive tests.""" - return OverdriveFixture.create(db) - - -class OverdriveWithAPIFixture: - overdrive: OverdriveFixture - api: MockOverdriveCoreAPI - - """Automatically create a MockOverdriveCoreAPI 
class during setup. - - We don't always do this because - TestOverdriveBibliographicCoverageProvider needs to create a - MockOverdriveCoreAPI during the test, and at the moment the second - MockOverdriveCoreAPI request created in a test behaves differently - from the first one. - """ - - @classmethod - def create( - cls, transaction: DatabaseTransactionFixture - ) -> "OverdriveWithAPIFixture": - fix = OverdriveWithAPIFixture() - fix.overdrive = OverdriveFixture.create(transaction) - fix.api = MockOverdriveCoreAPI(transaction.session, fix.overdrive.collection) - return fix - - -@pytest.fixture() -def overdrive_with_api_fixture( - db, -) -> OverdriveWithAPIFixture: - """A fixture for Overdrive tests that includes a mocked API.""" - return OverdriveWithAPIFixture.create(db)