From 80d445f054cc1b7fa81c6f7906da5fe1c3f93def Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Thu, 13 Jun 2024 15:04:35 -0300 Subject: [PATCH 1/8] Update to support ODL removals --- bin/opds2_import_monitor | 5 +- poetry.lock | 38 ++-- pyproject.toml | 4 +- src/palace/manager/api/odl2.py | 188 ++++++++++-------- src/palace/manager/core/opds2_import.py | 91 +++------ .../manager/sqlalchemy/model/licensing.py | 7 +- tests/files/odl2/feed.json | 6 - tests/manager/api/test_odl2.py | 119 ++++++++++- tests/manager/core/test_opds2_import.py | 183 ++++++++++++++++- 9 files changed, 458 insertions(+), 183 deletions(-) diff --git a/bin/opds2_import_monitor b/bin/opds2_import_monitor index 7c4592b4b9..1bbcc36b0b 100755 --- a/bin/opds2_import_monitor +++ b/bin/opds2_import_monitor @@ -1,11 +1,10 @@ #!/usr/bin/env python """Update the circulation manager server with new books from OPDS 2.0 import collections.""" - +from webpub_manifest_parser.opds2 import OPDS2FeedParserFactory from palace.manager.core.opds2_import import ( OPDS2Importer, OPDS2ImportMonitor, - PalaceOPDS2FeedParserFactory, RWPMManifestParser, ) from palace.manager.scripts.opds_import import OPDSImportScript @@ -14,7 +13,7 @@ import_script = OPDSImportScript( importer_class=OPDS2Importer, monitor_class=OPDS2ImportMonitor, protocol=OPDS2Importer.NAME, - parser=RWPMManifestParser(PalaceOPDS2FeedParserFactory()), + parser=RWPMManifestParser(OPDS2FeedParserFactory()), ) import_script.run() diff --git a/poetry.lock b/poetry.lock index 8dad45f5d0..076f585be5 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2929,24 +2929,28 @@ files = [ [[package]] name = "palace-webpub-manifest-parser" -version = "3.1.1" +version = "0" description = "A parser for the Readium Web Publication Manifest, OPDS 2.0 and ODL formats." 
optional = false python-versions = ">=3.8,<4" -files = [ - {file = "palace_webpub_manifest_parser-3.1.1-py3-none-any.whl", hash = "sha256:ac43d7f16414810cf7aeea26b9825ae8678404887ecf7a0345aa47ad992510d8"}, - {file = "palace_webpub_manifest_parser-3.1.1.tar.gz", hash = "sha256:7025164e2ae997371ed355355d8321685c6eb1228b86d10430e682d7316351b3"}, -] +files = [] +develop = false [package.dependencies] -jsonschema = ">=4.19,<5.0" -multipledispatch = ">=1.0,<2.0" -pyrsistent = ">=0.20,<0.21" -python-dateutil = ">=2.8,<3.0" -pytz = ">=2023.3,<2024.0" -requests = ">=2.27,<3.0" -rfc3987 = ">=1.3,<2.0" -uritemplate = ">=4.1,<5.0" +jsonschema = "^4.19" +multipledispatch = "^1.0" +pyrsistent = "^0.20" +python-dateutil = "^2.8" +pytz = "^2024.1" +requests = "^2.27" +rfc3987 = "^1.3" +uritemplate = "^4.1" + +[package.source] +type = "git" +url = "https://github.com/ThePalaceProject/webpub-manifest-parser.git" +reference = "feature/add-odl-availability-proposal" +resolved_reference = "6970ea8c877926da5f9db18d60b5c59305171aec" [[package]] name = "pillow" @@ -3765,13 +3769,13 @@ test = ["coverage (>=4.5.2)", "flake8 (>=3.6.0,<=5.0.0)", "freezegun (>=0.3.11,< [[package]] name = "pytz" -version = "2023.4" +version = "2024.1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.4-py2.py3-none-any.whl", hash = "sha256:f90ef520d95e7c46951105338d918664ebfd6f1d995bd7d153127ce90efafa6a"}, - {file = "pytz-2023.4.tar.gz", hash = "sha256:31d4583c4ed539cd037956140d695e42c033a19e984bfce9964a3f7d59bc2b40"}, + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] [[package]] @@ -5053,4 +5057,4 @@ lxml = ">=3.8" [metadata] lock-version = "2.0" python-versions = ">=3.10,<4" -content-hash = 
"6926c293e113bb1539170fb512788115a9946d1c74d0dbd439d63fdbf2805198" +content-hash = "bf0913a75e48852d711701bcffcfdd7e82825e1e49b81d58181235ff8df91536" diff --git a/pyproject.toml b/pyproject.toml index 67445dda30..8a4ad31385 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -247,7 +247,7 @@ nameparser = "^1.1" # nameparser is for author name manipulations nltk = "3.8.1" # nltk is a textblob dependency. opensearch-dsl = "~1.0" opensearch-py = "~1.1" -palace-webpub-manifest-parser = "^3.1.1" +palace-webpub-manifest-parser = {git = "https://github.com/ThePalaceProject/webpub-manifest-parser.git", branch = "feature/add-odl-availability-proposal"} pillow = "^10.0" pycryptodome = "^3.18" pydantic = {version = "^1.10.9", extras = ["dotenv", "email"]} @@ -261,7 +261,7 @@ pyspellchecker = "0.8.1" python = ">=3.10,<4" python-dateutil = "2.9.0.post0" python3-saml = "^1.16" # python-saml is required for SAML authentication -pytz = "^2023.3" +pytz = "^2024.1" pyyaml = "^6.0" redis = "^5.0.5" redmail = "^0.6.0" diff --git a/src/palace/manager/api/odl2.py b/src/palace/manager/api/odl2.py index 93a3dfcab0..20631e2a15 100644 --- a/src/palace/manager/api/odl2.py +++ b/src/palace/manager/api/odl2.py @@ -21,7 +21,7 @@ ODLLibrarySettings, ODLSettings, ) -from palace.manager.core.metadata_layer import FormatData, TimestampData +from palace.manager.core.metadata_layer import FormatData, LicenseData, TimestampData from palace.manager.core.monitor import CollectionMonitor from palace.manager.core.opds2_import import ( OPDS2Importer, @@ -36,7 +36,11 @@ ) from palace.manager.sqlalchemy.model.collection import Collection from palace.manager.sqlalchemy.model.edition import Edition -from palace.manager.sqlalchemy.model.licensing import LicensePool, RightsStatus +from palace.manager.sqlalchemy.model.licensing import ( + LicensePool, + LicenseStatus, + RightsStatus, +) from palace.manager.sqlalchemy.model.patron import Hold from palace.manager.sqlalchemy.model.resource import HttpResponseTuple 
from palace.manager.util import first_or_default @@ -213,103 +217,121 @@ def _extract_publication_metadata( metadata = super()._extract_publication_metadata( feed, publication, data_source_name ) + + if ( + metadata.circulation.licenses_owned == 0 + and metadata.circulation.licenses_available == 0 + ): + # This title is not available, so we don't need to process it. + return metadata + + if not publication.licenses: + # This title is an open-access title, no need to process licenses. + return metadata + formats = [] licenses = [] medium = None skipped_license_formats = set(self.settings.skipped_license_formats) - if publication.licenses: - for odl_license in publication.licenses: - identifier = odl_license.metadata.identifier - checkout_link = first_or_default( - odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key) - ) - if checkout_link: - checkout_link = checkout_link.href + for odl_license in publication.licenses: + identifier = odl_license.metadata.identifier - license_info_document_link = first_or_default( - odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key) - ) - if license_info_document_link: - license_info_document_link = license_info_document_link.href + checkout_link = first_or_default( + odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key) + ) + if checkout_link: + checkout_link = checkout_link.href - expires = ( - to_utc(odl_license.metadata.terms.expires) - if odl_license.metadata.terms - else None + license_info_document_link = first_or_default( + odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key) + ) + if license_info_document_link: + license_info_document_link = license_info_document_link.href + + expires = ( + to_utc(odl_license.metadata.terms.expires) + if odl_license.metadata.terms + else None + ) + concurrency = ( + int(odl_license.metadata.terms.concurrency) + if odl_license.metadata.terms + else None + ) + + if not license_info_document_link: + parsed_license = None + elif not 
self._extract_availability(odl_license.metadata.availability): + # No need to fetch the license document, we already know that this title is not available. + parsed_license = LicenseData( + identifier=identifier, + checkout_url=None, + status_url=license_info_document_link, + status=LicenseStatus.get(odl_license.metadata.availability.state), + checkouts_available=0, ) - concurrency = ( - int(odl_license.metadata.terms.concurrency) - if odl_license.metadata.terms - else None + else: + parsed_license = self.get_license_data( + license_info_document_link, + checkout_link, + identifier, + expires, + concurrency, + self.http_get, ) - if not license_info_document_link: - parsed_license = None + if parsed_license is not None: + licenses.append(parsed_license) + + license_formats = set(odl_license.metadata.formats) + for license_format in license_formats: + if ( + skipped_license_formats + and license_format in skipped_license_formats + ): + continue + + if not medium: + medium = Edition.medium_from_media_type(license_format) + + drm_schemes: list[str | None] + if license_format in self.LICENSE_FORMATS: + # Special case to handle DeMarque audiobooks which include the protection + # in the content type. When we see a license format of + # application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction + # it means that this audiobook title is available through the DeMarque streaming manifest + # endpoint. 
+ drm_schemes = [ + self.LICENSE_FORMATS[license_format][self.DRM_SCHEME] + ] + license_format = self.LICENSE_FORMATS[license_format][ + self.CONTENT_TYPE + ] else: - parsed_license = self.get_license_data( - license_info_document_link, - checkout_link, - identifier, - expires, - concurrency, - self.http_get, + drm_schemes = ( + odl_license.metadata.protection.formats + if odl_license.metadata.protection + else [] ) - if parsed_license is not None: - licenses.append(parsed_license) - - license_formats = set(odl_license.metadata.formats) - for license_format in license_formats: - if ( - skipped_license_formats - and license_format in skipped_license_formats - ): - continue - - if not medium: - medium = Edition.medium_from_media_type(license_format) - - drm_schemes: list[str | None] - if license_format in self.LICENSE_FORMATS: - # Special case to handle DeMarque audiobooks which include the protection - # in the content type. When we see a license format of - # application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction - # it means that this audiobook title is available through the DeMarque streaming manifest - # endpoint. - drm_schemes = [ - self.LICENSE_FORMATS[license_format][self.DRM_SCHEME] - ] - license_format = self.LICENSE_FORMATS[license_format][ - self.CONTENT_TYPE - ] - else: - drm_schemes = ( - odl_license.metadata.protection.formats - if odl_license.metadata.protection - else [] - ) - - for drm_scheme in drm_schemes or [None]: - formats.append( - FormatData( - content_type=license_format, - drm_scheme=drm_scheme, - rights_uri=RightsStatus.IN_COPYRIGHT, - ) + for drm_scheme in drm_schemes or [None]: + formats.append( + FormatData( + content_type=license_format, + drm_scheme=drm_scheme, + rights_uri=RightsStatus.IN_COPYRIGHT, ) + ) - # If we don't have any licenses, then this title is an open-access title. - # So we don't change the circulation data. 
- if len(licenses) != 0: - metadata.circulation.licenses = licenses - metadata.circulation.licenses_owned = None - metadata.circulation.licenses_available = None - metadata.circulation.licenses_reserved = None - metadata.circulation.patrons_in_hold_queue = None - metadata.circulation.formats.extend(formats) - metadata.medium = medium + metadata.circulation.licenses = licenses + metadata.circulation.licenses_owned = None + metadata.circulation.licenses_available = None + metadata.circulation.licenses_reserved = None + metadata.circulation.patrons_in_hold_queue = None + metadata.circulation.formats.extend(formats) + metadata.medium = medium return metadata diff --git a/src/palace/manager/core/opds2_import.py b/src/palace/manager/core/opds2_import.py index cf1a6a7bce..faa17d101e 100644 --- a/src/palace/manager/core/opds2_import.py +++ b/src/palace/manager/core/opds2_import.py @@ -13,20 +13,8 @@ from uritemplate import URITemplate from webpub_manifest_parser.core import ManifestParserFactory, ManifestParserResult from webpub_manifest_parser.core.analyzer import NodeFinder -from webpub_manifest_parser.core.ast import ( - ArrayOfCollectionsProperty, - Link, - Manifestlike, -) -from webpub_manifest_parser.core.properties import BooleanProperty +from webpub_manifest_parser.core.ast import Link, Manifestlike from webpub_manifest_parser.errors import BaseError -from webpub_manifest_parser.opds2 import ( - ManifestParser, - OPDS2CollectionRolesRegistry, - OPDS2FeedParserFactory, - OPDS2SemanticAnalyzer, - OPDS2SyntaxAnalyzer, -) from webpub_manifest_parser.opds2.registry import ( OPDS2LinkRelationsRegistry, OPDS2MediaTypesRegistry, @@ -80,6 +68,7 @@ Hyperlink, Representation, ) +from palace.manager.util.datetime_helpers import utc_now from palace.manager.util.http import HTTP, BadResponseException from palace.manager.util.opds_writer import OPDSFeed @@ -142,53 +131,6 @@ def parse_manifest( return result -class PalaceOPDS2PresentationMetadata(opds2_ast.PresentationMetadata): # 
type: ignore[misc] - time_tracking = BooleanProperty( - "http://palaceproject.io/terms/timeTracking", False, default_value=False - ) - - -class PalaceOPDS2Publication(opds2_ast.OPDS2Publication): # type: ignore[misc] - metadata = opds2_ast.TypeProperty( - key="metadata", required=True, nested_type=PalaceOPDS2PresentationMetadata - ) - - -class PalaceOPDS2Feed(opds2_ast.OPDS2Feed): # type: ignore[misc] - publications = ArrayOfCollectionsProperty( - "publications", - required=False, - role=OPDS2CollectionRolesRegistry.PUBLICATIONS, - collection_type=PalaceOPDS2Publication, - ) - - -class PalaceOPDS2SyntaxAnalyzer(OPDS2SyntaxAnalyzer): # type: ignore[misc] - def _create_manifest(self) -> opds2_ast.OPDS2Feed: - return PalaceOPDS2Feed() - - -class PalaceOPDS2FeedParserFactory(OPDS2FeedParserFactory): # type: ignore[misc] - def create(self) -> ManifestParser: - """Create a new OPDS 2.0 parser. - - :return: OPDS 2.0 parser - :rtype: Parser - """ - media_types_registry = OPDS2MediaTypesRegistry() - link_relations_registry = OPDS2LinkRelationsRegistry() - collection_roles_registry = OPDS2CollectionRolesRegistry() - syntax_analyzer = ( - PalaceOPDS2SyntaxAnalyzer() - ) # This is the only change from the base class - semantic_analyzer = OPDS2SemanticAnalyzer( - media_types_registry, link_relations_registry, collection_roles_registry - ) - parser = ManifestParser(syntax_analyzer, semantic_analyzer) - - return parser - - class OPDS2ImporterSettings(OPDSImporterSettings): custom_accept_header: str = FormField( default="{}, {};q=0.9, */*;q=0.1".format( @@ -741,6 +683,24 @@ def _extract_identifier( """ return self.parse_identifier(publication.metadata.identifier) # type: ignore[no-any-return] + @classmethod + def _extract_availability( + cls, availability: opds2_ast.OPDS2AvailabilityInformation | None + ) -> bool: + """Extract the publication's availability from its availability information. + + :return: Boolean value indicating whether the publication is available. 
+ """ + available = opds2_ast.OPDS2AvailabilityType.AVAILABLE.value + if ( + availability + and availability.state != available + and (not availability.until or availability.until > utc_now()) + ): + return False + + return True + def _extract_publication_metadata( self, feed: opds2_ast.OPDS2Feed, @@ -848,13 +808,20 @@ def _extract_publication_metadata( # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one rights_uri = RightsStatus.rights_uri_from_string("") + if self._extract_availability(publication.metadata.availability): + licenses_owned = LicensePool.UNLIMITED_ACCESS + licenses_available = LicensePool.UNLIMITED_ACCESS + else: + licenses_owned = 0 + licenses_available = 0 + circulation_data = CirculationData( default_rights_uri=rights_uri, data_source=data_source_name, primary_identifier=identifier_data, links=links, - licenses_owned=LicensePool.UNLIMITED_ACCESS, - licenses_available=LicensePool.UNLIMITED_ACCESS, + licenses_owned=licenses_owned, + licenses_available=licenses_available, licenses_reserved=0, patrons_in_hold_queue=0, formats=[], diff --git a/src/palace/manager/sqlalchemy/model/licensing.py b/src/palace/manager/sqlalchemy/model/licensing.py index 006cc2c755..df7629add2 100644 --- a/src/palace/manager/sqlalchemy/model/licensing.py +++ b/src/palace/manager/sqlalchemy/model/licensing.py @@ -13,6 +13,7 @@ from sqlalchemy.orm import Mapped, relationship from sqlalchemy.orm.session import Session from sqlalchemy.sql.expression import or_ +from typing_extensions import Self from palace.manager.core.exceptions import BasePalaceException from palace.manager.sqlalchemy.constants import ( @@ -46,7 +47,9 @@ class LicenseStatus(PythonEnum): unavailable = "unavailable" @classmethod - def get(cls, value: str): + def get(cls, value: str | None) -> Self: + if value is None: + return cls.unavailable return cls.__members__.get(value.lower(), cls.unavailable) @@ -246,7 +249,7 @@ class LicensePool(Base): ) # The date 
this LicensePool was first created in our db - # (the date we first discovered that ​we had that book in ​our collection). + # (the date we first discovered that we had that book in our collection). availability_time = Column(DateTime(timezone=True), index=True) # A LicensePool may be superceded by some other LicensePool diff --git a/tests/files/odl2/feed.json b/tests/files/odl2/feed.json index bb12faff0a..8f40b24cea 100644 --- a/tests/files/odl2/feed.json +++ b/tests/files/odl2/feed.json @@ -231,14 +231,8 @@ "text/html", "application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction" ], - "price": { - "currency": "USD", - "value": 7.99 - }, "created": "2014-04-25T12:25:21+02:00", "terms": { - "checkouts": 30, - "expires": "2016-04-25T12:25:21+02:00", "concurrency": 10, "length": 5097600 }, diff --git a/tests/manager/api/test_odl2.py b/tests/manager/api/test_odl2.py index d9af900573..98c579da98 100644 --- a/tests/manager/api/test_odl2.py +++ b/tests/manager/api/test_odl2.py @@ -1,4 +1,5 @@ import datetime +import json import pytest from freezegun import freeze_time @@ -28,6 +29,7 @@ DeliveryMechanism, LicensePool, LicensePoolDeliveryMechanism, + LicenseStatus, ) from palace.manager.sqlalchemy.model.patron import Hold from palace.manager.sqlalchemy.model.resource import Hyperlink @@ -84,7 +86,6 @@ def test_import( # Arrange moby_dick_license = LicenseInfoHelper( license=LicenseHelper( - identifier="urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799", concurrency=10, checkouts=30, expires="2016-04-25T12:25:21+02:00", @@ -378,6 +379,122 @@ def test_import_open_access( ) assert oa_ebook_delivery_mechanism is not None + @freeze_time("2016-01-01T00:00:00+00:00") + def test_import_availability( + self, + odl2_importer: ODL2Importer, + odl_mock_get: MockGet, + api_odl2_files_fixture: ODL2APIFilesFixture, + ) -> None: + """ + Ensure that ODL2Importer2 correctly processes and imports a feed with an + open access book. 
+ """ + feed_json = json.loads(api_odl2_files_fixture.sample_text("feed.json")) + + moby_dick_license_dict = feed_json["publications"][0]["licenses"][0] + test_book_license_dict = feed_json["publications"][2]["licenses"][0] + + MOBY_DICK_LICENSE_ID = "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799" + TEST_BOOK_LICENSE_ID = "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9798" + + moby_dick_license_dict["metadata"]["availability"] = { + "state": "unavailable", + } + test_book_license_dict["metadata"]["availability"] = { + "state": "unavailable", + "reason": "https://registry.opds.io/reason#preordered", + "until": "2016-01-20T00:00:00Z", + } + + imported_editions, pools, works, failures = odl2_importer.import_from_feed( + json.dumps(feed_json) + ) + + assert isinstance(pools, list) + assert 2 == len(pools) + + [moby_dick_pool, test_book_pool] = pools + + assert moby_dick_pool.identifier.identifier == "978-3-16-148410-0" + assert moby_dick_pool.identifier.type == "ISBN" + assert moby_dick_pool.licenses_owned == 0 + assert moby_dick_pool.licenses_available == 0 + assert len(moby_dick_pool.licenses) == 1 + [moby_dick_license] = moby_dick_pool.licenses + assert moby_dick_license.identifier == MOBY_DICK_LICENSE_ID + assert moby_dick_license.is_available_for_borrowing is False + assert moby_dick_license.status == LicenseStatus.unavailable + + assert test_book_pool.identifier.identifier == "http://example.org/test-book" + assert test_book_pool.identifier.type == "URI" + assert test_book_pool.licenses_owned == 0 + assert test_book_pool.licenses_available == 0 + assert len(test_book_pool.licenses) == 1 + [test_book_license] = test_book_pool.licenses + assert test_book_license.identifier == TEST_BOOK_LICENSE_ID + assert test_book_license.is_available_for_borrowing is False + assert test_book_license.status == LicenseStatus.unavailable + + # Harvest the feed again, but this time the status has changed + moby_dick_license_dict["metadata"]["availability"] = { + "state": "available", + } 
+ del test_book_license_dict["metadata"]["availability"] + + # Mock responses from license status server + odl_mock_get.add( + LicenseInfoHelper( + license=LicenseHelper( + identifier=MOBY_DICK_LICENSE_ID, + concurrency=10, + checkouts=30, + expires="2016-04-25T12:25:21+02:00", + ), + left=30, + available=10, + ) + ) + odl_mock_get.add( + LicenseInfoHelper( + license=LicenseHelper( + identifier=TEST_BOOK_LICENSE_ID, + concurrency=10, + ), + available=10, + ) + ) + + # Harvest the feed again + imported_editions, pools, works, failures = odl2_importer.import_from_feed( + json.dumps(feed_json) + ) + + assert isinstance(pools, list) + assert 2 == len(pools) + + [moby_dick_pool, test_book_pool] = pools + + assert moby_dick_pool.identifier.identifier == "978-3-16-148410-0" + assert moby_dick_pool.identifier.type == "ISBN" + assert moby_dick_pool.licenses_owned == 30 + assert moby_dick_pool.licenses_available == 10 + assert len(moby_dick_pool.licenses) == 1 + [moby_dick_license] = moby_dick_pool.licenses + assert moby_dick_license.identifier == MOBY_DICK_LICENSE_ID + assert moby_dick_license.is_available_for_borrowing is True + assert moby_dick_license.status == LicenseStatus.available + + assert test_book_pool.identifier.identifier == "http://example.org/test-book" + assert test_book_pool.identifier.type == "URI" + assert test_book_pool.licenses_owned == 10 + assert test_book_pool.licenses_available == 10 + assert len(test_book_pool.licenses) == 1 + [test_book_license] = test_book_pool.licenses + assert test_book_license.identifier == TEST_BOOK_LICENSE_ID + assert test_book_license.is_available_for_borrowing is True + assert test_book_license.status == LicenseStatus.available + class TestODL2API: def test_loan_limit(self, odl2_api_test_fixture: ODL2APITestFixture): diff --git a/tests/manager/core/test_opds2_import.py b/tests/manager/core/test_opds2_import.py index daabc1d9ab..6b46047dc0 100644 --- a/tests/manager/core/test_opds2_import.py +++ 
b/tests/manager/core/test_opds2_import.py @@ -1,19 +1,16 @@ import datetime +import json from collections.abc import Generator from unittest.mock import MagicMock, patch import pytest from pytest import LogCaptureFixture from requests import Response +from webpub_manifest_parser.opds2 import OPDS2FeedParserFactory from palace.manager.api.circulation import CirculationAPI, FulfillmentInfo from palace.manager.api.circulation_exceptions import CannotFulfill -from palace.manager.core.opds2_import import ( - OPDS2API, - OPDS2Importer, - PalaceOPDS2FeedParserFactory, - RWPMManifestParser, -) +from palace.manager.core.opds2_import import OPDS2API, OPDS2Importer, RWPMManifestParser from palace.manager.sqlalchemy.constants import ( EditionConstants, IdentifierType, @@ -31,6 +28,7 @@ ) from palace.manager.sqlalchemy.model.patron import Loan from palace.manager.sqlalchemy.model.work import Work +from palace.manager.util.datetime_helpers import utc_now from tests.fixtures.database import DatabaseTransactionFixture from tests.fixtures.files import FilesFixture @@ -124,7 +122,7 @@ def opds2_importer_fixture( ) data.collection.data_source = data.data_source data.importer = OPDS2Importer( - db.session, data.collection, RWPMManifestParser(PalaceOPDS2FeedParserFactory()) + db.session, data.collection, RWPMManifestParser(OPDS2FeedParserFactory()) ) return data @@ -482,6 +480,177 @@ def test_auth_token_feed( # Did the token endpoint get stored correctly? 
assert token_endpoint == "http://example.org/auth?userName={patron_id}" + def test_opds2_importer_imports_feeds_with_availability_info( + self, + opds2_importer_fixture: TestOPDS2ImporterFixture, + opds2_files_fixture: OPDS2FilesFixture, + ): + """Ensure that OPDS2Importer correctly imports feeds with availability information.""" + data, transaction, session = ( + opds2_importer_fixture, + opds2_importer_fixture.transaction, + opds2_importer_fixture.transaction.session, + ) + feed_json = json.loads(opds2_files_fixture.sample_text("feed.json")) + + moby_dick_metadata = feed_json["publications"][0]["metadata"] + huckleberry_finn_metadata = feed_json["publications"][1]["metadata"] + postmodernism_metadata = feed_json["publications"][2]["metadata"] + + week_ago = utc_now() - datetime.timedelta(days=7) + moby_dick_metadata["availability"] = { + "state": "unavailable", + } + huckleberry_finn_metadata["availability"] = { + "state": "available", + } + postmodernism_metadata["availability"] = { + "state": "unavailable", + "until": week_ago.isoformat(), + } + + imported_editions, pools, works, failures = data.importer.import_from_feed( + json.dumps(feed_json) + ) + + # Make we have the correct number of editions + assert isinstance(imported_editions, list) + assert len(imported_editions) == 3 + + # Make we have the correct number of licensepools + assert isinstance(pools, list) + assert len(pools) == 3 + + # Moby dick should be imported but is unavailable + moby_dick_edition = self._get_edition_by_identifier( + imported_editions, self.MOBY_DICK_ISBN_IDENTIFIER + ) + assert isinstance(moby_dick_edition, Edition) + + assert moby_dick_edition.title == "Moby-Dick" + + moby_dick_license_pool = self._get_license_pool_by_identifier( + pools, self.MOBY_DICK_ISBN_IDENTIFIER + ) + assert isinstance(moby_dick_license_pool, LicensePool) + assert moby_dick_license_pool.open_access + assert moby_dick_license_pool.licenses_owned == 0 + assert moby_dick_license_pool.licenses_available == 0 
+ + # Adventures of Huckleberry Finn is imported and is available + huckleberry_finn_edition = self._get_edition_by_identifier( + imported_editions, self.HUCKLEBERRY_FINN_URI_IDENTIFIER + ) + assert isinstance(huckleberry_finn_edition, Edition) + + assert huckleberry_finn_edition.title == "Adventures of Huckleberry Finn" + + huckleberry_finn_license_pool = self._get_license_pool_by_identifier( + pools, self.HUCKLEBERRY_FINN_URI_IDENTIFIER + ) + assert isinstance(huckleberry_finn_license_pool, LicensePool) is True + assert huckleberry_finn_license_pool.open_access is False + assert ( + huckleberry_finn_license_pool.licenses_owned == LicensePool.UNLIMITED_ACCESS + ) + assert ( + huckleberry_finn_license_pool.licenses_available + == LicensePool.UNLIMITED_ACCESS + ) + + # Politics of postmodernism is unavailable, but it is past the until date, so it + # should be available + postmodernism_edition = self._get_edition_by_identifier( + imported_editions, self.POSTMODERNISM_PROQUEST_IDENTIFIER + ) + assert isinstance(postmodernism_edition, Edition) + + assert postmodernism_edition.title == "The Politics of Postmodernism" + + postmodernism_license_pool = self._get_license_pool_by_identifier( + pools, self.POSTMODERNISM_PROQUEST_IDENTIFIER + ) + assert isinstance(postmodernism_license_pool, LicensePool) is True + assert postmodernism_license_pool.open_access is False + assert postmodernism_license_pool.licenses_owned == LicensePool.UNLIMITED_ACCESS + assert ( + postmodernism_license_pool.licenses_available + == LicensePool.UNLIMITED_ACCESS + ) + + # We harvest the feed again but this time the availability has changed + moby_dick_metadata["availability"]["state"] = "available" + moby_dick_metadata["modified"] = utc_now().isoformat() + + huckleberry_finn_metadata["availability"]["state"] = "unavailable" + huckleberry_finn_metadata["modified"] = utc_now().isoformat() + + del postmodernism_metadata["availability"] + postmodernism_metadata["modified"] = utc_now().isoformat() + + 
imported_editions, pools, works, failures = data.importer.import_from_feed( + json.dumps(feed_json) + ) + + # Make we have the correct number of editions + assert isinstance(imported_editions, list) + assert len(imported_editions) == 3 + + # Make we have the correct number of licensepools + assert isinstance(pools, list) + assert len(pools) == 3 + + # Moby dick should be imported and is now available + moby_dick_edition = self._get_edition_by_identifier( + imported_editions, self.MOBY_DICK_ISBN_IDENTIFIER + ) + assert isinstance(moby_dick_edition, Edition) + + assert moby_dick_edition.title == "Moby-Dick" + + moby_dick_license_pool = self._get_license_pool_by_identifier( + pools, self.MOBY_DICK_ISBN_IDENTIFIER + ) + assert isinstance(moby_dick_license_pool, LicensePool) + assert moby_dick_license_pool.open_access + assert moby_dick_license_pool.licenses_owned == LicensePool.UNLIMITED_ACCESS + assert moby_dick_license_pool.licenses_available == LicensePool.UNLIMITED_ACCESS + + # Adventures of Huckleberry Finn is imported and is now unavailable + huckleberry_finn_edition = self._get_edition_by_identifier( + imported_editions, self.HUCKLEBERRY_FINN_URI_IDENTIFIER + ) + assert isinstance(huckleberry_finn_edition, Edition) + + assert huckleberry_finn_edition.title == "Adventures of Huckleberry Finn" + + huckleberry_finn_license_pool = self._get_license_pool_by_identifier( + pools, self.HUCKLEBERRY_FINN_URI_IDENTIFIER + ) + assert isinstance(huckleberry_finn_license_pool, LicensePool) is True + assert huckleberry_finn_license_pool.open_access is False + assert huckleberry_finn_license_pool.licenses_owned == 0 + assert huckleberry_finn_license_pool.licenses_available == 0 + + # Politics of postmodernism is still available + postmodernism_edition = self._get_edition_by_identifier( + imported_editions, self.POSTMODERNISM_PROQUEST_IDENTIFIER + ) + assert isinstance(postmodernism_edition, Edition) + + assert postmodernism_edition.title == "The Politics of Postmodernism" + + 
postmodernism_license_pool = self._get_license_pool_by_identifier( + pools, self.POSTMODERNISM_PROQUEST_IDENTIFIER + ) + assert isinstance(postmodernism_license_pool, LicensePool) is True + assert postmodernism_license_pool.open_access is False + assert postmodernism_license_pool.licenses_owned == LicensePool.UNLIMITED_ACCESS + assert ( + postmodernism_license_pool.licenses_available + == LicensePool.UNLIMITED_ACCESS + ) + class Opds2ApiFixture: def __init__(self, db: DatabaseTransactionFixture, mock_http: MagicMock): From dafaf4866d94f9bd752db04eeb3a7ad74941ad66 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Thu, 13 Jun 2024 15:08:58 -0300 Subject: [PATCH 2/8] Update type hint --- src/palace/manager/sqlalchemy/model/licensing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/palace/manager/sqlalchemy/model/licensing.py b/src/palace/manager/sqlalchemy/model/licensing.py index df7629add2..2aaf818313 100644 --- a/src/palace/manager/sqlalchemy/model/licensing.py +++ b/src/palace/manager/sqlalchemy/model/licensing.py @@ -13,7 +13,6 @@ from sqlalchemy.orm import Mapped, relationship from sqlalchemy.orm.session import Session from sqlalchemy.sql.expression import or_ -from typing_extensions import Self from palace.manager.core.exceptions import BasePalaceException from palace.manager.sqlalchemy.constants import ( @@ -47,7 +46,7 @@ class LicenseStatus(PythonEnum): unavailable = "unavailable" @classmethod - def get(cls, value: str | None) -> Self: + def get(cls, value: str | None) -> LicenseStatus: if value is None: return cls.unavailable return cls.__members__.get(value.lower(), cls.unavailable) From b2b56490b30e84c53091ea3bea16cf39ed6dadfa Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Thu, 13 Jun 2024 15:12:36 -0300 Subject: [PATCH 3/8] Add back in identifier --- tests/manager/api/test_odl2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/manager/api/test_odl2.py b/tests/manager/api/test_odl2.py index 98c579da98..df057bb19f 
100644 --- a/tests/manager/api/test_odl2.py +++ b/tests/manager/api/test_odl2.py @@ -86,6 +86,7 @@ def test_import( # Arrange moby_dick_license = LicenseInfoHelper( license=LicenseHelper( + identifier="urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799", concurrency=10, checkouts=30, expires="2016-04-25T12:25:21+02:00", From abb23908839ee9c7a47976b4555e67e541bc17f1 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Thu, 13 Jun 2024 16:02:03 -0300 Subject: [PATCH 4/8] Update metadata class we are using. --- tests/manager/api/test_odl2.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/manager/api/test_odl2.py b/tests/manager/api/test_odl2.py index df057bb19f..578311ca87 100644 --- a/tests/manager/api/test_odl2.py +++ b/tests/manager/api/test_odl2.py @@ -3,11 +3,11 @@ import pytest from freezegun import freeze_time -from webpub_manifest_parser.core.ast import PresentationMetadata from webpub_manifest_parser.odl.ast import ODLPublication from webpub_manifest_parser.odl.semantic import ( ODL_PUBLICATION_MUST_CONTAIN_EITHER_LICENSES_OR_OA_ACQUISITION_LINK_ERROR, ) +from webpub_manifest_parser.opds2.ast import OPDS2PublicationMetadata from palace.manager.api.circulation_exceptions import ( HoldsNotPermitted, @@ -224,7 +224,9 @@ def test_import( huck_finn_semantic_error = ( ODL_PUBLICATION_MUST_CONTAIN_EITHER_LICENSES_OR_OA_ACQUISITION_LINK_ERROR( node=ODLPublication( - metadata=PresentationMetadata(identifier="urn:isbn:9781234567897") + metadata=OPDS2PublicationMetadata( + identifier="urn:isbn:9781234567897" + ) ), node_property=None, ) From def99d7c92f49ba44636e09525adb00a5a4407d9 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Thu, 13 Jun 2024 21:05:34 -0300 Subject: [PATCH 5/8] Roll back licensing change --- src/palace/manager/sqlalchemy/model/licensing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/palace/manager/sqlalchemy/model/licensing.py b/src/palace/manager/sqlalchemy/model/licensing.py index 
2aaf818313..006cc2c755 100644 --- a/src/palace/manager/sqlalchemy/model/licensing.py +++ b/src/palace/manager/sqlalchemy/model/licensing.py @@ -46,9 +46,7 @@ class LicenseStatus(PythonEnum): unavailable = "unavailable" @classmethod - def get(cls, value: str | None) -> LicenseStatus: - if value is None: - return cls.unavailable + def get(cls, value: str): return cls.__members__.get(value.lower(), cls.unavailable) @@ -248,7 +246,7 @@ class LicensePool(Base): ) # The date this LicensePool was first created in our db - # (the date we first discovered that we had that book in our collection). + # (the date we first discovered that we had that book in our collection). availability_time = Column(DateTime(timezone=True), index=True) # A LicensePool may be superceded by some other LicensePool From dc0be068d825be558dc7fbd1a8cd3293467296af Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Thu, 13 Jun 2024 22:10:27 -0300 Subject: [PATCH 6/8] Reverse some of the test logic. Better coverage and more realistic case. --- src/palace/manager/api/odl2.py | 17 ++- tests/manager/api/test_odl2.py | 205 ++++++++++++++++++++++----------- 2 files changed, 144 insertions(+), 78 deletions(-) diff --git a/src/palace/manager/api/odl2.py b/src/palace/manager/api/odl2.py index 20631e2a15..5f446c380b 100644 --- a/src/palace/manager/api/odl2.py +++ b/src/palace/manager/api/odl2.py @@ -218,13 +218,6 @@ def _extract_publication_metadata( feed, publication, data_source_name ) - if ( - metadata.circulation.licenses_owned == 0 - and metadata.circulation.licenses_available == 0 - ): - # This title is not available, so we don't need to process it. - return metadata - if not publication.licenses: # This title is an open-access title, no need to process licenses. 
return metadata @@ -234,6 +227,9 @@ def _extract_publication_metadata( medium = None skipped_license_formats = set(self.settings.skipped_license_formats) + publication_availability = self._extract_availability( + publication.metadata.availability + ) for odl_license in publication.licenses: identifier = odl_license.metadata.identifier @@ -263,13 +259,16 @@ def _extract_publication_metadata( if not license_info_document_link: parsed_license = None - elif not self._extract_availability(odl_license.metadata.availability): + elif ( + not self._extract_availability(odl_license.metadata.availability) + or not publication_availability + ): # No need to fetch the license document, we already know that this title is not available. parsed_license = LicenseData( identifier=identifier, checkout_url=None, status_url=license_info_document_link, - status=LicenseStatus.get(odl_license.metadata.availability.state), + status=LicenseStatus.unavailable, checkouts_available=0, ) else: diff --git a/tests/manager/api/test_odl2.py b/tests/manager/api/test_odl2.py index 578311ca87..ed601215fc 100644 --- a/tests/manager/api/test_odl2.py +++ b/tests/manager/api/test_odl2.py @@ -1,5 +1,8 @@ +import copy import datetime +import functools import json +import uuid import pytest from freezegun import freeze_time @@ -398,74 +401,134 @@ def test_import_availability( moby_dick_license_dict = feed_json["publications"][0]["licenses"][0] test_book_license_dict = feed_json["publications"][2]["licenses"][0] + huck_finn_publication_dict = feed_json["publications"][1] + huck_finn_publication_dict["licenses"] = copy.deepcopy( + feed_json["publications"][0]["licenses"] + ) + huck_finn_publication_dict["images"] = copy.deepcopy( + feed_json["publications"][0]["images"] + ) + huck_finn_license_dict = huck_finn_publication_dict["licenses"][0] + MOBY_DICK_LICENSE_ID = "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799" TEST_BOOK_LICENSE_ID = "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9798" + HUCK_FINN_LICENSE_ID = 
f"urn:uuid:{uuid.uuid4()}" - moby_dick_license_dict["metadata"]["availability"] = { - "state": "unavailable", - } test_book_license_dict["metadata"]["availability"] = { "state": "unavailable", "reason": "https://registry.opds.io/reason#preordered", "until": "2016-01-20T00:00:00Z", } + huck_finn_license_dict["metadata"]["identifier"] = HUCK_FINN_LICENSE_ID + huck_finn_publication_dict["metadata"][ + "title" + ] = "Adventures of Huckleberry Finn" + + # Mock responses from license status server + def license_status_reply( + license_id: str, + concurrency: int = 10, + checkouts: int | None = 30, + expires: str | None = "2016-04-25T12:25:21+02:00", + ) -> LicenseInfoHelper: + return LicenseInfoHelper( + license=LicenseHelper( + identifier=license_id, + concurrency=concurrency, + checkouts=checkouts, + expires=expires, + ), + left=checkouts, + available=concurrency, + ) + + odl_mock_get.add(license_status_reply(MOBY_DICK_LICENSE_ID)) + odl_mock_get.add(license_status_reply(HUCK_FINN_LICENSE_ID)) imported_editions, pools, works, failures = odl2_importer.import_from_feed( json.dumps(feed_json) ) assert isinstance(pools, list) - assert 2 == len(pools) - - [moby_dick_pool, test_book_pool] = pools - - assert moby_dick_pool.identifier.identifier == "978-3-16-148410-0" - assert moby_dick_pool.identifier.type == "ISBN" - assert moby_dick_pool.licenses_owned == 0 - assert moby_dick_pool.licenses_available == 0 - assert len(moby_dick_pool.licenses) == 1 - [moby_dick_license] = moby_dick_pool.licenses - assert moby_dick_license.identifier == MOBY_DICK_LICENSE_ID - assert moby_dick_license.is_available_for_borrowing is False - assert moby_dick_license.status == LicenseStatus.unavailable - - assert test_book_pool.identifier.identifier == "http://example.org/test-book" - assert test_book_pool.identifier.type == "URI" - assert test_book_pool.licenses_owned == 0 - assert test_book_pool.licenses_available == 0 - assert len(test_book_pool.licenses) == 1 - [test_book_license] = 
test_book_pool.licenses - assert test_book_license.identifier == TEST_BOOK_LICENSE_ID - assert test_book_license.is_available_for_borrowing is False - assert test_book_license.status == LicenseStatus.unavailable + assert 3 == len(pools) + + [moby_dick_pool, huck_finn_pool, test_book_pool] = pools + + def assert_pool( + pool: LicensePool, + identifier: str, + identifier_type: str, + licenses_owned: int, + licenses_available: int, + license_id: str, + available_for_borrowing: bool, + license_status: LicenseStatus, + ) -> None: + assert pool.identifier.identifier == identifier + assert pool.identifier.type == identifier_type + assert pool.licenses_owned == licenses_owned + assert pool.licenses_available == licenses_available + assert len(pool.licenses) == 1 + [license_info] = pool.licenses + assert license_info.identifier == license_id + assert license_info.is_available_for_borrowing is available_for_borrowing + assert license_info.status == license_status + + assert_moby_dick_pool = functools.partial( + assert_pool, + identifier="978-3-16-148410-0", + identifier_type="ISBN", + license_id=MOBY_DICK_LICENSE_ID, + ) + assert_test_book_pool = functools.partial( + assert_pool, + identifier="http://example.org/test-book", + identifier_type="URI", + license_id=TEST_BOOK_LICENSE_ID, + ) + assert_huck_finn_pool = functools.partial( + assert_pool, + identifier="9781234567897", + identifier_type="ISBN", + license_id=HUCK_FINN_LICENSE_ID, + ) + + assert_moby_dick_pool( + moby_dick_pool, + licenses_owned=30, + licenses_available=10, + available_for_borrowing=True, + license_status=LicenseStatus.available, + ) + + assert_test_book_pool( + test_book_pool, + licenses_owned=0, + licenses_available=0, + available_for_borrowing=False, + license_status=LicenseStatus.unavailable, + ) + + assert_huck_finn_pool( + huck_finn_pool, + licenses_owned=30, + licenses_available=10, + available_for_borrowing=True, + license_status=LicenseStatus.available, + ) # Harvest the feed again, but this 
time the status has changed moby_dick_license_dict["metadata"]["availability"] = { - "state": "available", + "state": "unavailable", } del test_book_license_dict["metadata"]["availability"] + huck_finn_publication_dict["metadata"]["availability"] = { + "state": "unavailable", + } # Mock responses from license status server odl_mock_get.add( - LicenseInfoHelper( - license=LicenseHelper( - identifier=MOBY_DICK_LICENSE_ID, - concurrency=10, - checkouts=30, - expires="2016-04-25T12:25:21+02:00", - ), - left=30, - available=10, - ) - ) - odl_mock_get.add( - LicenseInfoHelper( - license=LicenseHelper( - identifier=TEST_BOOK_LICENSE_ID, - concurrency=10, - ), - available=10, - ) + license_status_reply(TEST_BOOK_LICENSE_ID, checkouts=None, expires=None) ) # Harvest the feed again @@ -474,29 +537,33 @@ def test_import_availability( ) assert isinstance(pools, list) - assert 2 == len(pools) - - [moby_dick_pool, test_book_pool] = pools - - assert moby_dick_pool.identifier.identifier == "978-3-16-148410-0" - assert moby_dick_pool.identifier.type == "ISBN" - assert moby_dick_pool.licenses_owned == 30 - assert moby_dick_pool.licenses_available == 10 - assert len(moby_dick_pool.licenses) == 1 - [moby_dick_license] = moby_dick_pool.licenses - assert moby_dick_license.identifier == MOBY_DICK_LICENSE_ID - assert moby_dick_license.is_available_for_borrowing is True - assert moby_dick_license.status == LicenseStatus.available - - assert test_book_pool.identifier.identifier == "http://example.org/test-book" - assert test_book_pool.identifier.type == "URI" - assert test_book_pool.licenses_owned == 10 - assert test_book_pool.licenses_available == 10 - assert len(test_book_pool.licenses) == 1 - [test_book_license] = test_book_pool.licenses - assert test_book_license.identifier == TEST_BOOK_LICENSE_ID - assert test_book_license.is_available_for_borrowing is True - assert test_book_license.status == LicenseStatus.available + assert 3 == len(pools) + + [moby_dick_pool, huck_finn_pool, 
test_book_pool] = pools + + assert_moby_dick_pool( + moby_dick_pool, + licenses_owned=0, + licenses_available=0, + available_for_borrowing=False, + license_status=LicenseStatus.unavailable, + ) + + assert_test_book_pool( + test_book_pool, + licenses_owned=10, + licenses_available=10, + available_for_borrowing=True, + license_status=LicenseStatus.available, + ) + + assert_huck_finn_pool( + huck_finn_pool, + licenses_owned=0, + licenses_available=0, + available_for_borrowing=False, + license_status=LicenseStatus.unavailable, + ) class TestODL2API: From c70739982880a8db9452787d8b675820a422b9b0 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Tue, 18 Jun 2024 10:51:13 -0300 Subject: [PATCH 7/8] Bump up to new version of webpub manifest parser --- poetry.lock | 38 ++++++++++++++++---------------------- pyproject.toml | 2 +- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/poetry.lock b/poetry.lock index 076f585be5..d5f982c2c0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "alembic" @@ -1530,8 +1530,6 @@ files = [ {file = "frozendict-2.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d13b4310db337f4d2103867c5a05090b22bc4d50ca842093779ef541ea9c9eea"}, {file = "frozendict-2.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:b3b967d5065872e27b06f785a80c0ed0a45d1f7c9b85223da05358e734d858ca"}, {file = "frozendict-2.4.4-cp39-cp39-win_arm64.whl", hash = "sha256:4ae8d05c8d0b6134bfb6bfb369d5fa0c4df21eabb5ca7f645af95fdc6689678e"}, - {file = "frozendict-2.4.4-py311-none-any.whl", hash = "sha256:705efca8d74d3facbb6ace80ab3afdd28eb8a237bfb4063ed89996b024bc443d"}, - {file = "frozendict-2.4.4-py312-none-any.whl", hash = "sha256:d9647563e76adb05b7cde2172403123380871360a114f546b4ae1704510801e5"}, {file = "frozendict-2.4.4.tar.gz", hash = "sha256:3f7c031b26e4ee6a3f786ceb5e3abf1181c4ade92dce1f847da26ea2c96008c7"}, ] @@ -2929,28 +2927,24 @@ files = [ [[package]] name = "palace-webpub-manifest-parser" -version = "0" +version = "3.2.0" description = "A parser for the Readium Web Publication Manifest, OPDS 2.0 and ODL formats." 
optional = false -python-versions = ">=3.8,<4" -files = [] -develop = false +python-versions = "<4,>=3.8" +files = [ + {file = "palace_webpub_manifest_parser-3.2.0-py3-none-any.whl", hash = "sha256:813850224cb4970bffd399b097cee59318e2d2050e376000db49166821928872"}, + {file = "palace_webpub_manifest_parser-3.2.0.tar.gz", hash = "sha256:46f9e9c5719ab63762636715cb8493cad780ba8341a080f27fa1eda059c4532b"}, +] [package.dependencies] -jsonschema = "^4.19" -multipledispatch = "^1.0" -pyrsistent = "^0.20" -python-dateutil = "^2.8" -pytz = "^2024.1" -requests = "^2.27" -rfc3987 = "^1.3" -uritemplate = "^4.1" - -[package.source] -type = "git" -url = "https://github.com/ThePalaceProject/webpub-manifest-parser.git" -reference = "feature/add-odl-availability-proposal" -resolved_reference = "6970ea8c877926da5f9db18d60b5c59305171aec" +jsonschema = ">=4.19,<5.0" +multipledispatch = ">=1.0,<2.0" +pyrsistent = ">=0.20,<0.21" +python-dateutil = ">=2.8,<3.0" +pytz = ">=2024.1,<2025.0" +requests = ">=2.27,<3.0" +rfc3987 = ">=1.3,<2.0" +uritemplate = ">=4.1,<5.0" [[package]] name = "pillow" @@ -5057,4 +5051,4 @@ lxml = ">=3.8" [metadata] lock-version = "2.0" python-versions = ">=3.10,<4" -content-hash = "bf0913a75e48852d711701bcffcfdd7e82825e1e49b81d58181235ff8df91536" +content-hash = "21a985a3a04a73e7c5d2597e314d4c714540f2b74be9fb29366ebf64ce7f89ef" diff --git a/pyproject.toml b/pyproject.toml index 8a4ad31385..63b0770a64 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -247,7 +247,7 @@ nameparser = "^1.1" # nameparser is for author name manipulations nltk = "3.8.1" # nltk is a textblob dependency. 
opensearch-dsl = "~1.0" opensearch-py = "~1.1" -palace-webpub-manifest-parser = {git = "https://github.com/ThePalaceProject/webpub-manifest-parser.git", branch = "feature/add-odl-availability-proposal"} +palace-webpub-manifest-parser = "^3.2.0" pillow = "^10.0" pycryptodome = "^3.18" pydantic = {version = "^1.10.9", extras = ["dotenv", "email"]} From b0e44625d22ad9650ef2611cfa0dc4d3bd099895 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Tue, 18 Jun 2024 11:12:45 -0300 Subject: [PATCH 8/8] Add a more detailed comment. --- src/palace/manager/core/opds2_import.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/palace/manager/core/opds2_import.py b/src/palace/manager/core/opds2_import.py index faa17d101e..8f03bf6c94 100644 --- a/src/palace/manager/core/opds2_import.py +++ b/src/palace/manager/core/opds2_import.py @@ -689,6 +689,14 @@ def _extract_availability( ) -> bool: """Extract the publication's availability from its availability information. + We default to a publication being available if no availability information is provided or if the provided + availability information is past the time specified in its `until` field. The `since` field on the + availability information is not used; it is assumed to be informational and always in the past if it is + present. This is based on a discussion with the OPDS 2.0 working group. + + TODO: Update our handling of the `since` field based on the resolution of the discussion here: + https://github.com/opds-community/drafts/discussions/63#discussioncomment-9806140 + :return: Boolean value indicating whether the publication is available. """ available = opds2_ast.OPDS2AvailabilityType.AVAILABLE.value