Skip to content

Commit

Permalink
ODL availability proposal (PP-869) (#1903)
Browse files: browse the repository at this point in the history
This PR moves the time_tracking property, which was overridden by a custom class in Palace, back out into the webpub-manifest-parser, and adds support for the draft ODL Availability (removals) proposal described here: opds-community/drafts#63.
  • Loading branch information
jonathangreen authored Jun 18, 2024
1 parent ed8826f commit 3f74425
Show file tree
Hide file tree
Showing 8 changed files with 520 additions and 177 deletions.
5 changes: 2 additions & 3 deletions bin/opds2_import_monitor
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#!/usr/bin/env python
"""Update the circulation manager server with new books from OPDS 2.0 import collections."""

from webpub_manifest_parser.opds2 import OPDS2FeedParserFactory

from palace.manager.core.opds2_import import (
OPDS2Importer,
OPDS2ImportMonitor,
PalaceOPDS2FeedParserFactory,
RWPMManifestParser,
)
from palace.manager.scripts.opds_import import OPDSImportScript
Expand All @@ -14,7 +13,7 @@ import_script = OPDSImportScript(
importer_class=OPDS2Importer,
monitor_class=OPDS2ImportMonitor,
protocol=OPDS2Importer.NAME,
parser=RWPMManifestParser(PalaceOPDS2FeedParserFactory()),
parser=RWPMManifestParser(OPDS2FeedParserFactory()),
)

import_script.run()
22 changes: 10 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ nameparser = "^1.1" # nameparser is for author name manipulations
nltk = "3.8.1" # nltk is a textblob dependency.
opensearch-dsl = "~1.0"
opensearch-py = "~1.1"
palace-webpub-manifest-parser = "^3.1.1"
palace-webpub-manifest-parser = "^3.2.0"
pillow = "^10.0"
pycryptodome = "^3.18"
pydantic = {version = "^1.10.9", extras = ["dotenv", "email"]}
Expand All @@ -261,7 +261,7 @@ pyspellchecker = "0.8.1"
python = ">=3.10,<4"
python-dateutil = "2.9.0.post0"
python3-saml = "^1.16" # python-saml is required for SAML authentication
pytz = "^2023.3"
pytz = "^2024.1"
pyyaml = "^6.0"
redis = "^5.0.5"
redmail = "^0.6.0"
Expand Down
187 changes: 104 additions & 83 deletions src/palace/manager/api/odl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
ODLLibrarySettings,
ODLSettings,
)
from palace.manager.core.metadata_layer import FormatData, TimestampData
from palace.manager.core.metadata_layer import FormatData, LicenseData, TimestampData
from palace.manager.core.monitor import CollectionMonitor
from palace.manager.core.opds2_import import (
OPDS2Importer,
Expand All @@ -36,7 +36,11 @@
)
from palace.manager.sqlalchemy.model.collection import Collection
from palace.manager.sqlalchemy.model.edition import Edition
from palace.manager.sqlalchemy.model.licensing import LicensePool, RightsStatus
from palace.manager.sqlalchemy.model.licensing import (
LicensePool,
LicenseStatus,
RightsStatus,
)
from palace.manager.sqlalchemy.model.patron import Hold
from palace.manager.sqlalchemy.model.resource import HttpResponseTuple
from palace.manager.util import first_or_default
Expand Down Expand Up @@ -213,103 +217,120 @@ def _extract_publication_metadata(
metadata = super()._extract_publication_metadata(
feed, publication, data_source_name
)

if not publication.licenses:
# This title is an open-access title, no need to process licenses.
return metadata

formats = []
licenses = []
medium = None

skipped_license_formats = set(self.settings.skipped_license_formats)
publication_availability = self._extract_availability(
publication.metadata.availability
)

if publication.licenses:
for odl_license in publication.licenses:
identifier = odl_license.metadata.identifier
checkout_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
)
if checkout_link:
checkout_link = checkout_link.href
for odl_license in publication.licenses:
identifier = odl_license.metadata.identifier

license_info_document_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
)
if license_info_document_link:
license_info_document_link = license_info_document_link.href
checkout_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
)
if checkout_link:
checkout_link = checkout_link.href

expires = (
to_utc(odl_license.metadata.terms.expires)
if odl_license.metadata.terms
else None
license_info_document_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
)
if license_info_document_link:
license_info_document_link = license_info_document_link.href

expires = (
to_utc(odl_license.metadata.terms.expires)
if odl_license.metadata.terms
else None
)
concurrency = (
int(odl_license.metadata.terms.concurrency)
if odl_license.metadata.terms
else None
)

if not license_info_document_link:
parsed_license = None
elif (
not self._extract_availability(odl_license.metadata.availability)
or not publication_availability
):
# No need to fetch the license document, we already know that this title is not available.
parsed_license = LicenseData(
identifier=identifier,
checkout_url=None,
status_url=license_info_document_link,
status=LicenseStatus.unavailable,
checkouts_available=0,
)
concurrency = (
int(odl_license.metadata.terms.concurrency)
if odl_license.metadata.terms
else None
else:
parsed_license = self.get_license_data(
license_info_document_link,
checkout_link,
identifier,
expires,
concurrency,
self.http_get,
)

if not license_info_document_link:
parsed_license = None
if parsed_license is not None:
licenses.append(parsed_license)

license_formats = set(odl_license.metadata.formats)
for license_format in license_formats:
if (
skipped_license_formats
and license_format in skipped_license_formats
):
continue

if not medium:
medium = Edition.medium_from_media_type(license_format)

drm_schemes: list[str | None]
if license_format in self.LICENSE_FORMATS:
# Special case to handle DeMarque audiobooks which include the protection
# in the content type. When we see a license format of
# application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction
# it means that this audiobook title is available through the DeMarque streaming manifest
# endpoint.
drm_schemes = [
self.LICENSE_FORMATS[license_format][self.DRM_SCHEME]
]
license_format = self.LICENSE_FORMATS[license_format][
self.CONTENT_TYPE
]
else:
parsed_license = self.get_license_data(
license_info_document_link,
checkout_link,
identifier,
expires,
concurrency,
self.http_get,
drm_schemes = (
odl_license.metadata.protection.formats
if odl_license.metadata.protection
else []
)

if parsed_license is not None:
licenses.append(parsed_license)

license_formats = set(odl_license.metadata.formats)
for license_format in license_formats:
if (
skipped_license_formats
and license_format in skipped_license_formats
):
continue

if not medium:
medium = Edition.medium_from_media_type(license_format)

drm_schemes: list[str | None]
if license_format in self.LICENSE_FORMATS:
# Special case to handle DeMarque audiobooks which include the protection
# in the content type. When we see a license format of
# application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction
# it means that this audiobook title is available through the DeMarque streaming manifest
# endpoint.
drm_schemes = [
self.LICENSE_FORMATS[license_format][self.DRM_SCHEME]
]
license_format = self.LICENSE_FORMATS[license_format][
self.CONTENT_TYPE
]
else:
drm_schemes = (
odl_license.metadata.protection.formats
if odl_license.metadata.protection
else []
)

for drm_scheme in drm_schemes or [None]:
formats.append(
FormatData(
content_type=license_format,
drm_scheme=drm_scheme,
rights_uri=RightsStatus.IN_COPYRIGHT,
)
for drm_scheme in drm_schemes or [None]:
formats.append(
FormatData(
content_type=license_format,
drm_scheme=drm_scheme,
rights_uri=RightsStatus.IN_COPYRIGHT,
)
)

# If we don't have any licenses, then this title is an open-access title.
# So we don't change the circulation data.
if len(licenses) != 0:
metadata.circulation.licenses = licenses
metadata.circulation.licenses_owned = None
metadata.circulation.licenses_available = None
metadata.circulation.licenses_reserved = None
metadata.circulation.patrons_in_hold_queue = None
metadata.circulation.formats.extend(formats)
metadata.medium = medium
metadata.circulation.licenses = licenses
metadata.circulation.licenses_owned = None
metadata.circulation.licenses_available = None
metadata.circulation.licenses_reserved = None
metadata.circulation.patrons_in_hold_queue = None
metadata.circulation.formats.extend(formats)
metadata.medium = medium

return metadata

Expand Down
Loading

0 comments on commit 3f74425

Please sign in to comment.