Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ODL availability proposal (PP-869) #1903

Merged
merged 8 commits on Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions bin/opds2_import_monitor
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#!/usr/bin/env python
"""Update the circulation manager server with new books from OPDS 2.0 import collections."""

from webpub_manifest_parser.opds2 import OPDS2FeedParserFactory

from palace.manager.core.opds2_import import (
OPDS2Importer,
OPDS2ImportMonitor,
PalaceOPDS2FeedParserFactory,
RWPMManifestParser,
)
from palace.manager.scripts.opds_import import OPDSImportScript
Expand All @@ -14,7 +13,7 @@ import_script = OPDSImportScript(
importer_class=OPDS2Importer,
monitor_class=OPDS2ImportMonitor,
protocol=OPDS2Importer.NAME,
parser=RWPMManifestParser(PalaceOPDS2FeedParserFactory()),
parser=RWPMManifestParser(OPDS2FeedParserFactory()),
)

import_script.run()
22 changes: 10 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ nameparser = "^1.1" # nameparser is for author name manipulations
nltk = "3.8.1" # nltk is a textblob dependency.
opensearch-dsl = "~1.0"
opensearch-py = "~1.1"
palace-webpub-manifest-parser = "^3.1.1"
palace-webpub-manifest-parser = "^3.2.0"
pillow = "^10.0"
pycryptodome = "^3.18"
pydantic = {version = "^1.10.9", extras = ["dotenv", "email"]}
Expand All @@ -261,7 +261,7 @@ pyspellchecker = "0.8.1"
python = ">=3.10,<4"
python-dateutil = "2.9.0.post0"
python3-saml = "^1.16" # python-saml is required for SAML authentication
pytz = "^2023.3"
pytz = "^2024.1"
pyyaml = "^6.0"
redis = "^5.0.5"
redmail = "^0.6.0"
Expand Down
187 changes: 104 additions & 83 deletions src/palace/manager/api/odl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
ODLLibrarySettings,
ODLSettings,
)
from palace.manager.core.metadata_layer import FormatData, TimestampData
from palace.manager.core.metadata_layer import FormatData, LicenseData, TimestampData
from palace.manager.core.monitor import CollectionMonitor
from palace.manager.core.opds2_import import (
OPDS2Importer,
Expand All @@ -36,7 +36,11 @@
)
from palace.manager.sqlalchemy.model.collection import Collection
from palace.manager.sqlalchemy.model.edition import Edition
from palace.manager.sqlalchemy.model.licensing import LicensePool, RightsStatus
from palace.manager.sqlalchemy.model.licensing import (
LicensePool,
LicenseStatus,
RightsStatus,
)
from palace.manager.sqlalchemy.model.patron import Hold
from palace.manager.sqlalchemy.model.resource import HttpResponseTuple
from palace.manager.util import first_or_default
Expand Down Expand Up @@ -213,103 +217,120 @@
metadata = super()._extract_publication_metadata(
feed, publication, data_source_name
)

if not publication.licenses:
# This title is an open-access title, no need to process licenses.
return metadata

formats = []
licenses = []
medium = None

skipped_license_formats = set(self.settings.skipped_license_formats)
publication_availability = self._extract_availability(
publication.metadata.availability
)

if publication.licenses:
for odl_license in publication.licenses:
identifier = odl_license.metadata.identifier
checkout_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
)
if checkout_link:
checkout_link = checkout_link.href
for odl_license in publication.licenses:
identifier = odl_license.metadata.identifier

license_info_document_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
)
if license_info_document_link:
license_info_document_link = license_info_document_link.href
checkout_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
)
if checkout_link:
checkout_link = checkout_link.href

expires = (
to_utc(odl_license.metadata.terms.expires)
if odl_license.metadata.terms
else None
license_info_document_link = first_or_default(
odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
)
if license_info_document_link:
license_info_document_link = license_info_document_link.href

expires = (
to_utc(odl_license.metadata.terms.expires)
if odl_license.metadata.terms
else None
)
concurrency = (
int(odl_license.metadata.terms.concurrency)
if odl_license.metadata.terms
else None
)

if not license_info_document_link:
parsed_license = None

Codecov (codecov/patch) warning on src/palace/manager/api/odl2.py#L261: added line #L261 was not covered by tests.
elif (
not self._extract_availability(odl_license.metadata.availability)
or not publication_availability
):
# No need to fetch the license document, we already know that this title is not available.
parsed_license = LicenseData(
identifier=identifier,
checkout_url=None,
status_url=license_info_document_link,
status=LicenseStatus.unavailable,
checkouts_available=0,
)
concurrency = (
int(odl_license.metadata.terms.concurrency)
if odl_license.metadata.terms
else None
else:
parsed_license = self.get_license_data(
license_info_document_link,
checkout_link,
identifier,
expires,
concurrency,
self.http_get,
)

if not license_info_document_link:
parsed_license = None
if parsed_license is not None:
licenses.append(parsed_license)

license_formats = set(odl_license.metadata.formats)
for license_format in license_formats:
if (
skipped_license_formats
and license_format in skipped_license_formats
):
continue

if not medium:
medium = Edition.medium_from_media_type(license_format)

drm_schemes: list[str | None]
if license_format in self.LICENSE_FORMATS:
# Special case to handle DeMarque audiobooks which include the protection
# in the content type. When we see a license format of
# application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction
# it means that this audiobook title is available through the DeMarque streaming manifest
# endpoint.
drm_schemes = [
self.LICENSE_FORMATS[license_format][self.DRM_SCHEME]
]
license_format = self.LICENSE_FORMATS[license_format][
self.CONTENT_TYPE
]
else:
parsed_license = self.get_license_data(
license_info_document_link,
checkout_link,
identifier,
expires,
concurrency,
self.http_get,
drm_schemes = (
odl_license.metadata.protection.formats
if odl_license.metadata.protection
else []
)

if parsed_license is not None:
licenses.append(parsed_license)

license_formats = set(odl_license.metadata.formats)
for license_format in license_formats:
if (
skipped_license_formats
and license_format in skipped_license_formats
):
continue

if not medium:
medium = Edition.medium_from_media_type(license_format)

drm_schemes: list[str | None]
if license_format in self.LICENSE_FORMATS:
# Special case to handle DeMarque audiobooks which include the protection
# in the content type. When we see a license format of
# application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction
# it means that this audiobook title is available through the DeMarque streaming manifest
# endpoint.
drm_schemes = [
self.LICENSE_FORMATS[license_format][self.DRM_SCHEME]
]
license_format = self.LICENSE_FORMATS[license_format][
self.CONTENT_TYPE
]
else:
drm_schemes = (
odl_license.metadata.protection.formats
if odl_license.metadata.protection
else []
)

for drm_scheme in drm_schemes or [None]:
formats.append(
FormatData(
content_type=license_format,
drm_scheme=drm_scheme,
rights_uri=RightsStatus.IN_COPYRIGHT,
)
for drm_scheme in drm_schemes or [None]:
formats.append(
FormatData(
content_type=license_format,
drm_scheme=drm_scheme,
rights_uri=RightsStatus.IN_COPYRIGHT,
)
)

# If we don't have any licenses, then this title is an open-access title.
# So we don't change the circulation data.
if len(licenses) != 0:
metadata.circulation.licenses = licenses
metadata.circulation.licenses_owned = None
metadata.circulation.licenses_available = None
metadata.circulation.licenses_reserved = None
metadata.circulation.patrons_in_hold_queue = None
metadata.circulation.formats.extend(formats)
metadata.medium = medium
metadata.circulation.licenses = licenses
metadata.circulation.licenses_owned = None
metadata.circulation.licenses_available = None
metadata.circulation.licenses_reserved = None
metadata.circulation.patrons_in_hold_queue = None
metadata.circulation.formats.extend(formats)
metadata.medium = medium

return metadata

Expand Down
Loading