From 50fc11cdec3c938bbd0b7aa4b1855569ce71e3c4 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Wed, 6 Nov 2024 13:33:58 -0400 Subject: [PATCH] Replace webpub manifest parser with pydantic --- bin/opds2_import_monitor | 8 +- bin/opds2_odl_import_monitor | 4 - bin/opds2_odl_schema_validate | 4 - bin/opds2_reaper_monitor | 9 +- bin/opds2_schema_validate | 6 +- poetry.lock | 75 +-- pyproject.toml | 3 +- src/palace/manager/api/odl/importer.py | 187 +++---- src/palace/manager/core/metadata_layer.py | 2 +- src/palace/manager/core/opds2_import.py | 461 +++++++----------- src/palace/manager/core/opds_schema.py | 183 ++----- .../schema_acquisition-object.schema.json | 20 - .../schema_feed-metadata.schema.json | 52 -- .../drafts.opds.io/schema_feed.schema.json | 155 ------ .../schema_properties.schema.json | 123 ----- .../schema_publication.schema.json | 82 ---- ...fest_schema_contributor-object.schema.json | 54 -- ...ub-manifest_schema_contributor.schema.json | 26 - ...rimental_presentation_metadata.schema.json | 36 -- ...mental_presentation_properties.schema.json | 31 -- ...a_extensions_divina_properties.schema.json | 13 - ...tensions_encryption_properties.schema.json | 40 -- ...chema_extensions_epub_metadata.schema.json | 21 - ...ema_extensions_epub_properties.schema.json | 32 -- ...b-manifest_schema_language-map.schema.json | 20 - .../webpub-manifest_schema_link.schema.json | 130 ----- ...ebpub-manifest_schema_metadata.schema.json | 158 ------ ...manifest_schema_subject-object.schema.json | 45 -- ...webpub-manifest_schema_subject.schema.json | 26 - .../opds2_schema/odl-feed.schema.json | 146 ------ .../opds2_schema/odl-licenses.schema.json | 82 ---- .../opds2_schema/odl-publication.schema.json | 63 --- src/palace/manager/util/__init__.py | 18 +- tests/manager/api/odl/test_importer.py | 68 +-- tests/manager/core/test_opds2_import.py | 39 +- tests/manager/core/test_opds_validate.py | 11 +- 36 files changed, 326 insertions(+), 2107 deletions(-) delete mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_acquisition-object.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_properties.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_experimental_presentation_metadata.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_experimental_presentation_properties.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_extensions_divina_properties.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_extensions_encryption_properties.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_extensions_epub_metadata.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_extensions_epub_properties.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_language-map.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_link.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_metadata.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_subject-object.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_subject.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/odl-feed.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/odl-licenses.schema.json delete mode 100644 src/palace/manager/resources/opds2_schema/odl-publication.schema.json diff --git a/bin/opds2_import_monitor b/bin/opds2_import_monitor index 1bbcc36b0b..a66e8c020c 100755 --- a/bin/opds2_import_monitor +++ b/bin/opds2_import_monitor @@ -1,19 +1,13 @@ #!/usr/bin/env python """Update the circulation manager server with new books from OPDS 2.0 import collections.""" -from webpub_manifest_parser.opds2 import OPDS2FeedParserFactory -from palace.manager.core.opds2_import import ( - OPDS2Importer, - OPDS2ImportMonitor, - RWPMManifestParser, -) +from palace.manager.core.opds2_import import OPDS2Importer, OPDS2ImportMonitor from palace.manager.scripts.opds_import import OPDSImportScript import_script = OPDSImportScript( importer_class=OPDS2Importer, monitor_class=OPDS2ImportMonitor, protocol=OPDS2Importer.NAME, - parser=RWPMManifestParser(OPDS2FeedParserFactory()), ) import_script.run() diff --git a/bin/opds2_odl_import_monitor b/bin/opds2_odl_import_monitor index a43955c8ac..d7e0769ba9 100755 --- a/bin/opds2_odl_import_monitor +++ b/bin/opds2_odl_import_monitor @@ -3,17 +3,13 @@ OPDS 2.x + ODL collections.""" -from webpub_manifest_parser.odl import ODLFeedParserFactory - from palace.manager.api.odl.importer import ( OPDS2WithODLImporter, OPDS2WithODLImportMonitor, ) -from palace.manager.core.opds2_import import RWPMManifestParser from palace.manager.scripts.monitor import RunCollectionMonitorScript RunCollectionMonitorScript( OPDS2WithODLImportMonitor, import_class=OPDS2WithODLImporter, - parser=RWPMManifestParser(ODLFeedParserFactory()), ).run() diff --git a/bin/opds2_odl_schema_validate b/bin/opds2_odl_schema_validate index 28ab269d3c..29bf822760 100755 --- a/bin/opds2_odl_schema_validate +++ b/bin/opds2_odl_schema_validate @@ -3,17 +3,13 @@ OPDS import collections.""" -from webpub_manifest_parser.odl import ODLFeedParserFactory - from palace.manager.api.odl.importer import OPDS2WithODLImporter -from palace.manager.core.opds2_import import RWPMManifestParser from palace.manager.core.opds_schema import OPDS2WithODLSchemaValidation from palace.manager.scripts.monitor import RunCollectionMonitorScript import_script = RunCollectionMonitorScript( OPDS2WithODLSchemaValidation, import_class=OPDS2WithODLImporter, - parser=RWPMManifestParser(ODLFeedParserFactory()), ) import_script.run() diff --git a/bin/opds2_reaper_monitor b/bin/opds2_reaper_monitor index dfc73f8c75..bdff395115 100755 --- a/bin/opds2_reaper_monitor +++ b/bin/opds2_reaper_monitor @@ -6,16 +6,10 @@ from collections.abc import Generator from typing import Any, cast from sqlalchemy.orm import raiseload -from webpub_manifest_parser.opds2 import OPDS2FeedParserFactory from palace.manager.core.coverage import CoverageFailure from palace.manager.core.metadata_layer import TimestampData -from palace.manager.core.opds2_import import ( - OPDS2API, - OPDS2Importer, - OPDS2ImportMonitor, - RWPMManifestParser, -) +from palace.manager.core.opds2_import import OPDS2API, OPDS2Importer, OPDS2ImportMonitor from palace.manager.scripts.input import CollectionInputScript from palace.manager.sqlalchemy.model.collection import Collection from palace.manager.sqlalchemy.model.edition import Edition @@ -28,7 +22,6 @@ def main(): importer_class=OPDS2Importer, monitor_class=OPDS2ReaperMonitor, protocol=OPDS2Importer.NAME, - parser=RWPMManifestParser(OPDS2FeedParserFactory()), ) reaper_script.run() diff --git a/bin/opds2_schema_validate b/bin/opds2_schema_validate index 117118bdb7..0e1936e333 100755 --- a/bin/opds2_schema_validate +++ b/bin/opds2_schema_validate @@ -2,10 +2,7 @@ """Update the circulation manager server with new books from OPDS import collections.""" - -from webpub_manifest_parser.opds2 import OPDS2FeedParserFactory - -from palace.manager.core.opds2_import import OPDS2Importer, RWPMManifestParser +from palace.manager.core.opds2_import import OPDS2Importer from palace.manager.core.opds_schema import OPDS2SchemaValidation from palace.manager.scripts.opds_import import OPDSImportScript @@ -13,7 +10,6 @@ import_script = OPDSImportScript( importer_class=OPDS2Importer, monitor_class=OPDS2SchemaValidation, protocol=OPDS2Importer.NAME, - parser=RWPMManifestParser(OPDS2FeedParserFactory()), ) import_script.run() diff --git a/poetry.lock b/poetry.lock index f12fa29bbb..1f428d3215 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2923,27 +2923,6 @@ files = [ {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] -[[package]] -name = "palace-webpub-manifest-parser" -version = "4.0.0" -description = "A parser for the Readium Web Publication Manifest, OPDS 2.0 and ODL formats." -optional = false -python-versions = "<4,>=3.8" -files = [ - {file = "palace_webpub_manifest_parser-4.0.0-py3-none-any.whl", hash = "sha256:8b7f98c259cbd63d4515da80bdb9c387a7ef109162f38de1935bd5cba5aad345"}, - {file = "palace_webpub_manifest_parser-4.0.0.tar.gz", hash = "sha256:fb24ac59682e0ca7272fa55a04f905eb3f10a7c1948a3ad260a1e2eb9dacd0ac"}, -] - -[package.dependencies] -jsonschema = ">=4.19,<5.0" -multipledispatch = ">=1.0,<2.0" -pyrsistent = ">=0.20,<0.21" -python-dateutil = ">=2.8,<3.0" -pytz = ">=2024.1,<2025.0" -requests = ">=2.27,<3.0" -rfc3987 = ">=1.3,<2.0" -uritemplate = ">=4.1,<5.0" - [[package]] name = "pillow" version = "11.0.0" @@ -3700,47 +3679,6 @@ tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} docs = ["furo (>=2024.8.6)", "sphinx-autodoc-typehints (>=2.4.1)"] testing = ["covdefaults (>=2.3)", "pytest (>=8.3.3)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "setuptools (>=75.1)"] -[[package]] -name = "pyrsistent" -version = "0.20.0" -description = "Persistent/Functional/Immutable data structures" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pyrsistent-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c3aba3e01235221e5b229a6c05f585f344734bd1ad42a8ac51493d74722bbce"}, - {file = "pyrsistent-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1beb78af5423b879edaf23c5591ff292cf7c33979734c99aa66d5914ead880f"}, - {file = "pyrsistent-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cc459636983764e692b9eba7144cdd54fdec23ccdb1e8ba392a63666c60c34"}, - {file = "pyrsistent-0.20.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5ac696f02b3fc01a710427585c855f65cd9c640e14f52abe52020722bb4906b"}, - {file = "pyrsistent-0.20.0-cp310-cp310-win32.whl", hash = "sha256:0724c506cd8b63c69c7f883cc233aac948c1ea946ea95996ad8b1380c25e1d3f"}, - {file = "pyrsistent-0.20.0-cp310-cp310-win_amd64.whl", hash = "sha256:8441cf9616d642c475684d6cf2520dd24812e996ba9af15e606df5f6fd9d04a7"}, - {file = "pyrsistent-0.20.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0f3b1bcaa1f0629c978b355a7c37acd58907390149b7311b5db1b37648eb6958"}, - {file = "pyrsistent-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cdd7ef1ea7a491ae70d826b6cc64868de09a1d5ff9ef8d574250d0940e275b8"}, - {file = "pyrsistent-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cae40a9e3ce178415040a0383f00e8d68b569e97f31928a3a8ad37e3fde6df6a"}, - {file = "pyrsistent-0.20.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6288b3fa6622ad8a91e6eb759cfc48ff3089e7c17fb1d4c59a919769314af224"}, - {file = "pyrsistent-0.20.0-cp311-cp311-win32.whl", hash = "sha256:7d29c23bdf6e5438c755b941cef867ec2a4a172ceb9f50553b6ed70d50dfd656"}, - {file = "pyrsistent-0.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:59a89bccd615551391f3237e00006a26bcf98a4d18623a19909a2c48b8e986ee"}, - {file = "pyrsistent-0.20.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:09848306523a3aba463c4b49493a760e7a6ca52e4826aa100ee99d8d39b7ad1e"}, - {file = "pyrsistent-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a14798c3005ec892bbada26485c2eea3b54109cb2533713e355c806891f63c5e"}, - {file = "pyrsistent-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b14decb628fac50db5e02ee5a35a9c0772d20277824cfe845c8a8b717c15daa3"}, - {file = "pyrsistent-0.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e2c116cc804d9b09ce9814d17df5edf1df0c624aba3b43bc1ad90411487036d"}, - {file = "pyrsistent-0.20.0-cp312-cp312-win32.whl", hash = "sha256:e78d0c7c1e99a4a45c99143900ea0546025e41bb59ebc10182e947cf1ece9174"}, - {file = "pyrsistent-0.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:4021a7f963d88ccd15b523787d18ed5e5269ce57aa4037146a2377ff607ae87d"}, - {file = "pyrsistent-0.20.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:79ed12ba79935adaac1664fd7e0e585a22caa539dfc9b7c7c6d5ebf91fb89054"}, - {file = "pyrsistent-0.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f920385a11207dc372a028b3f1e1038bb244b3ec38d448e6d8e43c6b3ba20e98"}, - {file = "pyrsistent-0.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f5c2d012671b7391803263419e31b5c7c21e7c95c8760d7fc35602353dee714"}, - {file = "pyrsistent-0.20.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef3992833fbd686ee783590639f4b8343a57f1f75de8633749d984dc0eb16c86"}, - {file = "pyrsistent-0.20.0-cp38-cp38-win32.whl", hash = "sha256:881bbea27bbd32d37eb24dd320a5e745a2a5b092a17f6debc1349252fac85423"}, - {file = "pyrsistent-0.20.0-cp38-cp38-win_amd64.whl", hash = "sha256:6d270ec9dd33cdb13f4d62c95c1a5a50e6b7cdd86302b494217137f760495b9d"}, - {file = "pyrsistent-0.20.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ca52d1ceae015859d16aded12584c59eb3825f7b50c6cfd621d4231a6cc624ce"}, - {file = "pyrsistent-0.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b318ca24db0f0518630e8b6f3831e9cba78f099ed5c1d65ffe3e023003043ba0"}, - {file = "pyrsistent-0.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fed2c3216a605dc9a6ea50c7e84c82906e3684c4e80d2908208f662a6cbf9022"}, - {file = "pyrsistent-0.20.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e14c95c16211d166f59c6611533d0dacce2e25de0f76e4c140fde250997b3ca"}, - {file = "pyrsistent-0.20.0-cp39-cp39-win32.whl", hash = "sha256:f058a615031eea4ef94ead6456f5ec2026c19fb5bd6bfe86e9665c4158cf802f"}, - {file = "pyrsistent-0.20.0-cp39-cp39-win_amd64.whl", hash = "sha256:58b8f6366e152092194ae68fefe18b9f0b4f89227dfd86a07770c3d86097aebf"}, - {file = "pyrsistent-0.20.0-py3-none-any.whl", hash = "sha256:c55acc4733aad6560a7f5f818466631f07efc001fd023f34a6c203f8b6df0f0b"}, - {file = "pyrsistent-0.20.0.tar.gz", hash = "sha256:4c48f78f62ab596c679086084d0dd13254ae4f3d6c72a83ffdf5ebdef8f265a4"}, -] - [[package]] name = "pyspellchecker" version = "0.8.1" @@ -4280,17 +4218,6 @@ requests = ">=2.22,<3" [package.extras] fixture = ["fixtures"] -[[package]] -name = "rfc3987" -version = "1.3.8" -description = "Parsing and validation of URIs (RFC 3986) and IRIs (RFC 3987)" -optional = false -python-versions = "*" -files = [ - {file = "rfc3987-1.3.8-py2.py3-none-any.whl", hash = "sha256:10702b1e51e5658843460b189b185c0366d2cf4cff716f13111b0ea9fd2dce53"}, - {file = "rfc3987-1.3.8.tar.gz", hash = "sha256:d3c4d257a560d544e9826b38bc81db676890c79ab9d7ac92b39c7a253d5ca733"}, -] - [[package]] name = "rpds-py" version = "0.18.1" @@ -5194,4 +5121,4 @@ lxml = ">=3.8" [metadata] lock-version = "2.0" python-versions = ">=3.10,<4" -content-hash = "9f87d9807010e25860a8ea6291c8465d93a4969069ef4af12db7ef1dfa929519" +content-hash = "5913de8becf85e5e85ac1db81930fcfdfeb260b0bc9c38b62f5498580c98c951" diff --git a/pyproject.toml b/pyproject.toml index f5fa6992a9..0a5b6bf7ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ module = [ "palace.manager.api.opds_for_distributors", "palace.manager.core.opds2_import", "palace.manager.core.opds_import", + "palace.manager.core.opds_schema", "palace.manager.core.selftest", "palace.manager.feed.*", "palace.manager.integration.*", @@ -197,7 +198,6 @@ module = [ "unicodecsv", "uwsgi", "wcag_contrast_ratio", - "webpub_manifest_parser.*", ] [tool.poetry] @@ -242,7 +242,6 @@ multipledispatch = "^1.0" nameparser = "^1.1" # nameparser is for author name manipulations opensearch-dsl = "~1.0" opensearch-py = "~1.1" -palace-webpub-manifest-parser = "^4.0.0" pillow = "^11.0" pycountry = "^24.6.1" pycryptodome = "^3.18" diff --git a/src/palace/manager/api/odl/importer.py b/src/palace/manager/api/odl/importer.py index 74906c7d33..8ab9bbeb5d 100644 --- a/src/palace/manager/api/odl/importer.py +++ b/src/palace/manager/api/odl/importer.py @@ -1,26 +1,26 @@ from __future__ import annotations import datetime -from collections.abc import Callable, Mapping -from typing import TYPE_CHECKING, Any, cast +from collections.abc import Callable, Mapping, Sequence +from functools import cached_property +from typing import Any, cast from urllib.parse import urljoin -import dateutil +from pydantic import TypeAdapter, ValidationError from requests import Response from sqlalchemy.orm import Session -from webpub_manifest_parser.odl import ODLFeedParserFactory -from webpub_manifest_parser.opds2.registry import OPDS2LinkRelationsRegistry from palace.manager.api.odl.api import OPDS2WithODLApi from palace.manager.api.odl.auth import OdlAuthenticatedRequest from palace.manager.api.odl.constants import FEEDBOOKS_AUDIO from palace.manager.api.odl.settings import OPDS2AuthType, OPDS2WithODLSettings from palace.manager.core.metadata_layer import FormatData, LicenseData, Metadata -from palace.manager.core.opds2_import import ( - OPDS2Importer, - OPDS2ImportMonitor, - RWPMManifestParser, -) +from palace.manager.core.opds2_import import OPDS2Importer, OPDS2ImportMonitor +from palace.manager.opds import opds2, rwpm +from palace.manager.opds.lcp.status import LoanStatus +from palace.manager.opds.odl import odl +from palace.manager.opds.odl.info import LicenseInfo +from palace.manager.opds.odl.odl import Opds2OrOpds2WithOdlPublication from palace.manager.sqlalchemy.constants import MediaTypes from palace.manager.sqlalchemy.model.collection import Collection from palace.manager.sqlalchemy.model.edition import Edition @@ -30,13 +30,8 @@ RightsStatus, ) from palace.manager.sqlalchemy.model.resource import Hyperlink -from palace.manager.util import first_or_default -from palace.manager.util.datetime_helpers import to_utc from palace.manager.util.http import HTTP -if TYPE_CHECKING: - from webpub_manifest_parser.opds2.ast import OPDS2Feed, OPDS2Publication - class OPDS2WithODLImporter(OPDS2Importer): """Import information and formats from an ODL feed. @@ -63,7 +58,6 @@ def __init__( self, db: Session, collection: Collection, - parser: RWPMManifestParser | None = None, data_source_name: str | None = None, http_get: Callable[..., Response] | None = None, ): @@ -77,9 +71,6 @@ def __init__( If this is None, no LicensePools will be created -- only Editions. :type collection: Collection - :param parser: Feed parser - :type parser: RWPMManifestParser - :param data_source_name: Name of the source of this OPDS feed. All Editions created by this import will be associated with this DataSource. If there is no DataSource with this name, one will be created. @@ -90,7 +81,6 @@ def __init__( super().__init__( db, collection, - parser if parser else RWPMManifestParser(ODLFeedParserFactory()), data_source_name, ) @@ -134,9 +124,9 @@ def create_format_data(format: FormatData) -> FormatData: def _extract_publication_metadata( self, - feed: OPDS2Feed, - publication: OPDS2Publication, + publication: opds2.BasePublication, data_source_name: str | None, + feed_self_url: str, ) -> Metadata: """Extract a Metadata object from webpub-manifest-parser's publication. @@ -147,11 +137,11 @@ def _extract_publication_metadata( :return: Publication's metadata """ metadata = super()._extract_publication_metadata( - feed, publication, data_source_name + publication, data_source_name, feed_self_url ) - if not publication.licenses: - # This is an unlimited-access title with no license information. Nothing to do. + if not isinstance(publication, odl.Publication): + # This is a generic OPDS2 publication, not an ODL publication. return self._process_unlimited_access_title(metadata) formats = [] @@ -159,52 +149,39 @@ def _extract_publication_metadata( medium = None skipped_license_formats = set(self.settings.skipped_license_formats) # type: ignore[attr-defined] - publication_availability = self._extract_availability( - publication.metadata.availability - ) + publication_availability = publication.metadata.availability.available for odl_license in publication.licenses: identifier = odl_license.metadata.identifier - checkout_link = first_or_default( - odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key) - ) - if checkout_link: - checkout_link = checkout_link.href + checkout_link = odl_license.links.get( + rel=opds2.AcquisitionLinkRelations.borrow, + type=LoanStatus.content_type(), + raising=True, + ).href - license_info_document_link = first_or_default( - odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key) - ) - if license_info_document_link: - license_info_document_link = license_info_document_link.href + license_info_document_link = odl_license.links.get( + rel=rwpm.LinkRelations.self, + type=LicenseInfo.content_type(), + raising=True, + ).href - expires = ( - to_utc(odl_license.metadata.terms.expires) - if odl_license.metadata.terms - else None - ) - concurrency = ( - int(odl_license.metadata.terms.concurrency) - if odl_license.metadata.terms - else None - ) + expires = odl_license.metadata.terms.expires_datetime + concurrency = odl_license.metadata.terms.concurrency - if not license_info_document_link: - parsed_license = None - elif ( - not self._extract_availability(odl_license.metadata.availability) - or not publication_availability - ): - # No need to fetch the license document, we already know that this title is not available. - parsed_license = LicenseData( + parsed_license = ( + LicenseData( identifier=identifier, checkout_url=None, status_url=license_info_document_link, status=LicenseStatus.unavailable, checkouts_available=0, ) - else: - parsed_license = self.get_license_data( + if ( + not odl_license.metadata.availability.available + or not publication_availability + ) + else self.get_license_data( license_info_document_link, checkout_link, identifier, @@ -212,6 +189,7 @@ def _extract_publication_metadata( concurrency, self.http_get, ) + ) if parsed_license is not None: licenses.append(parsed_license) @@ -227,7 +205,7 @@ def _extract_publication_metadata( if not medium: medium = Edition.medium_from_media_type(license_format) - drm_schemes: list[str | None] + drm_schemes: Sequence[str | None] if license_format in self.LICENSE_FORMATS: # Special case to handle DeMarque audiobooks which include the protection # in the content type. When we see a license format of @@ -269,11 +247,10 @@ def _extract_publication_metadata( @classmethod def fetch_license_info( cls, document_link: str, do_get: Callable[..., Response] - ) -> dict[str, Any] | None: + ) -> bytes | None: resp = do_get(document_link, headers={}) if resp.status_code in (200, 201): - license_info_document = resp.json() - return license_info_document # type: ignore[no-any-return] + return resp.content else: cls.logger().warning( f"License Info Document is not available. " @@ -284,7 +261,7 @@ def fetch_license_info( @classmethod def parse_license_info( cls, - license_info_document: dict[str, Any], + license_info_document: bytes | str | None, license_info_link: str, checkout_link: str | None, ) -> LicenseData | None: @@ -299,73 +276,27 @@ def parse_license_info( :return: LicenseData if all the license's attributes are correct, None, otherwise """ - identifier = license_info_document.get("identifier") - document_status = license_info_document.get("status") - document_checkouts = license_info_document.get("checkouts", {}) - document_left = document_checkouts.get("left") - document_available = document_checkouts.get("available") - document_terms = license_info_document.get("terms", {}) - document_expires = document_terms.get("expires") - document_concurrency = document_terms.get("concurrency") - document_format = license_info_document.get("format") - - if identifier is None: - cls.logger().error("License info document has no identifier.") + if license_info_document is None: return None - expires = None - if document_expires is not None: - expires = dateutil.parser.parse(document_expires) - expires = to_utc(expires) - - if document_status is not None: - status = LicenseStatus.get(document_status) - if status.value != document_status: - cls.logger().warning( - f"Identifier # {identifier} unknown status value " - f"{document_status} defaulting to {status.value}." - ) - else: - status = LicenseStatus.unavailable - cls.logger().warning( - f"Identifier # {identifier} license info document does not have " - f"required key 'status'." - ) - - if document_available is not None: - available = int(document_available) - else: - available = 0 - cls.logger().warning( - f"Identifier # {identifier} license info document does not have " - f"required key 'checkouts.available'." + try: + document = LicenseInfo.model_validate_json(license_info_document) + except ValidationError as e: + cls.logger().error( + f"License Info Document at {license_info_link} is not valid. {e}" ) - - left = None - if document_left is not None: - left = int(document_left) - - concurrency = None - if document_concurrency is not None: - concurrency = int(document_concurrency) - - content_types = None - if document_format is not None: - if isinstance(document_format, str): - content_types = [document_format] - elif isinstance(document_format, list): - content_types = document_format + return None return LicenseData( - identifier=identifier, + identifier=document.identifier, checkout_url=checkout_link, status_url=license_info_link, - expires=expires, - checkouts_left=left, - checkouts_available=available, - status=status, - terms_concurrency=concurrency, - content_types=content_types, + expires=document.terms.expires_datetime, + checkouts_left=document.checkouts.left, + checkouts_available=document.checkouts.available, + status=document.status, + terms_concurrency=document.terms.concurrency, + content_types=list(document.formats), ) @classmethod @@ -422,6 +353,16 @@ def get_license_data( return parsed_license + @cached_property + def _publication_type_adapter(self) -> TypeAdapter[Opds2OrOpds2WithOdlPublication]: + return TypeAdapter(Opds2OrOpds2WithOdlPublication) + + def _get_publication( + self, + publication: dict[str, Any], + ) -> opds2.Publication | odl.Publication: + return self._publication_type_adapter.validate_python(publication) + class OPDS2WithODLImportMonitor(OdlAuthenticatedRequest, OPDS2ImportMonitor): """Import information from an ODL feed.""" diff --git a/src/palace/manager/core/metadata_layer.py b/src/palace/manager/core/metadata_layer.py index c9b9ca7dba..b42285dfe5 100644 --- a/src/palace/manager/core/metadata_layer.py +++ b/src/palace/manager/core/metadata_layer.py @@ -540,7 +540,7 @@ def __init__( self.terms_concurrency = terms_concurrency self.content_types = content_types - def add_to_pool(self, db: Session, pool: LicensePool): + def add_to_pool(self, db: Session, pool: LicensePool) -> License: license_obj, _ = get_one_or_create( db, License, diff --git a/src/palace/manager/core/opds2_import.py b/src/palace/manager/core/opds2_import.py index 162b5a4396..70572cd5fc 100644 --- a/src/palace/manager/core/opds2_import.py +++ b/src/palace/manager/core/opds2_import.py @@ -1,27 +1,16 @@ from __future__ import annotations -import logging -from collections.abc import Iterable, Mapping, Sequence +from collections.abc import Mapping, Sequence from datetime import datetime from functools import cached_property -from io import BytesIO, StringIO -from typing import TYPE_CHECKING, Any +from typing import Any from urllib.parse import urljoin, urlparse -import webpub_manifest_parser.opds2.ast as opds2_ast from flask_babel import lazy_gettext as _ +from pydantic import ValidationError from requests import Response from sqlalchemy.orm import Session from uritemplate import URITemplate -from webpub_manifest_parser.core import ManifestParserFactory, ManifestParserResult -from webpub_manifest_parser.core.analyzer import NodeFinder -from webpub_manifest_parser.core.ast import Link, Manifestlike -from webpub_manifest_parser.errors import BaseError -from webpub_manifest_parser.opds2.registry import ( - OPDS2LinkRelationsRegistry, - OPDS2MediaTypesRegistry, -) -from webpub_manifest_parser.utils import encode, first_or_default from palace.manager.api.circulation import RedirectFulfillment from palace.manager.api.circulation_exceptions import CannotFulfill @@ -47,6 +36,9 @@ ConfigurationFormItemType, FormField, ) +from palace.manager.opds import opds2, rwpm +from palace.manager.opds.opds2 import AcquisitionObject +from palace.manager.opds.types.link import CompactCollection from palace.manager.sqlalchemy.constants import ( IdentifierType, LinkRelations, @@ -66,73 +58,14 @@ ) from palace.manager.sqlalchemy.model.patron import Patron from palace.manager.sqlalchemy.model.resource import Hyperlink, Representation -from palace.manager.util.datetime_helpers import utc_now +from palace.manager.util import first_or_default from palace.manager.util.http import HTTP, BadResponseException -from palace.manager.util.opds_writer import OPDSFeed - -if TYPE_CHECKING: - from webpub_manifest_parser.core import ast as core_ast - - -class RWPMManifestParser: - def __init__(self, manifest_parser_factory: ManifestParserFactory): - """Initialize a new instance of RWPMManifestParser class. - - :param manifest_parser_factory: Factory creating a new instance - of a RWPM-compatible parser (RWPM, OPDS 2.x, ODL 2.x, etc.) - """ - if not isinstance(manifest_parser_factory, ManifestParserFactory): - raise ValueError( - "Argument 'manifest_parser_factory' must be an instance of {}".format( - ManifestParserFactory - ) - ) - - self._manifest_parser_factory = manifest_parser_factory - - def parse_manifest( - self, manifest: str | dict[str, Any] | Manifestlike - ) -> ManifestParserResult: - """Parse the feed into an RPWM-like AST object. - - :param manifest: RWPM-like manifest - :return: Parsed RWPM-like manifest - """ - result = None - input_stream: BytesIO | StringIO - - try: - if isinstance(manifest, bytes): - input_stream = BytesIO(manifest) - parser = self._manifest_parser_factory.create() - result = parser.parse_stream(input_stream) - elif isinstance(manifest, str): - input_stream = StringIO(manifest) - parser = self._manifest_parser_factory.create() - result = parser.parse_stream(input_stream) - elif isinstance(manifest, dict): - parser = self._manifest_parser_factory.create() - result = parser.parse_json(manifest) - elif isinstance(manifest, Manifestlike): - result = ManifestParserResult(manifest) - else: - raise ValueError( - "Argument 'manifest' must be either a string, a dictionary, or an instance of {}".format( - Manifestlike - ) - ) - except BaseError: - logging.exception("Failed to parse the RWPM-like manifest") - - raise - - return result class OPDS2ImporterSettings(OPDSImporterSettings): custom_accept_header: str = FormField( default="{}, {};q=0.9, */*;q=0.1".format( - OPDS2MediaTypesRegistry.OPDS_FEED.key, "application/json" + opds2.PublicationFeed.content_type(), "application/json" ), form=ConfigurationFormItem( label=_("Custom accept header"), @@ -271,7 +204,6 @@ def __init__( self, db: Session, collection: Collection, - parser: RWPMManifestParser, data_source_name: str | None = None, ): """Initialize a new instance of OPDS2Importer class. @@ -281,7 +213,6 @@ def __init__( :param collection: Circulation Manager's collection. LicensePools created by this OPDS2Import class will be associated with the given Collection. If this is None, no LicensePools will be created -- only Editions. - :param parser: Feed parser :param data_source_name: Name of the source of this OPDS feed. All Editions created by this import will be associated with this DataSource. If there is no DataSource with this name, one will be created. @@ -289,7 +220,6 @@ def __init__( This is only for use when you are importing OPDS metadata without any particular Collection in mind. """ super().__init__(db, collection, data_source_name) - self._parser = parser self.ignored_identifier_types = self.settings.ignored_identifier_types def _is_identifier_allowed(self, identifier: Identifier) -> bool: @@ -300,8 +230,8 @@ def _is_identifier_allowed(self, identifier: Identifier) -> bool: """ return identifier.type not in self.ignored_identifier_types - def _extract_subjects(self, subjects: list[core_ast.Subject]) -> list[SubjectData]: - """Extract a list of SubjectData objects from the webpub-manifest-parser's subject. + def _extract_subjects(self, subjects: Sequence[rwpm.Subject]) -> list[SubjectData]: + """Extract a list of SubjectData objects from the rwpm.Subject. :param subjects: Parsed subject object :return: List of subjects metadata @@ -312,33 +242,33 @@ def _extract_subjects(self, subjects: list[core_ast.Subject]) -> list[SubjectDat for subject in subjects: self.log.debug( - f"Started extracting subject metadata from {encode(subject)}" + f"Started extracting subject metadata from {subject.model_dump_json()}" ) scheme = subject.scheme - - subject_type = Subject.by_uri.get(scheme) + subject_type = Subject.by_uri.get(scheme) if scheme is not None else None if not subject_type: # We can't represent this subject because we don't # know its scheme. Just treat it as a tag. subject_type = Subject.TAG subject_metadata = SubjectData( - type=subject_type, identifier=subject.code, name=subject.name, weight=1 + type=subject_type, + identifier=subject.code, + name=str(subject.name), + weight=1, ) subject_metadata_list.append(subject_metadata) self.log.debug( "Finished extracting subject metadata from {}: {}".format( - encode(subject), encode(subject_metadata) + subject.model_dump_json(), subject_metadata ) ) self.log.debug( - "Finished extracting subjects metadata: {}".format( - encode(subject_metadata_list) - ) + f"Finished extracting subjects metadata: {subject_metadata_list}" ) return subject_metadata_list @@ -358,16 +288,11 @@ def _contributor_roles(self) -> Mapping[str, str]: return marc_code_mapping | {role.lower(): role for role in Contributor.Role} def _extract_contributor_roles( - self, roles: Sequence[str] | str | None, default: str + self, roles: Sequence[str], default: str ) -> list[str]: """ Normalize the contributor roles from the OPDS2 feed to our internal representation. """ - if roles is None: - roles = [] - elif isinstance(roles, str): - roles = [roles] - mapped_roles = set() for role in roles: if (lowercased_role := role.lower()) not in self._contributor_roles: @@ -381,10 +306,10 @@ def _extract_contributor_roles( def _extract_contributors( self, - contributors: list[core_ast.Contributor], + contributors: Sequence[rwpm.Contributor], default_role: str, ) -> list[ContributorData]: - """Extract a list of ContributorData objects from the webpub-manifest-parser's contributor. + """Extract a list of ContributorData objects from rwpm.Contributor. :param contributors: Parsed contributor object :param default_role: Default role @@ -396,53 +321,48 @@ def _extract_contributors( for contributor in contributors: self.log.debug( - "Started extracting contributor metadata from {}".format( - encode(contributor) - ) + f"Started extracting contributor metadata from {contributor.model_dump_json()}" ) - if not contributor.name: - self.log.warning( - f"Contributor has no name. Skipping. {encode(contributor)}" - ) - continue + if isinstance(contributor, rwpm.ContributorWithRole): + roles = self._extract_contributor_roles(contributor.roles, default_role) + else: + roles = [default_role] contributor_metadata = ContributorData( sort_name=contributor.sort_as, - display_name=contributor.name, + display_name=str(contributor.name), family_name=None, wikipedia_name=None, - roles=self._extract_contributor_roles(contributor.roles, default_role), + roles=roles, ) self.log.debug( - "Finished extracting contributor metadata from {}: {}".format( - encode(contributor), encode(contributor_metadata) - ) + f"Finished extracting contributor metadata from {contributor.model_dump_json()}: {contributor_metadata}" ) contributor_metadata_list.append(contributor_metadata) self.log.debug( - "Finished extracting contributors metadata: {}".format( - encode(contributor_metadata_list) - ) + f"Finished extracting contributors metadata: {contributor_metadata_list}" ) return contributor_metadata_list def _extract_link( - self, link: Link, feed_self_url: str, default_link_rel: str | None = None + self, link: opds2.Link, feed_self_url: str, default_link_rel: str | None = None ) -> LinkData: - """Extract a LinkData object from webpub-manifest-parser's link. + """Extract a LinkData object from opds2.Link. - :param link: webpub-manifest-parser's link + :param link: link :param feed_self_url: Feed's self URL :param default_link_rel: Default link's relation :return: Link metadata """ - self.log.debug(f"Started extracting link metadata from {encode(link)}") + self.log.debug( + f"Started extracting link metadata from {link.model_dump_json()}" + ) # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one. rights_uri = RightsStatus.rights_uri_from_string("") @@ -463,15 +383,13 @@ def _extract_link( ) self.log.debug( - "Finished extracting link metadata from {}: {}".format( - encode(link), encode(link_metadata) - ) + f"Finished extracting link metadata from {link.model_dump_json()}: {link_metadata}" ) return link_metadata def _extract_description_link( - self, publication: opds2_ast.OPDS2Publication + self, publication: opds2.BasePublication ) -> LinkData | None: """Extract description from the publication object and create a Hyperlink.DESCRIPTION link containing it. @@ -480,7 +398,7 @@ def _extract_description_link( """ self.log.debug( "Started extracting a description link from {}".format( - encode(publication.metadata.description) + publication.metadata.description ) ) @@ -495,14 +413,14 @@ def _extract_description_link( self.log.debug( "Finished extracting a description link from {}: {}".format( - encode(publication.metadata.description), encode(description_link) + publication.metadata.description, description_link ) ) return description_link def _extract_image_links( - self, publication: opds2_ast.OPDS2Publication, feed_self_url: str + self, publication: opds2.BasePublication, feed_self_url: str ) -> list[LinkData]: """Extracts a list of LinkData objects containing information about artwork. @@ -510,9 +428,7 @@ def _extract_image_links( :param feed_self_url: Feed's self URL :return: List of links metadata """ - self.log.debug( - f"Started extracting image links from {encode(publication.images)}" - ) + self.log.debug(f"Started extracting image links from {publication.images}") if not publication.images: return [] @@ -530,7 +446,7 @@ def _extract_image_links( sorted_raw_image_links = list( reversed( sorted( - publication.images.links, + publication.images, key=lambda link: (link.width or 0, link.height or 0), ) ) @@ -554,23 +470,21 @@ def _extract_image_links( image_links.append(cover_link) self.log.debug( - "Finished extracting image links from {}: {}".format( - encode(publication.images), encode(image_links) - ) + f"Finished extracting image links from {publication.images}: {image_links}" ) return image_links def _extract_links( - self, publication: opds2_ast.OPDS2Publication, feed_self_url: str + self, publication: opds2.BasePublication, feed_self_url: str ) -> list[LinkData]: - """Extract a list of LinkData objects from a list of webpub-manifest-parser links. + """Extract a list of LinkData objects from opds2.Publication. :param publication: Publication object :param feed_self_url: Feed's self URL :return: List of links metadata """ - self.log.debug(f"Started extracting links from {encode(publication.links)}") + self.log.debug(f"Started extracting links from {publication.links}") links = [] @@ -586,41 +500,30 @@ def _extract_links( if image_links: links.extend(image_links) - self.log.debug( - "Finished extracting links from {}: {}".format( - encode(publication.links), encode(links) - ) - ) + self.log.debug(f"Finished extracting links from {publication.links}: {links}") return links def _extract_media_types_and_drm_scheme_from_link( - self, link: core_ast.Link - ) -> list[tuple[str, str]]: + self, link: opds2.Link + ) -> list[tuple[str, str | None]]: """Extract information about content's media type and used DRM schema from the link. :param link: Link object :return: 2-tuple containing information about the content's media type and its DRM schema """ self.log.debug( - "Started extracting media types and a DRM scheme from {}".format( - encode(link) - ) + f"Started extracting media types and a DRM scheme from {link.model_dump_json()}" ) - media_types_and_drm_scheme = [] + media_types_and_drm_scheme: list[tuple[str, str | None]] = [] - if ( - link.properties - and link.properties.availability - and link.properties.availability.state - != opds2_ast.OPDS2AvailabilityType.AVAILABLE.value - ): - self.log.info(f"Link unavailable. Skipping. {encode(link)}") + if not link.properties.availability.available: + self.log.info(f"Link unavailable. Skipping. {link.model_dump_json()}") return [] # We need to take into account indirect acquisition links - if link.properties and link.properties.indirect_acquisition: + if link.properties.indirect_acquisition: # We make the assumption that when we have nested indirect acquisition links # that the most deeply nested link is the content type, and the link at the nesting # level above that is the DRM. We discard all other levels of indirection, assuming @@ -630,11 +533,11 @@ def _extract_media_types_and_drm_scheme_from_link( # where the top level link is a OPDS feed and the common case of a single # indirect_acquisition link. for acquisition_object in link.properties.indirect_acquisition: - nested_acquisition = acquisition_object + nested_acquisition: AcquisitionObject | None = acquisition_object nested_types = [link.type] while nested_acquisition: nested_types.append(nested_acquisition.type) - nested_acquisition = first_or_default(nested_acquisition.child) + nested_acquisition = first_or_default(nested_acquisition.children) [drm_type, media_type] = nested_types[-2:] # We then check this returned pair of content types to make sure they match known @@ -657,13 +560,15 @@ def _extract_media_types_and_drm_scheme_from_link( self.log.debug( "Finished extracting media types and a DRM scheme from {}: {}".format( - encode(link), encode(media_types_and_drm_scheme) + link, media_types_and_drm_scheme ) ) return media_types_and_drm_scheme - def _extract_medium_from_links(self, links: core_ast.LinkList) -> str | None: + def _extract_medium_from_links( + self, links: CompactCollection[opds2.Link] + ) -> str | None: """Extract the publication's medium from its links. :param links: List of links @@ -688,7 +593,7 @@ def _extract_medium_from_links(self, links: core_ast.LinkList) -> str | None: @staticmethod def _extract_medium( - publication: opds2_ast.OPDS2Publication, + publication: opds2.BasePublication, default_medium: str | None = Edition.BOOK_MEDIUM, ) -> str | None: """Extract the publication's medium from its metadata. @@ -705,79 +610,41 @@ def _extract_medium( return medium - def _extract_identifier( - self, publication: opds2_ast.OPDS2Publication - ) -> Identifier: + def _extract_identifier(self, publication: opds2.BasePublication) -> Identifier: """Extract the publication's identifier from its metadata. :param publication: Publication object :return: Identifier object """ - return self.parse_identifier(publication.metadata.identifier) # type: ignore[no-any-return] - - @classmethod - def _extract_availability( - cls, availability: opds2_ast.OPDS2AvailabilityInformation | None - ) -> bool: - """Extract the publication's availability from its availability information. - - We default to a publication being available if no availability information is provided or if the provided - availability information is past the time specified in its `until` field. The `since` field on the - availability information is not used, it is assumed to be informational and always in the past if it is - present. This is based on a discussion with the OPDS 2.0 working group. - - TODO: Update our handling of the `since` field based on the resolution of the discussion here: - https://github.com/opds-community/drafts/discussions/63#discussioncomment-9806140 - - :return: Boolean value indicating whether the publication is available. - """ - available = opds2_ast.OPDS2AvailabilityType.AVAILABLE.value - if ( - availability - and availability.state != available - and (not availability.until or availability.until > utc_now()) - ): - return False - - return True + return self.parse_identifier(publication.metadata.identifier) def _extract_publication_metadata( self, - feed: opds2_ast.OPDS2Feed, - publication: opds2_ast.OPDS2Publication, + publication: opds2.BasePublication, data_source_name: str | None, + feed_self_url: str, ) -> Metadata: - """Extract a Metadata object from webpub-manifest-parser's publication. + """Extract a Metadata object from opds2.Publication. :param publication: Feed object :param publication: Publication object :param data_source_name: Data source's name :return: Publication's metadata """ - self.log.debug( - "Started extracting metadata from publication {}".format( - encode(publication) - ) - ) + self.log.debug(f"Started extracting metadata from publication {publication}") - title = publication.metadata.title - - if title == OPDSFeed.NO_TITLE: - title = None - - subtitle = publication.metadata.subtitle + title = str(publication.metadata.title) + subtitle = str(publication.metadata.subtitle) languages = first_or_default(publication.metadata.languages) derived_medium = self._extract_medium_from_links(publication.links) medium = self._extract_medium(publication, derived_medium) - publisher = first_or_default(publication.metadata.publishers) - if publisher: - publisher = publisher.name + first_publisher = first_or_default(publication.metadata.publishers) + publisher = str(first_publisher.name) if first_publisher else None - imprint = first_or_default(publication.metadata.imprints) - if imprint: - imprint = imprint.name + first_imprint = first_or_default(publication.metadata.imprints) + imprint = str(first_imprint.name) if first_imprint else None published = publication.metadata.published subjects = self._extract_subjects(publication.metadata.subjects) @@ -826,9 +693,6 @@ def _extract_publication_metadata( f"Ignoring the time tracking flag for entry {publication.metadata.identifier}" ) - feed_self_url = first_or_default( - feed.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key) - ).href links = self._extract_links(publication, feed_self_url) last_opds_update = publication.metadata.modified @@ -848,7 +712,7 @@ def _extract_publication_metadata( # FIXME: It seems that OPDS 2.0 spec doesn't contain information about rights so we use the default one rights_uri = RightsStatus.rights_uri_from_string("") - if self._extract_availability(publication.metadata.availability): + if publication.metadata.availability.available: licenses_owned = LicensePool.UNLIMITED_ACCESS licenses_available = LicensePool.UNLIMITED_ACCESS else: @@ -896,7 +760,7 @@ def _extract_publication_metadata( self.log.debug( "Finished extracting metadata from publication {}: {}".format( - encode(publication), encode(metadata) + publication, metadata ) ) @@ -904,7 +768,7 @@ def _extract_publication_metadata( def _find_formats_in_non_open_access_acquisition_links( self, - ast_link_list: list[core_ast.Link], + ast_link_list: Sequence[opds2.StrictLink], link_data_list: list[LinkData], rights_uri: str, circulation_data: CirculationData, @@ -940,24 +804,14 @@ def _find_formats_in_non_open_access_acquisition_links( return formats - @staticmethod - def _get_publications( - feed: opds2_ast.OPDS2Feed, - ) -> Iterable[opds2_ast.OPDS2Publication]: - """Return all the publications in the feed. - :param feed: OPDS 2.0 feed - :return: An iterable list of publications containing in the feed - """ - if feed.publications: - yield from feed.publications - - if feed.groups: - for group in feed.groups: - if group.publications: - yield from group.publications + def _get_publication( + self, + publication: dict[str, Any], + ) -> opds2.BasePublication: + return opds2.Publication.model_validate(publication) @staticmethod - def _is_acquisition_link(link: core_ast.Link) -> bool: + def _is_acquisition_link(link: opds2.Link) -> bool: """Return a boolean value indicating whether a link can be considered an acquisition link. :param link: Link object @@ -1026,130 +880,141 @@ def _record_coverage_failure( return failure def _record_publication_unrecognizable_identifier( - self, publication: opds2_ast.OPDS2Publication + self, identifier: str | None, title: str | None ) -> None: """Record a publication's unrecognizable identifier, i.e. identifier that has an unknown format and could not be parsed by CM. :param publication: OPDS 2.x publication object """ - original_identifier = publication.metadata.identifier - title = publication.metadata.title - - if original_identifier is None: + if identifier is None: self.log.warning(f"Publication '{title}' does not have an identifier.") else: self.log.warning( - f"Publication # {original_identifier} ('{title}') has an unrecognizable identifier." + f"Publication # {identifier} ('{title}') has an unrecognizable identifier." ) - def extract_next_links(self, feed: str | opds2_ast.OPDS2Feed) -> list[str]: + def _parse_feed(self, feed: str | bytes) -> opds2.PublicationFeedNoValidation: + return opds2.PublicationFeedNoValidation.model_validate_json(feed) + + def extract_next_links(self, feed: str | bytes) -> list[str]: """Extracts "next" links from the feed. :param feed: OPDS 2.0 feed :return: List of "next" links """ - parser_result = self._parser.parse_manifest(feed) - parsed_feed = parser_result.root - - if not parsed_feed: + try: + parsed_feed = self._parse_feed(feed) + except ValidationError: return [] - next_links = parsed_feed.links.get_by_rel(self.NEXT_LINK_RELATION) - next_links = [next_link.href for next_link in next_links] + next_links = [ + next_link.href + for next_link in parsed_feed.links.get_collection( + rel=self.NEXT_LINK_RELATION + ) + ] - return next_links # type: ignore[no-any-return] + return next_links def extract_last_update_dates( - self, feed: str | opds2_ast.OPDS2Feed + self, feed: str | bytes ) -> list[tuple[str | None, datetime | None]]: """Extract last update date of the feed. :param feed: OPDS 2.0 feed :return: A list of 2-tuples containing publication's identifiers and their last modified dates """ - parser_result = self._parser.parse_manifest(feed) - parsed_feed = parser_result.root - - if not parsed_feed: + try: + parsed_feed = self._parse_feed(feed) + + return [ + (publication.metadata.identifier, publication.metadata.modified) + for publication in [ + self._get_publication(publication) + for publication in parsed_feed.publications + ] + ] + except ValidationError: return [] - dates = [ - (publication.metadata.identifier, publication.metadata.modified) - for publication in self._get_publications(parsed_feed) - if publication.metadata.modified - ] - - return dates - - def _parse_feed_links(self, links: list[core_ast.Link]) -> None: + def _parse_feed_links(self, links: CompactCollection[opds2.StrictLink]) -> None: """Parse the global feed links. Currently only parses the token endpoint link""" - for link in links: - if first_or_default(link.rels) == Hyperlink.TOKEN_AUTH: - # Save the collection-wide token authentication endpoint - self.collection.integration_configuration.context_update( - {OPDS2API.TOKEN_AUTH_CONFIG_KEY: link.href} - ) + token_auth_link = links.get(rel=Hyperlink.TOKEN_AUTH) + if token_auth_link is not None: + self.collection.integration_configuration.context_update( + {OPDS2API.TOKEN_AUTH_CONFIG_KEY: token_auth_link.href} + ) + + def _get_allowed_identifier( + self, identifier: str | None, title: str | None + ) -> Identifier | None: + recognized_identifier = self.parse_identifier(identifier) + if not recognized_identifier or not self._is_identifier_allowed( + recognized_identifier + ): + self._record_publication_unrecognizable_identifier(identifier, title) + return None + return recognized_identifier def extract_feed_data( - self, feed: str | opds2_ast.OPDS2Feed, feed_url: str | None = None + self, feed: str | bytes, feed_url: str | None = None ) -> tuple[dict[str, Metadata], dict[str, list[CoverageFailure]]]: """Turn an OPDS 2.0 feed into lists of Metadata and CirculationData objects. :param feed: OPDS 2.0 feed :param feed_url: Feed URL used to resolve relative links """ - parser_result = self._parser.parse_manifest(feed) - feed = parser_result.root + try: + parsed_feed = self._parse_feed(feed) + except ValidationError: + self.log.exception("Error validating feed") + return {}, {} + publication_metadata_dictionary = {} failures: dict[str, list[CoverageFailure]] = {} - if feed.links: - self._parse_feed_links(feed.links) + if parsed_feed.links: + self._parse_feed_links(parsed_feed.links) + + for publication_dict in parsed_feed.publications: + try: + publication = self._get_publication(publication_dict) + except ValidationError as e: + raw_identifier = publication_dict.get("metadata", {}).get("identifier") + raw_title = publication_dict.get("metadata", {}).get("title") + recognized_identifier = self._get_allowed_identifier( + raw_identifier, raw_title + ) + if recognized_identifier: + self._record_coverage_failure( + failures, recognized_identifier, str(e) + ) - for publication in self._get_publications(feed): - recognized_identifier = self._extract_identifier(publication) + continue + recognized_identifier = self._get_allowed_identifier( + publication.metadata.identifier, str(publication.metadata.title) + ) - if not recognized_identifier or not self._is_identifier_allowed( - recognized_identifier - ): - self._record_publication_unrecognizable_identifier(publication) + if not recognized_identifier: continue + feed_self_url = parsed_feed.links.get( + rel=rwpm.LinkRelations.self, raising=True + ).href publication_metadata = self._extract_publication_metadata( - feed, publication, self.data_source_name + publication, self.data_source_name, feed_self_url ) publication_metadata_dictionary[ publication_metadata.primary_identifier.identifier ] = publication_metadata - node_finder = NodeFinder() - - for error in parser_result.errors: - publication = node_finder.find_parent_or_self( - parser_result.root, error.node, opds2_ast.OPDS2Publication - ) - - if publication: - recognized_identifier = self._extract_identifier(publication) - - if not recognized_identifier or not self._is_identifier_allowed( - recognized_identifier - ): - self._record_publication_unrecognizable_identifier(publication) - else: - self._record_coverage_failure( - failures, recognized_identifier, error.error_message - ) - else: - self.log.warning(f"{error.error_message}") - return publication_metadata_dictionary, failures class OPDS2ImportMonitor(OPDSImportMonitor): PROTOCOL = OPDS2API.label() - MEDIA_TYPE = OPDS2MediaTypesRegistry.OPDS_FEED.key, "application/json" + MEDIA_TYPE = opds2.PublicationFeed.content_type(), "application/json" def _verify_media_type(self, url: str, resp: Response) -> None: # Make sure we got an OPDS feed, and not an error page that was @@ -1164,5 +1029,5 @@ def _verify_media_type(self, url: str, resp: Response) -> None: def _get_accept_header(self) -> str: return "{}, {};q=0.9, */*;q=0.1".format( - OPDS2MediaTypesRegistry.OPDS_FEED.key, "application/json" + opds2.PublicationFeed.content_type(), "application/json" ) diff --git a/src/palace/manager/core/opds_schema.py b/src/palace/manager/core/opds_schema.py index 372570988d..a1d6abd8fd 100644 --- a/src/palace/manager/core/opds_schema.py +++ b/src/palace/manager/core/opds_schema.py @@ -1,180 +1,63 @@ -import json -import re -from collections.abc import Generator -from importlib.abc import Traversable +from collections.abc import Callable from typing import Any -from urllib.parse import urlparse -from jsonschema import Draft7Validator -from jsonschema import _keywords as kw -from jsonschema import validators -from jsonschema.exceptions import ValidationError -from jsonschema.protocols import Validator -from referencing import Registry -from referencing.retrieval import to_cached_resource +from pydantic import ValidationError +from requests import Response from palace.manager.api.odl.importer import OPDS2WithODLImportMonitor +from palace.manager.core.coverage import CoverageFailure from palace.manager.core.opds2_import import OPDS2ImportMonitor +from palace.manager.opds.odl.odl import Feed +from palace.manager.opds.opds2 import BasePublicationFeed, PublicationFeed +from palace.manager.sqlalchemy.model.edition import Edition from palace.manager.util.log import LoggerMixin -from palace.manager.util.resources import resources_dir - - -def opds2_schema_resources() -> Traversable: - return resources_dir("opds2_schema") - - -@to_cached_resource(loads=json.loads) -def opds2_cached_retrieve(uri: str) -> str: - """ - Fetch files from the resources directory or from local cache. - - If the uri is a file:// uri, fetch the file from the resources directory. Otherwise, - fetch the file from the local cache in the 'cached' directory. - - To refresh the cache, delete the 'cached' directory uncomment the code below and re-run - the tests. This will force the function to download any necessary files into the cache. - """ - parsed = urlparse(uri) - resources = opds2_schema_resources() - if parsed.scheme == "file": - filename = f"{parsed.netloc}{parsed.path}" - package_file = resources / filename - else: - netloc_dir = parsed.netloc - filename = parsed.path.removeprefix("/").replace("/", "_") - package_file = resources / "cached" / netloc_dir / filename - # if not package_file.is_file(): - # cached_dir = resources / "cached" / netloc_dir - # cached_dir.mkdir(parents=True, exist_ok=True) - # (cached_dir / filename).write_text(requests.get(uri).text) - - return package_file.read_text() - - -def opds2_regex_replace(pattern: str) -> str: - """ - Replace named groups in a regex pattern. - - The OPDS2 schema uses a regex pattern using named groups, which is a valid PCRE pattern, - but not valid in Python's re module. This function converts the named groups to use the - Python specific ?P syntax. - """ - return re.sub(r"\?<(.+?)>", r"?P<\1>", pattern) - - -def opds2_pattern_validator( - validator: Validator, patrn: str, instance: Any, schema: dict[str, Any] -) -> Generator[ValidationError, None, None]: - """ - Validation function to validate a patten element. - """ - patrn = opds2_regex_replace(patrn) - yield from kw.pattern(validator, patrn, instance, schema) - - -def opds2_pattern_properties_validator( - validator: Validator, - patrnProps: dict[str, Any], - instance: dict[str, Any], - schema: dict[str, Any], -) -> Generator[ValidationError, None, None]: - """ - Validation function to validate a pattenProperties element. - """ - filtered_patterns = { - opds2_regex_replace(pattern): subschema - for pattern, subschema in patrnProps.items() - } - yield from kw.patternProperties(validator, filtered_patterns, instance, schema) - - -def opds2_additional_properties_validator( - validator: Validator, - aP: dict[str, Any], - instance: dict[str, Any], - schema: dict[str, Any], -) -> Generator[ValidationError, None, None]: - """ - Validation function to validate a pattenProperties element. - """ - if "patternProperties" in schema: - schema = schema.copy() - schema["patternProperties"] = { - opds2_regex_replace(patrn): subschema - for patrn, subschema in schema["patternProperties"].items() - } - yield from kw.additionalProperties(validator, aP, instance, schema) - - -def opds2_schema_registry() -> Registry: - """ - Create a Registry that loads schemas with the opds2_cached_retrieve function. - """ - # See https://github.com/python-jsonschema/referencing/issues/61 for details on - # why we needed the type ignore here. - return Registry(retrieve=opds2_cached_retrieve) # type: ignore[call-arg] - - -def opds2_schema_validator(schema: dict[str, Any]) -> Validator: - """ - This returns a jsonschema Draft7Validator modified to use the opds2_pattern_validator - function for the pattern keyword. - """ - - registry = opds2_schema_registry() - validator_cls = validators.extend( - Draft7Validator, - version="draft7", - validators={ - "pattern": opds2_pattern_validator, - "patternProperties": opds2_pattern_properties_validator, - "additionalProperties": opds2_additional_properties_validator, - }, - ) - return validator_cls(schema, registry=registry) class OPDS2SchemaValidationMixin(LoggerMixin): - def validate_schema(self, schema_url: str, feed: dict[str, Any]) -> None: - schema = {"$ref": schema_url} - schema_validator = opds2_schema_validator(schema) + @classmethod + def validate_schema( + cls, feed_cls: type[BasePublicationFeed[Any]], feed: bytes | str + ) -> None: try: - schema_validator.validate(feed) + feed_cls.model_validate_json(feed) except ValidationError as e: - self.log.error("Validation failed for feed") - for attr in ["message", "path", "schema_path", "validator_value"]: - self.log.error(f"{attr}: {getattr(e, attr, None)}") + print(str(e)) raise class OPDS2SchemaValidation(OPDS2ImportMonitor, OPDS2SchemaValidationMixin): - def import_one_feed(self, feed): - if type(feed) in (str, bytes): - feed = json.loads(feed) - self.validate_schema("https://drafts.opds.io/schema/feed.schema.json", feed) - return [], [] - - def follow_one_link(self, url, do_get=None): + def import_one_feed( + self, feed: bytes | str + ) -> tuple[list[Edition], dict[str, list[CoverageFailure]]]: + self.validate_schema(PublicationFeed, feed) + return [], {} + + def follow_one_link( + self, url: str, do_get: Callable[..., Response] | None = None + ) -> tuple[list[str], bytes | None]: """We don't need all pages, the first page should be fine for validation""" next_links, feed = super().follow_one_link(url, do_get) return [], feed - def feed_contains_new_data(self, feed): + def feed_contains_new_data(self, feed: bytes | str) -> bool: return True class OPDS2WithODLSchemaValidation( OPDS2WithODLImportMonitor, OPDS2SchemaValidationMixin ): - def import_one_feed(self, feed): - feed = json.loads(feed) - self.validate_schema("file://odl-feed.schema.json", feed) - return [], [] - - def follow_one_link(self, url, do_get=None): + def import_one_feed( + self, feed: bytes | str + ) -> tuple[list[Edition], dict[str, list[CoverageFailure]]]: + self.validate_schema(Feed, feed) + return [], {} + + def follow_one_link( + self, url: str, do_get: Callable[..., Response] | None = None + ) -> tuple[list[str], bytes | None]: """We don't need all pages, the first page should be fine for validation""" next_links, feed = super().follow_one_link(url, do_get) return [], feed - def feed_contains_new_data(self, feed): + def feed_contains_new_data(self, feed: bytes | str) -> bool: return True diff --git a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_acquisition-object.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_acquisition-object.schema.json deleted file mode 100644 index 1a05a4efce..0000000000 --- a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_acquisition-object.schema.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://drafts.opds.io/schema/acquisition-object.schema.json", - "title": "OPDS Acquisition Object", - "type": "object", - "properties": { - "type": { - "type": "string" - }, - "child": { - "type": "array", - "items": { - "$ref": "acquisition-object.schema.json" - } - } - }, - "required": [ - "type" - ] -} diff --git a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json deleted file mode 100644 index 583d27bedc..0000000000 --- a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://drafts.opds.io/schema/feed-metadata.schema.json", - "title": "OPDS Metadata", - "type": "object", - "properties": { - "identifier": { - "type": "string", - "format": "uri" - }, - "@type": { - "type": "string", - "format": "uri" - }, - "title": { - "type": [ - "string", - "array", - "object" - ] - }, - "subtitle": { - "type": [ - "string", - "array", - "object" - ] - }, - "modified": { - "type": "string", - "format": "date-time" - }, - "description": { - "type": "string" - }, - "itemsPerPage": { - "type": "integer", - "exclusiveMinimum": 0 - }, - "currentPage": { - "type": "integer", - "exclusiveMinimum": 0 - }, - "numberOfItems": { - "type": "integer", - "minimum": 0 - } - }, - "required": [ - "title" - ] -} diff --git a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json deleted file mode 100644 index 823b442d74..0000000000 --- a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json +++ /dev/null @@ -1,155 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://drafts.opds.io/schema/feed.schema.json", - "title": "OPDS Feed", - "type": "object", - "properties": { - "metadata": { - "description": "Contains feed-level metadata such as title or number of items", - "$ref": "feed-metadata.schema.json" - }, - "links": { - "description": "Feed-level links such as search or pagination", - "type": "array", - "items": { - "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" - }, - "uniqueItems": true, - "minItems": 1, - "contains": { - "properties": { - "rel": { - "anyOf": [ - { - "type": "string", - "const": "self" - }, - { - "type": "array", - "contains": { - "const": "self" - } - } - ] - } - }, - "required": [ - "rel" - ] - } - }, - "publications": { - "description": "A list of publications that can be acquired", - "type": "array", - "items": { - "$ref": "publication.schema.json" - }, - "uniqueItems": true, - "minItems": 1 - }, - "navigation": { - "description": "Navigation for the catalog using links", - "type": "array", - "items": { - "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" - }, - "uniqueItems": true, - "minItems": 1, - "allOf": [ - { - "description": "Each Link Object in a navigation collection must contain a title", - "items": { - "required": [ - "title" - ] - } - } - ] - }, - "facets": { - "description": "Facets are meant to re-order or obtain a subset for the current list of publications", - "type": "array", - "items": { - "type": "object", - "properties": { - "metadata": { - "$ref": "feed-metadata.schema.json" - }, - "links": { - "type": "array", - "items": { - "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" - }, - "uniqueItems": true, - "minItems": 1 - } - } - }, - "uniqueItems": true, - "minItems": 1 - }, - "groups": { - "description": "Groups provide a curated experience, grouping publications or navigation links together", - "type": "array", - "items": { - "type": "object", - "properties": { - "metadata": { - "$ref": "feed-metadata.schema.json" - }, - "links": { - "type": "array", - "items": { - "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" - }, - "uniqueItems": true, - "minItems": 1 - }, - "publications": { - "type": "array", - "items": { - "$ref": "publication.schema.json" - }, - "uniqueItems": true, - "minItems": 1 - }, - "navigation": { - "type": "array", - "items": { - "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" - }, - "uniqueItems": true, - "minItems": 1 - } - }, - "required": [ - "metadata" - ] - } - } - }, - "required": [ - "metadata", - "links" - ], - "additionalProperties": { - "$ref": "https://readium.org/webpub-manifest/schema/subcollection.schema.json" - }, - "anyOf": [ - { - "required": [ - "publications" - ] - }, - { - "required": [ - "navigation" - ] - }, - { - "required": [ - "groups" - ] - } - ] -} diff --git a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_properties.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_properties.schema.json deleted file mode 100644 index a3fc15a124..0000000000 --- a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_properties.schema.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://drafts.opds.io/schema/properties.schema.json", - "title": "OPDS Link Properties", - "type": "object", - "properties": { - "numberOfItems": { - "description": "Provide a hint about the expected number of items returned", - "type": "integer", - "minimum": 0 - }, - "price": { - "description": "The price of a publication is tied to its acquisition link", - "type": "object", - "properties": { - "value": { - "type": "number", - "minimum": 0 - }, - "currency": { - "type": "string", - "enum": [ - "AED", "AFN", "ALL", "AMD", "ANG", "AOA", "ARS", "AUD", "AWG", "AZN", "BAM", "BBD", "BDT", - "BGN", "BHD", "BIF", "BMD", "BND", "BOB", "BOV", "BRL", "BSD", "BTN", "BWP", "BYN", "BZD", - "CAD", "CDF", "CHE", "CHF", "CHW", "CLF", "CLP", "CNY", "COP", "COU", "CRC", "CUC", "CUP", - "CVE", "CZK", "DJF", "DKK", "DOP", "DZD", "EGP", "ERN", "ETB", "EUR", "FJD", "FKP", "GBP", - "GEL", "GHS", "GIP", "GMD", "GNF", "GTQ", "GYD", "HKD", "HNL", "HRK", "HTG", "HUF", "IDR", - "ILS", "INR", "IQD", "IRR", "ISK", "JMD", "JOD", "JPY", "KES", "KGS", "KHR", "KMF", "KPW", - "KRW", "KWD", "KYD", "KZT", "LAK", "LBP", "LKR", "LRD", "LSL", "LYD", "MAD", "MDL", "MGA", - "MKD", "MMK", "MNT", "MOP", "MRU", "MUR", "MVR", "MWK", "MXN", "MXV", "MYR", "MZN", "NAD", - "NGN", "NIO", "NOK", "NPR", "NZD", "OMR", "PAB", "PEN", "PGK", "PHP", "PKR", "PLN", "PYG", - "QAR", "RON", "RSD", "RUB", "RWF", "SAR", "SBD", "SCR", "SDG", "SEK", "SGD", "SHP", "SLL", - "SOS", "SRD", "SSP", "STN", "SVC", "SYP", "SZL", "THB", "TJS", "TMT", "TND", "TOP", "TRY", - "TTD", "TWD", "TZS", "UAH", "UGX", "USD", "USN", "UYI", "UYU", "UZS", "VEF", "VES", "VND", - "VUV", "WST", "XAF", "XAG", "XAU", "XBA", "XBB", "XBC", "XBD", "XCD", "XDR", "XOF", "XPD", - "XPF", "XPT", "XSU", "XTS", "XUA", "XXX", "YER", "ZAR", "ZMW", "ZWL" - ] - } - }, - "required": [ - "currency", - "value" - ] - }, - "indirectAcquisition": { - "description": "Indirect acquisition provides a hint for the expected media type that will be acquired after additional steps", - "type": "array", - "items": { - "$ref": "acquisition-object.schema.json" - } - }, - "holds": { - "description": "Library-specific feature for unavailable books that support a hold list", - "type": "object", - "properties": { - "total": { - "type": "integer", - "minimum": 0 - }, - "position": { - "type": "integer", - "minimum": 0 - } - } - }, - "copies": { - "description": "Library-specific feature that contains information about the copies that a library has acquired", - "type": "object", - "properties": { - "total": { - "type": "integer", - "minimum": 0 - }, - "available": { - "type": "integer", - "minimum": 0 - } - } - }, - "availability": { - "description": "Indicates the availability of a given resource", - "type": "object", - "properties": { - "state": { - "type": "string", - "enum": [ - "available", - "unavailable", - "reserved", - "ready" - ] - }, - "since": { - "description": "Timestamp for the previous state change", - "type": "string", - "anyOf": [ - { - "format": "date" - }, - { - "format": "date-time" - } - ] - }, - "until": { - "description": "Timestamp for the next state change", - "type": "string", - "anyOf": [ - { - "format": "date" - }, - { - "format": "date-time" - } - ] - } - }, - "required": [ - "state" - ] - } - } -} diff --git a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json deleted file mode 100644 index e3f50bc879..0000000000 --- a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://drafts.opds.io/schema/publication.schema.json", - "title": "OPDS Publication", - "type": "object", - "properties": { - "metadata": { - "$ref": "https://readium.org/webpub-manifest/schema/metadata.schema.json" - }, - "links": { - "type": "array", - "items": { - "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" - }, - "contains": { - "description": "A publication must contain at least one acquisition link.", - "properties": { - "rel": { - "anyOf": [ - { - "type": "string", - "enum": [ - "preview", - "http://opds-spec.org/acquisition", - "http://opds-spec.org/acquisition/buy", - "http://opds-spec.org/acquisition/open-access", - "http://opds-spec.org/acquisition/borrow", - "http://opds-spec.org/acquisition/sample", - "http://opds-spec.org/acquisition/subscribe" - ] - }, - { - "type": "array", - "contains": { - "type": "string", - "enum": [ - "preview", - "http://opds-spec.org/acquisition", - "http://opds-spec.org/acquisition/buy", - "http://opds-spec.org/acquisition/open-access", - "http://opds-spec.org/acquisition/borrow", - "http://opds-spec.org/acquisition/sample", - "http://opds-spec.org/acquisition/subscribe" - ] - } - } - ] - } - } - } - }, - "images": { - "description": "Images are meant to be displayed to the user when browsing publications", - "type": "array", - "items": { - "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" - }, - "minItems": 1, - "allOf": [ - { - "description": "At least one image resource must use one of the following formats: image/jpeg, image/avif, image/png or image/gif.", - "contains": { - "properties": { - "type": { - "enum": [ - "image/jpeg", - "image/avif", - "image/png", - "image/gif" - ] - } - } - } - } - ] - } - }, - "required": [ - "metadata", - "links" - ] -} diff --git a/src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json b/src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json deleted file mode 100644 index 7071d7bdfa..0000000000 --- a/src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://readium.org/webpub-manifest/schema/contributor-object.schema.json", - "title": "Contributor Object", - "type": "object", - "properties": { - "name": { - "anyOf": [ - { - "type": "string" - }, - { - "description": "The language in a language map must be a valid BCP 47 tag.", - "type": "object", - "patternProperties": { - "^((?(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang))|((?([A-Za-z]{2,3}(-(?[A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-(?