From 98a22abe8313ecf626dd28a87b40cca945410c50 Mon Sep 17 00:00:00 2001 From: Jonathan Green Date: Thu, 27 Jun 2024 15:51:35 -0300 Subject: [PATCH] Add caching --- src/palace/manager/core/opds_schema.py | 30 +++- .../schema_feed-metadata.schema.json | 52 ++++++ .../drafts.opds.io/schema_feed.schema.json | 155 +++++++++++++++++ .../schema_publication.schema.json | 82 +++++++++ ...fest_schema_contributor-object.schema.json | 54 ++++++ ...ub-manifest_schema_contributor.schema.json | 26 +++ ...rimental_presentation_metadata.schema.json | 36 ++++ ...chema_extensions_epub_metadata.schema.json | 21 +++ ...b-manifest_schema_language-map.schema.json | 20 +++ .../webpub-manifest_schema_link.schema.json | 130 ++++++++++++++ ...ebpub-manifest_schema_metadata.schema.json | 158 ++++++++++++++++++ ...manifest_schema_subject-object.schema.json | 45 +++++ ...webpub-manifest_schema_subject.schema.json | 26 +++ 13 files changed, 826 insertions(+), 9 deletions(-) create mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json create mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json create mode 100644 src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor.schema.json create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_experimental_presentation_metadata.schema.json create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_extensions_epub_metadata.schema.json create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_language-map.schema.json create mode 100644 
src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_link.schema.json
 create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_metadata.schema.json
 create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_subject-object.schema.json
 create mode 100644 src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_subject.schema.json

diff --git a/src/palace/manager/core/opds_schema.py b/src/palace/manager/core/opds_schema.py
index 4d7d0f633..93e20edcc 100644
--- a/src/palace/manager/core/opds_schema.py
+++ b/src/palace/manager/core/opds_schema.py
@@ -2,7 +2,8 @@
 import re
 from collections.abc import Generator
 from importlib.abc import Traversable
-from typing import Any
+from pathlib import Path
+from typing import Any, cast
 from urllib.parse import urlparse
 
 import requests
@@ -25,19 +26,38 @@ def opds2_schema_resources() -> Traversable:
 @to_cached_resource(loads=json.loads)
 def opds2_cached_retrieve(uri: str) -> str:
     """
-    Fetch file from local filesystem if it has a file:// url, else fetch remotely.
+    Fetch files from the resources directory or cache them.
 
-    We use the to_cached_resource decorator, which caches the results of this function,
-    so each uri should only get fetched once.
+    If the uri is a file:// uri, fetch the file from the resources directory. Otherwise,
+    fetch the file from the local cache in the 'cached' directory falling back to downloading
+    the file if it is not found and adding it to the cache.
+
+    To refresh the cache, delete the 'cached' directory. This will force the function to
+    re-download the files.
+
+    :param uri: The file:// or remote uri of the schema document to load.
+    :return: The text of the schema document.
+    :raises requests.RequestException: If an uncached remote document cannot be downloaded.
     """
     parsed = urlparse(uri)
+    resources = opds2_schema_resources()
     if parsed.scheme == "file":
-        filename = "/".join([parsed.netloc, parsed.path])
-        package_file = opds2_schema_resources() / filename
-        with package_file.open("r") as fp:
-            return fp.read()
+        filename = f"{parsed.netloc}{parsed.path}"
+        package_file = resources / filename
     else:
-        return requests.get(uri).text
+        netloc_dir = parsed.netloc
+        filename = parsed.path.removeprefix("/").replace("/", "_")
+        package_file = resources / "cached" / netloc_dir / filename
+        if not package_file.is_file():
+            # Fail before writing, so an error page or a hung request never ends up
+            # stored in the cache as if it were a schema.
+            response = requests.get(uri, timeout=30)
+            response.raise_for_status()
+            cached_dir = cast(Path, resources / "cached" / netloc_dir)
+            cached_dir.mkdir(parents=True, exist_ok=True)
+            (cached_dir / filename).write_text(response.text)
+
+    return package_file.read_text()
 
 
 def opds2_pattern_validator(
diff --git a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json
new file mode 100644
index 000000000..583d27bed
--- /dev/null
+++ b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed-metadata.schema.json
@@ -0,0 +1,52 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://drafts.opds.io/schema/feed-metadata.schema.json",
+  "title": "OPDS Metadata",
+  "type": "object",
+  "properties": {
+    "identifier": {
+      "type": "string",
+      "format": "uri"
+    },
+    "@type": {
+      "type": "string",
+      "format": "uri"
+    },
+    "title": {
+      "type": [
+        "string",
+        "array",
+        "object"
+      ]
+    },
+    "subtitle": {
+      "type": [
+        "string",
+        "array",
+        "object"
+      ]
+    },
+    "modified": {
+      "type": "string",
+      "format": "date-time"
+    },
+    "description": {
+      "type": "string"
+    },
+    "itemsPerPage": {
+      "type": "integer",
+      "exclusiveMinimum": 0
+    },
+    "currentPage": {
+      "type": "integer",
+      "exclusiveMinimum": 0
+    },
+    "numberOfItems": {
+      "type": "integer",
+      "minimum": 0
+    }
+  },
+  "required": [
+    "title"
+  ]
+}
diff --git
a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json new file mode 100644 index 000000000..823b442d7 --- /dev/null +++ b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_feed.schema.json @@ -0,0 +1,155 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://drafts.opds.io/schema/feed.schema.json", + "title": "OPDS Feed", + "type": "object", + "properties": { + "metadata": { + "description": "Contains feed-level metadata such as title or number of items", + "$ref": "feed-metadata.schema.json" + }, + "links": { + "description": "Feed-level links such as search or pagination", + "type": "array", + "items": { + "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" + }, + "uniqueItems": true, + "minItems": 1, + "contains": { + "properties": { + "rel": { + "anyOf": [ + { + "type": "string", + "const": "self" + }, + { + "type": "array", + "contains": { + "const": "self" + } + } + ] + } + }, + "required": [ + "rel" + ] + } + }, + "publications": { + "description": "A list of publications that can be acquired", + "type": "array", + "items": { + "$ref": "publication.schema.json" + }, + "uniqueItems": true, + "minItems": 1 + }, + "navigation": { + "description": "Navigation for the catalog using links", + "type": "array", + "items": { + "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" + }, + "uniqueItems": true, + "minItems": 1, + "allOf": [ + { + "description": "Each Link Object in a navigation collection must contain a title", + "items": { + "required": [ + "title" + ] + } + } + ] + }, + "facets": { + "description": "Facets are meant to re-order or obtain a subset for the current list of publications", + "type": "array", + "items": { + "type": "object", + "properties": { + "metadata": { + "$ref": "feed-metadata.schema.json" + }, + "links": { + "type": "array", + 
"items": { + "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" + }, + "uniqueItems": true, + "minItems": 1 + } + } + }, + "uniqueItems": true, + "minItems": 1 + }, + "groups": { + "description": "Groups provide a curated experience, grouping publications or navigation links together", + "type": "array", + "items": { + "type": "object", + "properties": { + "metadata": { + "$ref": "feed-metadata.schema.json" + }, + "links": { + "type": "array", + "items": { + "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" + }, + "uniqueItems": true, + "minItems": 1 + }, + "publications": { + "type": "array", + "items": { + "$ref": "publication.schema.json" + }, + "uniqueItems": true, + "minItems": 1 + }, + "navigation": { + "type": "array", + "items": { + "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" + }, + "uniqueItems": true, + "minItems": 1 + } + }, + "required": [ + "metadata" + ] + } + } + }, + "required": [ + "metadata", + "links" + ], + "additionalProperties": { + "$ref": "https://readium.org/webpub-manifest/schema/subcollection.schema.json" + }, + "anyOf": [ + { + "required": [ + "publications" + ] + }, + { + "required": [ + "navigation" + ] + }, + { + "required": [ + "groups" + ] + } + ] +} diff --git a/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json new file mode 100644 index 000000000..e3f50bc87 --- /dev/null +++ b/src/palace/manager/resources/opds2_schema/cached/drafts.opds.io/schema_publication.schema.json @@ -0,0 +1,82 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://drafts.opds.io/schema/publication.schema.json", + "title": "OPDS Publication", + "type": "object", + "properties": { + "metadata": { + "$ref": "https://readium.org/webpub-manifest/schema/metadata.schema.json" + }, + "links": { + "type": "array", + "items": { + "$ref": 
"https://readium.org/webpub-manifest/schema/link.schema.json" + }, + "contains": { + "description": "A publication must contain at least one acquisition link.", + "properties": { + "rel": { + "anyOf": [ + { + "type": "string", + "enum": [ + "preview", + "http://opds-spec.org/acquisition", + "http://opds-spec.org/acquisition/buy", + "http://opds-spec.org/acquisition/open-access", + "http://opds-spec.org/acquisition/borrow", + "http://opds-spec.org/acquisition/sample", + "http://opds-spec.org/acquisition/subscribe" + ] + }, + { + "type": "array", + "contains": { + "type": "string", + "enum": [ + "preview", + "http://opds-spec.org/acquisition", + "http://opds-spec.org/acquisition/buy", + "http://opds-spec.org/acquisition/open-access", + "http://opds-spec.org/acquisition/borrow", + "http://opds-spec.org/acquisition/sample", + "http://opds-spec.org/acquisition/subscribe" + ] + } + } + ] + } + } + } + }, + "images": { + "description": "Images are meant to be displayed to the user when browsing publications", + "type": "array", + "items": { + "$ref": "https://readium.org/webpub-manifest/schema/link.schema.json" + }, + "minItems": 1, + "allOf": [ + { + "description": "At least one image resource must use one of the following formats: image/jpeg, image/avif, image/png or image/gif.", + "contains": { + "properties": { + "type": { + "enum": [ + "image/jpeg", + "image/avif", + "image/png", + "image/gif" + ] + } + } + } + } + ] + } + }, + "required": [ + "metadata", + "links" + ] +} diff --git a/src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json b/src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json new file mode 100644 index 000000000..7071d7bdf --- /dev/null +++ b/src/palace/manager/resources/opds2_schema/cached/readium.org/webpub-manifest_schema_contributor-object.schema.json @@ -0,0 +1,54 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", 
+ "$id": "https://readium.org/webpub-manifest/schema/contributor-object.schema.json", + "title": "Contributor Object", + "type": "object", + "properties": { + "name": { + "anyOf": [ + { + "type": "string" + }, + { + "description": "The language in a language map must be a valid BCP 47 tag.", + "type": "object", + "patternProperties": { + "^((?(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang))|((?([A-Za-z]{2,3}(-(?[A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-(?