Skip to content

Commit

Permalink
One more update to handle another case
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathangreen committed Jun 28, 2024
1 parent f234f88 commit adb1809
Show file tree
Hide file tree
Showing 3 changed files with 186 additions and 7 deletions.
99 changes: 92 additions & 7 deletions src/palace/manager/core/opds_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from urllib.parse import urlparse

from jsonschema import Draft7Validator, validators
from jsonschema._utils import extras_msg
from jsonschema.exceptions import ValidationError
from jsonschema.protocols import Validator
from referencing import Registry
Expand Down Expand Up @@ -49,26 +50,108 @@ def opds2_cached_retrieve(uri: str) -> str:
return package_file.read_text()


def opds2_regex_replace(pattern: str) -> str:
    """
    Convert PCRE-style named groups in a regex pattern to Python syntax.

    The OPDS2 schema uses regex patterns with named groups written as
    ``(?<name>...)``, which is a valid PCRE pattern, but not valid in Python's
    re module. This function converts the named groups to use the Python
    specific ``(?P<name>...)`` syntax.

    Only an opening ``(?<`` that is immediately followed by an identifier and
    ``>`` is rewritten. Lookbehind assertions (``(?<=...)`` and ``(?<!...)``)
    share the ``(?<`` prefix but are already valid Python, so they are left
    untouched.

    :param pattern: The regex pattern to convert.
    :return: The pattern with every named group rewritten to ``(?P<name>...)``.
    """
    return re.sub(r"\(\?<([A-Za-z_][A-Za-z0-9_]*)>", r"(?P<\1>", pattern)


def opds2_pattern_validator(
    validator: Validator, patrn: str, instance: Any, schema: dict[str, Any]
) -> Generator[ValidationError, None, None]:
    """
    Validation function to validate a pattern element.

    The bulk of this function is copied from the jsonschema library. It was copied from
    jsonschema._keywords.pattern. They put their validation functions in a private module,
    and the docs mention not to extend them. So we copied the function here.

    The only change is the first line, which converts the pattern from the schema with
    opds2_regex_replace. We do this because the OPDS2 schema uses regex patterns with
    named groups, which are valid PCRE patterns, but not valid in Python's re module.

    :param validator: The validator running this keyword; used for type checks.
    :param patrn: The value of the "pattern" keyword from the schema.
    :param instance: The value being validated. Non-string values are ignored.
    :param schema: The schema object that contains the "pattern" keyword.
    :return: A generator yielding a ValidationError if the instance is a string that
        does not match the pattern.
    """
    # Convert once through the shared helper so every keyword handles named
    # groups the same way.
    patrn = opds2_regex_replace(patrn)
    if validator.is_type(instance, "string") and not re.search(patrn, instance):
        yield ValidationError(f"{instance!r} does not match {patrn!r}")


def opds2_pattern_properties_validator(
    validator: Validator,
    patternProperties: dict[str, Any],
    instance: dict[str, Any],
    schema: dict[str, Any],
) -> Generator[ValidationError, None, None]:
    """
    Validation function to validate a patternProperties element.

    The bulk of this function is copied from the jsonschema library. It was copied from
    jsonschema._keywords.patternProperties. They put their validation functions in a
    private module, and the docs mention not to extend them. So we copied the function here.

    The only change is that each pattern is converted with opds2_regex_replace before it
    is matched against the property names.

    :param validator: The validator running this keyword; used for type checks and to
        descend into matching properties.
    :param patternProperties: The value of the "patternProperties" keyword, mapping
        regex patterns to subschemas.
    :param instance: The value being validated. Non-object values are ignored.
    :param schema: The schema object that contains the "patternProperties" keyword.
    :return: A generator yielding the ValidationErrors produced by validating each
        matching property against its subschema.
    """
    if not validator.is_type(instance, "object"):
        return

    for pattern, subschema in patternProperties.items():
        # Match with the Python-compatible form of the pattern, but keep the
        # original key for schema_path so the error path matches the key that
        # actually appears in the schema.
        python_pattern = opds2_regex_replace(pattern)
        for k, v in instance.items():
            if re.search(python_pattern, k):
                yield from validator.descend(
                    v,
                    subschema,
                    path=k,
                    schema_path=pattern,
                )


def opds2_additional_properties_validator(
    validator: Validator,
    aP: dict[str, Any],
    instance: dict[str, Any],
    schema: dict[str, Any],
) -> Generator[ValidationError, None, None]:
    """
    Validation function to validate an additionalProperties element.

    The bulk of this function is copied from the jsonschema library. It was copied from
    jsonschema._keywords.additionalProperties. They put their validation functions in a
    private module, and the docs mention not to extend them. So we copied the function here.

    The only change is that the patternProperties patterns are converted with
    opds2_regex_replace before they are used to decide which properties are additional.

    :param validator: The validator running this keyword; used for type checks and to
        descend into additional properties.
    :param aP: The value of the "additionalProperties" keyword: either a subschema
        that additional properties must satisfy, or a falsy value that forbids them.
    :param instance: The value being validated. Non-object values are ignored.
    :param schema: The schema object that contains the "additionalProperties" keyword.
    :return: A generator yielding a ValidationError for each violation found.
    """

    def additional_properties(
        instance: dict[str, Any], schema: dict[str, Any]
    ) -> Generator[str, None, None]:
        """Yield the names in instance not covered by properties or patternProperties."""
        properties = schema.get("properties", {})
        # Each pattern is tested on its own instead of being joined with "|".
        # A combined regex fails to compile when two patterns share a group
        # name, and it changes the meaning of numbered backreferences.
        patterns = [
            opds2_regex_replace(each) for each in schema.get("patternProperties", {})
        ]
        for name in instance:
            if name in properties:
                continue
            if any(re.search(each, name) for each in patterns):
                continue
            yield name

    if not validator.is_type(instance, "object"):
        return

    extras = set(additional_properties(instance, schema))

    if validator.is_type(aP, "object"):
        for extra in extras:
            yield from validator.descend(instance[extra], aP, path=extra)
    elif not aP and extras:
        if "patternProperties" in schema:
            verb = "does" if len(extras) == 1 else "do"
            joined = ", ".join(repr(each) for each in sorted(extras))
            patterns = ", ".join(
                repr(each) for each in sorted(schema["patternProperties"])
            )
            error = f"{joined} {verb} not match any of the regexes: {patterns}"
            yield ValidationError(error)
        else:
            error = "Additional properties are not allowed (%s %s unexpected)"
            yield ValidationError(error % extras_msg(sorted(extras, key=str)))


def opds2_schema_registry() -> Registry:
"""
Create a Registry that loads schemas with the opds2_cached_retrieve function.
Expand All @@ -90,6 +173,8 @@ def opds2_schema_validator(schema: dict[str, Any]) -> Validator:
version="draft7",
validators={
"pattern": opds2_pattern_validator,
"patternProperties": opds2_pattern_properties_validator,
"additionalProperties": opds2_additional_properties_validator,
},
)
return validator_cls(schema, registry=registry)
Expand Down
93 changes: 93 additions & 0 deletions tests/files/opds2/bad_feed2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{
"metadata": {
"title": "Example listing publications"
},
"links": [
{
"rel": "self",
"href": "http://example.com/new",
"type": "application/opds+json"
}
],
"publications": [
{
"metadata": {
"@type": "http://schema.org/Book",
"title": {
"fr": "Vingt mille lieues sous les mers",
"en": "Twenty Thousand Leagues Under the Sea",
"ja": "海底二万里"
},
"description": "Adventures of Huckleberry Finn is a novel by Mark Twain, first published in the United Kingdom in December 1884 and in the United States in February 1885.",
"http://palaceproject.io/terms/timeTracking": true,
"author": [
{
"name": "Mark Twain",
"identifier": "http://example.org/mark-twain"
},
{
"name": "Samuel Langhorne Clemens"
}
],
"identifier": "http://example.org/huckleberry-finn",
"language": "this is a bad language code",
"publisher": {
"name": "Test Publisher"
},
"published": "2014-09-28T00:00:00Z",
"modified": "2015-09-29T17:00:00Z",
"subject": [
{
"scheme": "http://schema.org/audience",
"code": "juvenile-fiction",
"name": "Juvenile Fiction",
"links": []
}
]
},
"links": [
{
"type": "application/opds-publication+json",
"rel": "http://opds-spec.org/acquisition/borrow",
"href": "http://example.org/huckleberry-finn",
"properties": {
"availability": {
"state": "available"
},
"indirectAcquisition": [
{
"type": "application/vnd.adobe.adept+xml",
"child": [
{
"type": "application/epub+zip"
}
]
},
{
"type": "application/vnd.readium.lcp.license.v1.0+json",
"child": [
{
"type": "application/epub+zip"
}
]
}
]
}
},
{
"rel": "http://opds-spec.org/acquisition/sample",
"type": "application/epub+zip",
"href": "https://example.com/medias/e5/318061475b11cf8c8e3752da2a1cf68384d8bf.epub"
}
],
"images": [
{
"href": "http://example.org/cover.jpg",
"type": "image/jpeg",
"height": 1400,
"width": 800
}
]
}
]
}
1 change: 1 addition & 0 deletions tests/manager/core/test_opds_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ class TestOPDS2Validation:
("feed.json", False),
("feed2.json", False),
("bad_feed.json", True),
("bad_feed2.json", True),
],
)
def test_opds2_schema(
Expand Down

0 comments on commit adb1809

Please sign in to comment.