Skip to content

Commit

Permalink
Support of JSON References
Browse files Browse the repository at this point in the history
  • Loading branch information
libretto committed Nov 13, 2024
1 parent 1ea3ad5 commit 3c6e672
Show file tree
Hide file tree
Showing 6 changed files with 1,408 additions and 6 deletions.
48 changes: 44 additions & 4 deletions src/karapace/schema_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from avro.schema import parse as avro_parse, Schema as AvroSchema
from collections.abc import Collection, Mapping, Sequence
from dataclasses import dataclass
from jsonschema import Draft7Validator
from jsonschema import Draft7Validator, RefResolver
from jsonschema.exceptions import SchemaError
from karapace.dependency import Dependency
from karapace.errors import InvalidSchema, InvalidVersion, VersionNotFoundException
Expand Down Expand Up @@ -47,8 +47,12 @@ def parse_avro_schema_definition(s: str, validate_enum_symbols: bool = True, val
return avro_parse(json_encode(json_data), validate_enum_symbols=validate_enum_symbols, validate_names=validate_names)


def parse_jsonschema_definition(schema_definition: str) -> Draft7Validator:
"""Parses and validates `schema_definition`.
class InvalidValidatorRegistry(Exception):
pass


def parse_jsonschema_definition(schema_definition: str, resolver: RefResolver | None = None) -> Draft7Validator:
"""Parses and validates `schema_definition` with its `dependencies`.
Raises:
SchemaError: If `schema_definition` is not a valid Draft7 schema.
Expand All @@ -57,6 +61,8 @@ def parse_jsonschema_definition(schema_definition: str) -> Draft7Validator:
# TODO: Annotations dictate Mapping[str, Any] here, but we have unit tests that
# use bool values and fail if we assert isinstance(_, dict).
Draft7Validator.check_schema(schema) # type: ignore[arg-type]
if resolver:
return Draft7Validator(schema, resolver=resolver) # type: ignore[arg-type]
return Draft7Validator(schema) # type: ignore[arg-type]


Expand Down Expand Up @@ -195,6 +201,40 @@ def schema(self) -> Draft7Validator | AvroSchema | ProtobufSchema:
return parsed_typed_schema.schema


def json_resolver(schema_str: str, dependencies: Mapping[str, Dependency] | None = None) -> RefResolver | None:
# RefResolver is deprecated but it still used in karapace code
# see normalize_schema_rec() function in src/karapace/compatibility/jsonschema/utils.py
# In case when karapace JSON support will be updated we must rewrite this code to use
# referencing.Registry instead of RefResolver
schema_store: dict = {}
stack: list[tuple[str, Mapping[str, Dependency] | None]] = [(schema_str, dependencies)]
if dependencies is None:
return None
while stack:
current_schema_str, current_dependencies = stack.pop()
if current_dependencies:
stack.append((current_schema_str, None))
for dependency in current_dependencies.values():
stack.append((dependency.schema.schema_str, dependency.schema.dependencies))
else:
schema_json = json_decode(current_schema_str)
if isinstance(schema_json, dict):
schema_store[schema_json["$id"]] = schema_json
else:
# In the case of schemas with references, we only support schemas with a canonical structure,
# which must include a $id in the reference.
raise InvalidSchema
main_schema_json = json_decode(schema_str)
if not isinstance(main_schema_json, dict):
# In the case of schemas with references, we only support schemas with a canonical structure, which must
# contain an $id tag within the reference. Simple main schemas of types such as bool, int, str, etc.,
# are not supported.
raise InvalidSchema

resolver = RefResolver.from_schema(main_schema_json, store=schema_store)
return resolver


def parse(
schema_type: SchemaType,
schema_str: str,
Expand All @@ -221,7 +261,7 @@ def parse(

elif schema_type is SchemaType.JSONSCHEMA:
try:
parsed_schema = parse_jsonschema_definition(schema_str)
parsed_schema = parse_jsonschema_definition(schema_str, resolver=json_resolver(schema_str, dependencies))
# TypeError - Raised when the user forgets to encode the schema as a string.
except (TypeError, JSONDecodeError, SchemaError, AssertionError) as e:
raise InvalidSchema from e
Expand Down
14 changes: 13 additions & 1 deletion src/karapace/schema_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,19 @@ def _handle_msg_schema(self, key: dict, value: dict | None) -> None:

parsed_schema: Draft7Validator | AvroSchema | ProtobufSchema | None = None
resolved_dependencies: dict[str, Dependency] | None = None
if schema_type_parsed in [SchemaType.AVRO, SchemaType.JSONSCHEMA]:
if schema_type_parsed == SchemaType.JSONSCHEMA:
try:
if schema_references:
candidate_references = [reference_from_mapping(reference_data) for reference_data in schema_references]
resolved_references, resolved_dependencies = self.resolve_references(candidate_references)
schema_str = json.dumps(json.loads(schema_str), sort_keys=True)
except json.JSONDecodeError as e:
LOG.warning("Schema is not valid JSON")
raise e
except InvalidReferences as e:
LOG.exception("Invalid JSON references")
raise e
elif schema_type_parsed == SchemaType.AVRO:
try:
schema_str = json.dumps(json.loads(schema_str), sort_keys=True)
except json.JSONDecodeError as exc:
Expand Down
2 changes: 1 addition & 1 deletion src/karapace/schema_registry_apis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,7 @@ def _validate_references(
content_type=content_type,
status=HTTPStatus.BAD_REQUEST,
)
if references and schema_type != SchemaType.PROTOBUF:
if references and schema_type != SchemaType.PROTOBUF and schema_type != SchemaType.JSONSCHEMA:
self.r(
body={
"error_code": SchemaErrorCodes.REFERENCES_SUPPORT_NOT_IMPLEMENTED.value,
Expand Down
Loading

0 comments on commit 3c6e672

Please sign in to comment.