From ba5c9c1a2a03880bd09509bc727ddc2c6202b680 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 30 Mar 2021 11:58:53 +0300 Subject: [PATCH 001/168] protbuf support skeleton --- karapace/compatibility/__init__.py | 18 ++++++++++++++++++ karapace/kafka_rest_apis/__init__.py | 4 +++- karapace/protobuf_compatibility.py | 22 ++++++++++++++++++++++ karapace/schema_reader.py | 13 +++++++++++++ karapace/schema_registry_apis.py | 2 +- karapace/serialization.py | 8 ++++++++ 6 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 karapace/protobuf_compatibility.py diff --git a/karapace/compatibility/__init__.py b/karapace/compatibility/__init__.py index b2f3914d4..776f725df 100644 --- a/karapace/compatibility/__init__.py +++ b/karapace/compatibility/__init__.py @@ -11,6 +11,7 @@ SchemaIncompatibilityType ) from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility +from karapace.protobuf_compatibility import check_protobuf_schema_compatibility from karapace.schema_reader import SchemaType, TypedSchema import logging @@ -52,6 +53,11 @@ def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Valida return jsonschema_compatibility(reader, writer) +def check_protobuf_compatibility(reader_schema, writer_schema) -> SchemaCompatibilityResult: + result = check_protobuf_schema_compatibility(reader_schema, writer_schema) + return result + + def check_compatibility( source: TypedSchema, target: TypedSchema, compatibility_mode: CompatibilityModes ) -> SchemaCompatibilityResult: @@ -88,6 +94,18 @@ def check_compatibility( result = check_jsonschema_compatibility(reader=target.schema, writer=source.schema) result = result.merged_with(check_jsonschema_compatibility(reader=source.schema, writer=target.schema)) + elif source.schema_type is SchemaType.PROTOBUF: + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = 
check_protobuf_compatibility(reader_schema=target.schema, writer_schema=source.schema) + + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = check_protobuf_compatibility(reader_schema=source.schema, writer_schema=target.schema) + + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = check_protobuf_compatibility(reader_schema=target.schema, writer_schema=source.schema) + result = result.merged_with( + check_protobuf_compatibility(reader_schema=source.schema, writer_schema=target.schema) + ) else: result = SchemaCompatibilityResult.incompatible( incompat_type=SchemaIncompatibilityType.type_mismatch, diff --git a/karapace/kafka_rest_apis/__init__.py b/karapace/kafka_rest_apis/__init__.py index 1dec3d8b8..3a18d0743 100644 --- a/karapace/kafka_rest_apis/__init__.py +++ b/karapace/kafka_rest_apis/__init__.py @@ -27,7 +27,9 @@ RECORD_CODES = [42201, 42202] KNOWN_FORMATS = {"json", "avro", "binary"} OFFSET_RESET_STRATEGIES = {"latest", "earliest"} -SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA} +# TODO: PROTOBUF* check schema mapping +SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA, "protobuf": SchemaType.PROTOBUF} + TypedConsumer = namedtuple("TypedConsumer", ["consumer", "serialization_format", "config"]) diff --git a/karapace/protobuf_compatibility.py b/karapace/protobuf_compatibility.py new file mode 100644 index 000000000..3eeb81748 --- /dev/null +++ b/karapace/protobuf_compatibility.py @@ -0,0 +1,22 @@ +# TODO: PROTOBUF* this functionality must be implemented +from karapace.avro_compatibility import SchemaCompatibilityResult + + +def parse_protobuf_schema_definition(schema_definition: str) -> str: + """ Parses and validates `schema_definition`. + + Raises: + Nothing yet. 
+ + """ + + return schema_definition + + +def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: + # TODO: PROTOBUF* for investigation purposes yet + + if writer != reader: + return SchemaCompatibilityResult.compatible() + + return SchemaCompatibilityResult.compatible() diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 224366383..c74551955 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -14,6 +14,7 @@ from kafka.errors import NoBrokersAvailable, NodeNotReadyError, TopicAlreadyExistsError from karapace import constants from karapace.avro_compatibility import parse_avro_schema_definition +from karapace.protobuf_compatibility import parse_protobuf_schema_definition from karapace.statsd import StatsClient from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue @@ -22,6 +23,7 @@ import json import logging +import sys import time log = logging.getLogger(__name__) @@ -71,12 +73,23 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen except SchemaParseException as e: raise InvalidSchema from e + @staticmethod + def parse_protobuf(schema_str: str): + try: + return TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) + # TypeError - Raised when the user forgets to encode the schema as a string. 
+ except: # FIXME: bare except + print("Unexpected error:", sys.exc_info()[0]) + raise InvalidSchema + @staticmethod def parse(schema_type: SchemaType, schema_str: str): # pylint: disable=inconsistent-return-statements if schema_type is SchemaType.AVRO: return TypedSchema.parse_avro(schema_str) if schema_type is SchemaType.JSONSCHEMA: return TypedSchema.parse_json(schema_str) + if schema_type is SchemaType.PROTOBUF: + return TypedSchema.parse_protobuf(schema_str) raise InvalidSchema(f"Unknown parser {schema_type} for {schema_str}") def to_json(self): diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 9f85398cd..018a38c82 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -529,7 +529,7 @@ def _validate_schema_request_body(self, content_type, body) -> None: def _validate_schema_type(self, content_type, body) -> None: schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO.value)) - if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO}: + if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO, SchemaType.PROTOBUF}: self.r( body={ "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, diff --git a/karapace/serialization.py b/karapace/serialization.py index 14cbd6dab..4325dd583 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -135,6 +135,7 @@ def get_subject_name(self, topic_name: str, schema: str, subject_type: str, sche namespace = schema_typed.schema.namespace if schema_type is SchemaType.JSONSCHEMA: namespace = schema_typed.to_json().get("namespace", "dummy") + # TODO: PROTOBUF* Seems protobuf does not use namespaces in terms of AVRO return f"{self.subject_name_strategy(topic_name, namespace)}-{subject_type}" async def get_schema_for_subject(self, subject: str) -> TypedSchema: @@ -184,6 +185,10 @@ def read_value(schema: TypedSchema, bio: io.BytesIO): except ValidationError as e: raise InvalidPayload from e return value + if 
schema.schema_type is SchemaType.PROTOBUF: + # TODO: PROTOBUF* we need use protobuf validator there + value = bio.read() + return value raise ValueError("Unknown schema type") @@ -197,6 +202,9 @@ def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict): except ValidationError as e: raise InvalidPayload from e bio.write(json_encode(value, binary=True)) + elif schema.schema_type is SchemaType.PROTOBUF: + # TODO: PROTOBUF* we need use protobuf validator there + bio.write(value) else: raise ValueError("Unknown schema type") From 6e782ca83770c4fe14ac6d81bf13df9a6f4e01ee Mon Sep 17 00:00:00 2001 From: amrutha_shanbhag Date: Tue, 20 Apr 2021 14:24:46 +1000 Subject: [PATCH 002/168] Revert "protbuf support skeleton" This reverts commit ba5c9c1a2a03880bd09509bc727ddc2c6202b680. --- karapace/compatibility/__init__.py | 18 ------------------ karapace/kafka_rest_apis/__init__.py | 4 +--- karapace/protobuf_compatibility.py | 22 ---------------------- karapace/schema_reader.py | 13 ------------- karapace/schema_registry_apis.py | 2 +- karapace/serialization.py | 8 -------- 6 files changed, 2 insertions(+), 65 deletions(-) delete mode 100644 karapace/protobuf_compatibility.py diff --git a/karapace/compatibility/__init__.py b/karapace/compatibility/__init__.py index 776f725df..b2f3914d4 100644 --- a/karapace/compatibility/__init__.py +++ b/karapace/compatibility/__init__.py @@ -11,7 +11,6 @@ SchemaIncompatibilityType ) from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility -from karapace.protobuf_compatibility import check_protobuf_schema_compatibility from karapace.schema_reader import SchemaType, TypedSchema import logging @@ -53,11 +52,6 @@ def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Valida return jsonschema_compatibility(reader, writer) -def check_protobuf_compatibility(reader_schema, writer_schema) -> SchemaCompatibilityResult: - result = check_protobuf_schema_compatibility(reader_schema, 
writer_schema) - return result - - def check_compatibility( source: TypedSchema, target: TypedSchema, compatibility_mode: CompatibilityModes ) -> SchemaCompatibilityResult: @@ -94,18 +88,6 @@ def check_compatibility( result = check_jsonschema_compatibility(reader=target.schema, writer=source.schema) result = result.merged_with(check_jsonschema_compatibility(reader=source.schema, writer=target.schema)) - elif source.schema_type is SchemaType.PROTOBUF: - if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_protobuf_compatibility(reader_schema=target.schema, writer_schema=source.schema) - - elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_protobuf_compatibility(reader_schema=source.schema, writer_schema=target.schema) - - elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_protobuf_compatibility(reader_schema=target.schema, writer_schema=source.schema) - result = result.merged_with( - check_protobuf_compatibility(reader_schema=source.schema, writer_schema=target.schema) - ) else: result = SchemaCompatibilityResult.incompatible( incompat_type=SchemaIncompatibilityType.type_mismatch, diff --git a/karapace/kafka_rest_apis/__init__.py b/karapace/kafka_rest_apis/__init__.py index 3a18d0743..1dec3d8b8 100644 --- a/karapace/kafka_rest_apis/__init__.py +++ b/karapace/kafka_rest_apis/__init__.py @@ -27,9 +27,7 @@ RECORD_CODES = [42201, 42202] KNOWN_FORMATS = {"json", "avro", "binary"} OFFSET_RESET_STRATEGIES = {"latest", "earliest"} -# TODO: PROTOBUF* check schema mapping -SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA, "protobuf": SchemaType.PROTOBUF} - +SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA} TypedConsumer = namedtuple("TypedConsumer", ["consumer", "serialization_format", "config"]) diff --git 
a/karapace/protobuf_compatibility.py b/karapace/protobuf_compatibility.py deleted file mode 100644 index 3eeb81748..000000000 --- a/karapace/protobuf_compatibility.py +++ /dev/null @@ -1,22 +0,0 @@ -# TODO: PROTOBUF* this functionality must be implemented -from karapace.avro_compatibility import SchemaCompatibilityResult - - -def parse_protobuf_schema_definition(schema_definition: str) -> str: - """ Parses and validates `schema_definition`. - - Raises: - Nothing yet. - - """ - - return schema_definition - - -def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: - # TODO: PROTOBUF* for investigation purposes yet - - if writer != reader: - return SchemaCompatibilityResult.compatible() - - return SchemaCompatibilityResult.compatible() diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index c74551955..224366383 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -14,7 +14,6 @@ from kafka.errors import NoBrokersAvailable, NodeNotReadyError, TopicAlreadyExistsError from karapace import constants from karapace.avro_compatibility import parse_avro_schema_definition -from karapace.protobuf_compatibility import parse_protobuf_schema_definition from karapace.statsd import StatsClient from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue @@ -23,7 +22,6 @@ import json import logging -import sys import time log = logging.getLogger(__name__) @@ -73,23 +71,12 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen except SchemaParseException as e: raise InvalidSchema from e - @staticmethod - def parse_protobuf(schema_str: str): - try: - return TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) - # TypeError - Raised when the user forgets to encode the schema as a string. 
- except: # FIXME: bare except - print("Unexpected error:", sys.exc_info()[0]) - raise InvalidSchema - @staticmethod def parse(schema_type: SchemaType, schema_str: str): # pylint: disable=inconsistent-return-statements if schema_type is SchemaType.AVRO: return TypedSchema.parse_avro(schema_str) if schema_type is SchemaType.JSONSCHEMA: return TypedSchema.parse_json(schema_str) - if schema_type is SchemaType.PROTOBUF: - return TypedSchema.parse_protobuf(schema_str) raise InvalidSchema(f"Unknown parser {schema_type} for {schema_str}") def to_json(self): diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 018a38c82..9f85398cd 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -529,7 +529,7 @@ def _validate_schema_request_body(self, content_type, body) -> None: def _validate_schema_type(self, content_type, body) -> None: schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO.value)) - if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO, SchemaType.PROTOBUF}: + if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO}: self.r( body={ "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, diff --git a/karapace/serialization.py b/karapace/serialization.py index 4325dd583..14cbd6dab 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -135,7 +135,6 @@ def get_subject_name(self, topic_name: str, schema: str, subject_type: str, sche namespace = schema_typed.schema.namespace if schema_type is SchemaType.JSONSCHEMA: namespace = schema_typed.to_json().get("namespace", "dummy") - # TODO: PROTOBUF* Seems protobuf does not use namespaces in terms of AVRO return f"{self.subject_name_strategy(topic_name, namespace)}-{subject_type}" async def get_schema_for_subject(self, subject: str) -> TypedSchema: @@ -185,10 +184,6 @@ def read_value(schema: TypedSchema, bio: io.BytesIO): except ValidationError as e: raise InvalidPayload from e return value - if 
schema.schema_type is SchemaType.PROTOBUF: - # TODO: PROTOBUF* we need use protobuf validator there - value = bio.read() - return value raise ValueError("Unknown schema type") @@ -202,9 +197,6 @@ def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict): except ValidationError as e: raise InvalidPayload from e bio.write(json_encode(value, binary=True)) - elif schema.schema_type is SchemaType.PROTOBUF: - # TODO: PROTOBUF* we need use protobuf validator there - bio.write(value) else: raise ValueError("Unknown schema type") From d3aff3948f1d1a75d426fa66775322c459f6258f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 21 Apr 2021 13:55:37 +0300 Subject: [PATCH 003/168] Add protobuf skeleton --- karapace/compatibility/__init__.py | 17 +++++++++++++++++ karapace/kafka_rest_apis/__init__.py | 5 +++-- karapace/protobuf_compatibility.py | 22 ++++++++++++++++++++++ karapace/schema_reader.py | 14 ++++++++++++++ karapace/schema_registry_apis.py | 4 ++-- karapace/serialization.py | 9 +++++++++ 6 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 karapace/protobuf_compatibility.py diff --git a/karapace/compatibility/__init__.py b/karapace/compatibility/__init__.py index 57cc2593b..a278ee802 100644 --- a/karapace/compatibility/__init__.py +++ b/karapace/compatibility/__init__.py @@ -12,6 +12,7 @@ ) from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility from karapace.schema_reader import SchemaType, TypedSchema +from karapace.protobuf_compatibility import check_protobuf_schema_compatibility import logging @@ -61,6 +62,10 @@ def check_avro_compatibility(reader_schema, writer_schema) -> SchemaCompatibilit def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Validator) -> SchemaCompatibilityResult: return jsonschema_compatibility(reader, writer) +def check_protobuf_compatibility(reader_schema, writer_schema) -> SchemaCompatibilityResult: + result = 
check_protobuf_schema_compatibility(reader_schema, writer_schema) + return result + def check_compatibility( old_schema: TypedSchema, new_schema: TypedSchema, compatibility_mode: CompatibilityModes @@ -127,6 +132,18 @@ def check_compatibility( ) ) + elif old_schema.schema_type is SchemaType.PROTOBUF: + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = check_protobuf_compatibility(reader_schema=new_schema.schema, writer_schema=old_schema.schema) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = check_protobuf_compatibility(reader_schema=old_schema.schema, writer_schema=new_schema.schema) + + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = check_protobuf_compatibility(reader_schema=new_schema.schema, writer_schema=old_schema.schema) + result = result.merged_with( + check_protobuf_compatibility(reader_schema=old_schema.schema, writer_schema=new_schema.schema) + ) + else: result = SchemaCompatibilityResult.incompatible( incompat_type=SchemaIncompatibilityType.type_mismatch, diff --git a/karapace/kafka_rest_apis/__init__.py b/karapace/kafka_rest_apis/__init__.py index 4e937b1a1..3a54b5fd0 100644 --- a/karapace/kafka_rest_apis/__init__.py +++ b/karapace/kafka_rest_apis/__init__.py @@ -25,9 +25,10 @@ RECORD_KEYS = ["key", "value", "partition"] PUBLISH_KEYS = {"records", "value_schema", "value_schema_id", "key_schema", "key_schema_id"} RECORD_CODES = [42201, 42202] -KNOWN_FORMATS = {"json", "avro", "binary"} +KNOWN_FORMATS = {"json", "avro", "protobuf", "binary"} OFFSET_RESET_STRATEGIES = {"latest", "earliest"} -SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA} +# TODO: PROTOBUF* check schema mapping +SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA, "protobuf": SchemaType.PROTOBUF} TypedConsumer = namedtuple("TypedConsumer", ["consumer", 
"serialization_format", "config"]) diff --git a/karapace/protobuf_compatibility.py b/karapace/protobuf_compatibility.py new file mode 100644 index 000000000..3eeb81748 --- /dev/null +++ b/karapace/protobuf_compatibility.py @@ -0,0 +1,22 @@ +# TODO: PROTOBUF* this functionality must be implemented +from karapace.avro_compatibility import SchemaCompatibilityResult + + +def parse_protobuf_schema_definition(schema_definition: str) -> str: + """ Parses and validates `schema_definition`. + + Raises: + Nothing yet. + + """ + + return schema_definition + + +def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: + # TODO: PROTOBUF* for investigation purposes yet + + if writer != reader: + return SchemaCompatibilityResult.compatible() + + return SchemaCompatibilityResult.compatible() diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 224366383..5b21fc640 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -4,6 +4,8 @@ Copyright (c) 2019 Aiven Ltd See LICENSE for details """ +import sys + from avro.schema import Schema as AvroSchema, SchemaParseException from enum import Enum, unique from json import JSONDecodeError @@ -14,6 +16,7 @@ from kafka.errors import NoBrokersAvailable, NodeNotReadyError, TopicAlreadyExistsError from karapace import constants from karapace.avro_compatibility import parse_avro_schema_definition +from karapace.protobuf_compatibility import parse_protobuf_schema_definition from karapace.statsd import StatsClient from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue @@ -71,12 +74,23 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen except SchemaParseException as e: raise InvalidSchema from e + @staticmethod + def parse_protobuf(schema_str: str): + try: + return TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) + # TypeError - Raised when the user forgets to encode the 
schema as a string. + except Exception as e: # FIXME: bare except + print("Unexpected error:", sys.exc_info()[0]) + raise InvalidSchema from e + @staticmethod def parse(schema_type: SchemaType, schema_str: str): # pylint: disable=inconsistent-return-statements if schema_type is SchemaType.AVRO: return TypedSchema.parse_avro(schema_str) if schema_type is SchemaType.JSONSCHEMA: return TypedSchema.parse_json(schema_str) + if schema_type is SchemaType.PROTOBUF: + return TypedSchema.parse_protobuf(schema_str) raise InvalidSchema(f"Unknown parser {schema_type} for {schema_str}") def to_json(self): diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 0305692f3..235ac1e62 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -232,7 +232,7 @@ def send_delete_subject_message(self, subject, version): key = '{{"subject":"{}","magic":0,"keytype":"DELETE_SUBJECT"}}'.format(subject) value = '{{"subject":"{}","version":{}}}'.format(subject, version) return self.send_kafka_message(key, value) - +# TODO: PROTOBUF add protobuf compatibility_check async def compatibility_check(self, content_type, *, subject, version, request): """Check for schema compatibility""" body = request.json @@ -543,7 +543,7 @@ def _validate_schema_request_body(self, content_type, body) -> None: def _validate_schema_type(self, content_type, body) -> None: schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO.value)) - if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO}: + if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO, SchemaType.PROTOBUF}: self.r( body={ "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, diff --git a/karapace/serialization.py b/karapace/serialization.py index 14cbd6dab..0b751dae2 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -135,6 +135,7 @@ def get_subject_name(self, topic_name: str, schema: str, subject_type: str, sche namespace = 
schema_typed.schema.namespace if schema_type is SchemaType.JSONSCHEMA: namespace = schema_typed.to_json().get("namespace", "dummy") + # TODO: PROTOBUF* Seems protobuf does not use namespaces in terms of AVRO return f"{self.subject_name_strategy(topic_name, namespace)}-{subject_type}" async def get_schema_for_subject(self, subject: str) -> TypedSchema: @@ -184,6 +185,10 @@ def read_value(schema: TypedSchema, bio: io.BytesIO): except ValidationError as e: raise InvalidPayload from e return value + if schema.schema_type is SchemaType.PROTOBUF: + # TODO: PROTOBUF* we need use protobuf validator there + value = bio.read() + return value raise ValueError("Unknown schema type") @@ -197,6 +202,10 @@ def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict): except ValidationError as e: raise InvalidPayload from e bio.write(json_encode(value, binary=True)) + elif schema.schema_type is SchemaType.PROTOBUF: + # TODO: PROTOBUF* we need use protobuf validator there + bio.write(value) + else: raise ValueError("Unknown schema type") From 46e23c76207f3443cd6b4d8f43a3129bfda5a59b Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 21 Apr 2021 16:30:58 +0300 Subject: [PATCH 004/168] Add skeleton files --- karapace/compatibility/__init__.py | 5 +++-- karapace/schema_reader.py | 3 +-- karapace/schema_registry_apis.py | 3 ++- tests/integration/test_client.py | 14 ++++++++++++++ tests/utils.py | 16 ++++++++++++++++ 5 files changed, 36 insertions(+), 5 deletions(-) diff --git a/karapace/compatibility/__init__.py b/karapace/compatibility/__init__.py index a278ee802..be57e4f0f 100644 --- a/karapace/compatibility/__init__.py +++ b/karapace/compatibility/__init__.py @@ -11,8 +11,8 @@ SchemaIncompatibilityType ) from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility -from karapace.schema_reader import SchemaType, TypedSchema from karapace.protobuf_compatibility import check_protobuf_schema_compatibility +from karapace.schema_reader 
import SchemaType, TypedSchema import logging @@ -62,6 +62,7 @@ def check_avro_compatibility(reader_schema, writer_schema) -> SchemaCompatibilit def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Validator) -> SchemaCompatibilityResult: return jsonschema_compatibility(reader, writer) + def check_protobuf_compatibility(reader_schema, writer_schema) -> SchemaCompatibilityResult: result = check_protobuf_schema_compatibility(reader_schema, writer_schema) return result @@ -141,7 +142,7 @@ def check_compatibility( elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: result = check_protobuf_compatibility(reader_schema=new_schema.schema, writer_schema=old_schema.schema) result = result.merged_with( - check_protobuf_compatibility(reader_schema=old_schema.schema, writer_schema=new_schema.schema) + check_protobuf_compatibility(reader_schema=old_schema.schema, writer_schema=new_schema.schema) ) else: diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 5b21fc640..6dda022fe 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -4,8 +4,6 @@ Copyright (c) 2019 Aiven Ltd See LICENSE for details """ -import sys - from avro.schema import Schema as AvroSchema, SchemaParseException from enum import Enum, unique from json import JSONDecodeError @@ -25,6 +23,7 @@ import json import logging +import sys import time log = logging.getLogger(__name__) diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 235ac1e62..c69b83d10 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -232,7 +232,8 @@ def send_delete_subject_message(self, subject, version): key = '{{"subject":"{}","magic":0,"keytype":"DELETE_SUBJECT"}}'.format(subject) value = '{{"subject":"{}","version":{}}}'.format(subject, version) return self.send_kafka_message(key, value) -# TODO: PROTOBUF add protobuf compatibility_check + + # TODO: PROTOBUF add protobuf 
compatibility_check async def compatibility_check(self, content_type, *, subject, version, request): """Check for schema compatibility""" body = request.json diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index 6ce07f85d..4b918d5bd 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -15,3 +15,17 @@ async def test_remote_client(registry_async_client): stored_id, stored_schema = await reg_cli.get_latest_schema(subject) assert stored_id == sc_id assert stored_schema == schema_avro + + +async def test_remote_client_protobuf(registry_async_client): + schema_avro = TypedSchema.parse(SchemaType.PROTOBUF, schema_avro_json) + reg_cli = SchemaRegistryClient() + reg_cli.client = registry_async_client + subject = new_random_name("subject") + sc_id = await reg_cli.post_new_schema(subject, schema_avro) + assert sc_id >= 0 + stored_schema = await reg_cli.get_schema_for_id(sc_id) + assert stored_schema == schema_avro, f"stored schema {stored_schema.to_json()} is not {schema_avro.to_json()}" + stored_id, stored_schema = await reg_cli.get_latest_schema(subject) + assert stored_id == sc_id + assert stored_schema == schema_avro diff --git a/tests/utils.py b/tests/utils.py index 3a17cc931..ccb50fb52 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -42,6 +42,22 @@ }] }) +schema_protobuf_json = json.dumps({ + "namespace": "example.avro", + "type": "record", + "name": "example.avro.User", + "fields": [{ + "name": "name", + "type": "string" + }, { + "name": "favorite_number", + "type": "int" + }, { + "name": "favorite_color", + "type": "string" + }] +}) + test_objects_jsonschema = [{"foo": 100}, {"foo": 200}] test_objects_avro = [ From f6be6276f6cc3b451394c3840077f89cd61c3aeb Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 21 Apr 2021 16:56:55 +0300 Subject: [PATCH 005/168] remove unfinished tests --- tests/integration/test_client.py | 13 +------------ tests/utils.py | 16 +--------------- 2 files changed, 2 
insertions(+), 27 deletions(-) diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index 4b918d5bd..4be1bc459 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -17,15 +17,4 @@ async def test_remote_client(registry_async_client): assert stored_schema == schema_avro -async def test_remote_client_protobuf(registry_async_client): - schema_avro = TypedSchema.parse(SchemaType.PROTOBUF, schema_avro_json) - reg_cli = SchemaRegistryClient() - reg_cli.client = registry_async_client - subject = new_random_name("subject") - sc_id = await reg_cli.post_new_schema(subject, schema_avro) - assert sc_id >= 0 - stored_schema = await reg_cli.get_schema_for_id(sc_id) - assert stored_schema == schema_avro, f"stored schema {stored_schema.to_json()} is not {schema_avro.to_json()}" - stored_id, stored_schema = await reg_cli.get_latest_schema(subject) - assert stored_id == sc_id - assert stored_schema == schema_avro + diff --git a/tests/utils.py b/tests/utils.py index ccb50fb52..c60547885 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -42,21 +42,7 @@ }] }) -schema_protobuf_json = json.dumps({ - "namespace": "example.avro", - "type": "record", - "name": "example.avro.User", - "fields": [{ - "name": "name", - "type": "string" - }, { - "name": "favorite_number", - "type": "int" - }, { - "name": "favorite_color", - "type": "string" - }] -}) + test_objects_jsonschema = [{"foo": 100}, {"foo": 200}] From 4b2fdb742f0bd4af1e30de51c399a0b274157902 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 21 Apr 2021 17:04:23 +0300 Subject: [PATCH 006/168] fixup lint errors --- tests/integration/test_client.py | 3 --- tests/utils.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index 4be1bc459..6ce07f85d 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -15,6 +15,3 @@ async def test_remote_client(registry_async_client): 
stored_id, stored_schema = await reg_cli.get_latest_schema(subject) assert stored_id == sc_id assert stored_schema == schema_avro - - - diff --git a/tests/utils.py b/tests/utils.py index c60547885..3a17cc931 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -42,8 +42,6 @@ }] }) - - test_objects_jsonschema = [{"foo": 100}, {"foo": 200}] test_objects_avro = [ From e28b735e4f4aa4db5dfc81858234e163da49c72d Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 22 Apr 2021 21:38:47 +0300 Subject: [PATCH 007/168] Changed project structure, and added one test and debugged issues for PR #1 --- karapace/compatibility/__init__.py | 20 +++++++------- karapace/compatibility/protobuf/__init__.py | 0 .../protobuf/checks.py} | 11 -------- karapace/protobuf/__init__.py | 0 karapace/protobuf/schema.py | 8 ++++++ karapace/protobuf/utils.py | 3 +++ karapace/schema_reader.py | 27 ++++++++++++++++--- karapace/serialization.py | 8 +++++- tests/integration/test_client.py | 16 ++++++++++- tests/utils.py | 4 +++ 10 files changed, 71 insertions(+), 26 deletions(-) create mode 100644 karapace/compatibility/protobuf/__init__.py rename karapace/{protobuf_compatibility.py => compatibility/protobuf/checks.py} (67%) create mode 100644 karapace/protobuf/__init__.py create mode 100644 karapace/protobuf/schema.py create mode 100644 karapace/protobuf/utils.py diff --git a/karapace/compatibility/__init__.py b/karapace/compatibility/__init__.py index be57e4f0f..f939ed342 100644 --- a/karapace/compatibility/__init__.py +++ b/karapace/compatibility/__init__.py @@ -11,7 +11,7 @@ SchemaIncompatibilityType ) from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility -from karapace.protobuf_compatibility import check_protobuf_schema_compatibility +from karapace.compatibility.protobuf.checks import check_protobuf_schema_compatibility from karapace.schema_reader import SchemaType, TypedSchema import logging @@ -63,9 +63,8 @@ def check_jsonschema_compatibility(reader: 
Draft7Validator, writer: Draft7Valida return jsonschema_compatibility(reader, writer) -def check_protobuf_compatibility(reader_schema, writer_schema) -> SchemaCompatibilityResult: - result = check_protobuf_schema_compatibility(reader_schema, writer_schema) - return result +def check_protobuf_compatibility(reader, writer) -> SchemaCompatibilityResult: + return check_protobuf_schema_compatibility(reader, writer) def check_compatibility( @@ -135,15 +134,16 @@ def check_compatibility( elif old_schema.schema_type is SchemaType.PROTOBUF: if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: - result = check_protobuf_compatibility(reader_schema=new_schema.schema, writer_schema=old_schema.schema) + result = check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_protobuf_compatibility(reader_schema=old_schema.schema, writer_schema=new_schema.schema) + result = check_protobuf_compatibility(reader=old_schema.schema, writer=new_schema.schema) elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_protobuf_compatibility(reader_schema=new_schema.schema, writer_schema=old_schema.schema) - result = result.merged_with( - check_protobuf_compatibility(reader_schema=old_schema.schema, writer_schema=new_schema.schema) - ) + result = check_protobuf_compatibility(reader=new_schema.schema, writer=old_schema.schema) + result = result.merged_with(check_protobuf_compatibility(reader=old_schema.schema, writer=new_schema.schema)) else: result = SchemaCompatibilityResult.incompatible( diff --git a/karapace/compatibility/protobuf/__init__.py b/karapace/compatibility/protobuf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/karapace/protobuf_compatibility.py b/karapace/compatibility/protobuf/checks.py similarity index 67% rename from 
karapace/protobuf_compatibility.py rename to karapace/compatibility/protobuf/checks.py index 3eeb81748..e761655b2 100644 --- a/karapace/protobuf_compatibility.py +++ b/karapace/compatibility/protobuf/checks.py @@ -2,17 +2,6 @@ from karapace.avro_compatibility import SchemaCompatibilityResult -def parse_protobuf_schema_definition(schema_definition: str) -> str: - """ Parses and validates `schema_definition`. - - Raises: - Nothing yet. - - """ - - return schema_definition - - def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: # TODO: PROTOBUF* for investigation purposes yet diff --git a/karapace/protobuf/__init__.py b/karapace/protobuf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py new file mode 100644 index 000000000..05b66c698 --- /dev/null +++ b/karapace/protobuf/schema.py @@ -0,0 +1,8 @@ +class ProtobufSchema: + schema: str + + def __init__(self, schema: str): + self.schema = schema + + def to_json(self): + return self.schema diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py new file mode 100644 index 000000000..2cf297248 --- /dev/null +++ b/karapace/protobuf/utils.py @@ -0,0 +1,3 @@ +def protobuf_encode(a: str) -> str: + # TODO: PROTOBUF + return a diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 6dda022fe..e6bd92c1f 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -14,7 +14,8 @@ from kafka.errors import NoBrokersAvailable, NodeNotReadyError, TopicAlreadyExistsError from karapace import constants from karapace.avro_compatibility import parse_avro_schema_definition -from karapace.protobuf_compatibility import parse_protobuf_schema_definition +from karapace.protobuf.schema import ProtobufSchema +from karapace.protobuf.utils import protobuf_encode from karapace.statsd import StatsClient from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue @@ 
-40,6 +41,17 @@ def parse_jsonschema_definition(schema_definition: str) -> Draft7Validator: return Draft7Validator(schema) +def parse_protobuf_schema_definition(schema_definition: str) -> ProtobufSchema: + """ Parses and validates `schema_definition`. + + Raises: + Nothing yet. + + """ + + return ProtobufSchema(schema_definition) + + class InvalidSchema(Exception): pass @@ -89,6 +101,7 @@ def parse(schema_type: SchemaType, schema_str: str): # pylint: disable=inconsis if schema_type is SchemaType.JSONSCHEMA: return TypedSchema.parse_json(schema_str) if schema_type is SchemaType.PROTOBUF: + sys.stderr.write(f"ZVALA1: {TypedSchema.parse_protobuf(schema_str)}") return TypedSchema.parse_protobuf(schema_str) raise InvalidSchema(f"Unknown parser {schema_type} for {schema_str}") @@ -97,13 +110,21 @@ def to_json(self): return self.schema.schema if isinstance(self.schema, AvroSchema): return self.schema.to_json(names=None) + if isinstance(self.schema, ProtobufSchema): + return self.schema.to_json() return self.schema def __str__(self) -> str: - return json_encode(self.to_json(), compact=True) + if isinstance(self.schema, ProtobufSchema): + return self.schema.to_json() + else: + return json_encode(self.to_json(), compact=True) def __repr__(self): - return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" + if isinstance(self.schema, ProtobufSchema): + return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" + else: + return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" def __eq__(self, other): return isinstance(other, TypedSchema) and self.__str__() == other.__str__() and self.schema_type is other.schema_type diff --git a/karapace/serialization.py b/karapace/serialization.py index 0b751dae2..f9a4644e2 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -12,6 +12,7 @@ import io import logging import struct +import sys log = logging.getLogger(__name__) @@ -71,7 +72,10 
@@ def __init__(self, schema_registry_url: str = "http://localhost:8081"): self.base_url = schema_registry_url async def post_new_schema(self, subject: str, schema: TypedSchema) -> int: - payload = {"schema": json_encode(schema.to_json()), "schemaType": schema.schema_type.value} + if schema.schema_type is SchemaType.PROTOBUF: + payload = {"schema": schema.to_json(), "schemaType": schema.schema_type.value} + else: + payload = {"schema": json_encode(schema.to_json()), "schemaType": schema.schema_type.value} result = await self.client.post(f"subjects/{quote(subject)}/versions", json=payload) if not result.ok: raise SchemaRetrievalError(result.json()) @@ -175,10 +179,12 @@ async def get_schema_for_id(self, schema_id: int) -> TypedSchema: def read_value(schema: TypedSchema, bio: io.BytesIO): + if schema.schema_type is SchemaType.AVRO: reader = DatumReader(schema.schema) return reader.read(BinaryDecoder(bio)) if schema.schema_type is SchemaType.JSONSCHEMA: + value = load(bio) try: schema.schema.validate(value) diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index 6ce07f85d..89a0e8736 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -1,6 +1,6 @@ from karapace.schema_reader import SchemaType, TypedSchema from karapace.serialization import SchemaRegistryClient -from tests.utils import new_random_name, schema_avro_json +from tests.utils import new_random_name, schema_avro_json, schema_protobuf_plain async def test_remote_client(registry_async_client): @@ -15,3 +15,17 @@ async def test_remote_client(registry_async_client): stored_id, stored_schema = await reg_cli.get_latest_schema(subject) assert stored_id == sc_id assert stored_schema == schema_avro + + +async def test_remote_client_protobuf(registry_async_client): + schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain) + reg_cli = SchemaRegistryClient() + reg_cli.client = registry_async_client + subject = 
new_random_name("subject") + sc_id = await reg_cli.post_new_schema(subject, schema_protobuf) + assert sc_id >= 0 + stored_schema = await reg_cli.get_schema_for_id(sc_id) + assert stored_schema == schema_protobuf, f"stored schema {stored_schema} is not {schema_protobuf}" + stored_id, stored_schema = await reg_cli.get_latest_schema(subject) + assert stored_id == sc_id + assert stored_schema == schema_protobuf diff --git a/tests/utils.py b/tests/utils.py index 3a17cc931..d194015f6 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -42,6 +42,10 @@ }] }) +schema_protobuf_plain = "syntax = \"proto3\";\npackage com.codingharbour.protobuf;\n\noption java_outer_classname = \""\ + "SimpleMessageProtos\";\n\nmessage SimpleMessage {\n string content = 1;\n"\ + " string date_time = 2;\n string content2 = 3;\n}\n" + test_objects_jsonschema = [{"foo": 100}, {"foo": 200}] test_objects_avro = [ From 3063f58376e9bc3f9ab5756667e1cdfb76acfa51 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 22 Apr 2021 21:44:09 +0300 Subject: [PATCH 008/168] fixup lint issues --- karapace/schema_reader.py | 8 ++------ karapace/serialization.py | 1 - 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index e6bd92c1f..16156c459 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -15,7 +15,6 @@ from karapace import constants from karapace.avro_compatibility import parse_avro_schema_definition from karapace.protobuf.schema import ProtobufSchema -from karapace.protobuf.utils import protobuf_encode from karapace.statsd import StatsClient from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue @@ -101,7 +100,6 @@ def parse(schema_type: SchemaType, schema_str: str): # pylint: disable=inconsis if schema_type is SchemaType.JSONSCHEMA: return TypedSchema.parse_json(schema_str) if schema_type is SchemaType.PROTOBUF: - sys.stderr.write(f"ZVALA1: {TypedSchema.parse_protobuf(schema_str)}") 
return TypedSchema.parse_protobuf(schema_str) raise InvalidSchema(f"Unknown parser {schema_type} for {schema_str}") @@ -117,14 +115,12 @@ def to_json(self): def __str__(self) -> str: if isinstance(self.schema, ProtobufSchema): return self.schema.to_json() - else: - return json_encode(self.to_json(), compact=True) + return json_encode(self.to_json(), compact=True) def __repr__(self): if isinstance(self.schema, ProtobufSchema): return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" - else: - return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" + return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" def __eq__(self, other): return isinstance(other, TypedSchema) and self.__str__() == other.__str__() and self.schema_type is other.schema_type diff --git a/karapace/serialization.py b/karapace/serialization.py index f9a4644e2..4a0276620 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -12,7 +12,6 @@ import io import logging import struct -import sys log = logging.getLogger(__name__) From 62e56eb4be54172eefb97d066f4f07d4bf6795c0 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 25 Apr 2021 23:13:01 +0300 Subject: [PATCH 009/168] protobuf parser draft save --- karapace/protobuf/exception.py | 28 ++ karapace/protobuf/extend_element.py | 27 ++ karapace/protobuf/field.py | 10 + karapace/protobuf/field_element.py | 64 ++++ karapace/protobuf/kotlin_wrapper.py | 47 +++ karapace/protobuf/location.py | 57 ++++ karapace/protobuf/option_element.py | 129 ++++++++ karapace/protobuf/option_reader.py | 156 ++++++++++ karapace/protobuf/proto_file_element.py | 94 ++++++ karapace/protobuf/proto_parser.py | 200 ++++++++++++ karapace/protobuf/proto_type.py | 192 ++++++++++++ karapace/protobuf/protobuf_syntax.py | 5 + karapace/protobuf/service_element.py | 37 +++ karapace/protobuf/syntax.py | 6 + karapace/protobuf/syntax_reader.py | 396 ++++++++++++++++++++++++ 
karapace/protobuf/utils.py | 131 ++++++++ 16 files changed, 1579 insertions(+) create mode 100644 karapace/protobuf/exception.py create mode 100644 karapace/protobuf/extend_element.py create mode 100644 karapace/protobuf/field.py create mode 100644 karapace/protobuf/field_element.py create mode 100644 karapace/protobuf/kotlin_wrapper.py create mode 100644 karapace/protobuf/location.py create mode 100644 karapace/protobuf/option_element.py create mode 100644 karapace/protobuf/option_reader.py create mode 100644 karapace/protobuf/proto_file_element.py create mode 100644 karapace/protobuf/proto_parser.py create mode 100644 karapace/protobuf/proto_type.py create mode 100644 karapace/protobuf/protobuf_syntax.py create mode 100644 karapace/protobuf/service_element.py create mode 100644 karapace/protobuf/syntax.py create mode 100644 karapace/protobuf/syntax_reader.py diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py new file mode 100644 index 000000000..3b32dbb6c --- /dev/null +++ b/karapace/protobuf/exception.py @@ -0,0 +1,28 @@ +def error(message: str): + raise Exception(message) + + +class ProtobufParserRuntimeException(Exception): + pass + + +class IllegalStateException(Exception): + + def __init__(self, message="IllegalStateException"): + self.message = message + super().__init__(self.message) + + +class Error(Exception): + """Base class for errors in this module.""" + pass + + +class ProtobufException(Error): + """Generic Avro schema error.""" + pass + + +class SchemaParseException(ProtobufException): + """Error while parsing a JSON schema descriptor.""" + pass diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py new file mode 100644 index 000000000..0aecd8e8a --- /dev/null +++ b/karapace/protobuf/extend_element.py @@ -0,0 +1,27 @@ +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, append_indented + + +class ExtendElement: + location: Location + 
name: str + documentation: str + fields: list + + def __init__(self, location: Location, name: str, documentation: str, fields: list): + self.location = location + self.name = name + self.documentation = documentation + self.fields = fields + + def to_schema(self): + result: list = list() + append_documentation(result, self.documentation) + result.append(f"extend {self.name} {{") + if self.fields: + result.append("\n") + for field in self.fields: + append_indented(result, field.to_schema_declaration()) + + result.append("}\n") + return result diff --git a/karapace/protobuf/field.py b/karapace/protobuf/field.py new file mode 100644 index 000000000..5a387da56 --- /dev/null +++ b/karapace/protobuf/field.py @@ -0,0 +1,10 @@ +# TODO: ... +from enum import Enum + + +class Field: + class Label(Enum): + OPTIONAL = 1 + REQUIRED = 2 + REPEATED = 3 + ONE_OF = 4 diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py new file mode 100644 index 000000000..20d4687fb --- /dev/null +++ b/karapace/protobuf/field_element.py @@ -0,0 +1,64 @@ +from karapace.protobuf.field import Field +from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.proto_type import ProtoType +from karapace.protobuf.utils import append_documentation, append_options + + +class FieldElement: + location: Location + label: Field.Label + element_type: str + name: str + default_value: str = None + json_name: str = None + tag: int = 0, + documentation: str = "", + options: list = list() + + def __init__(self, location: Location, label: Field.Label, element_type: str, + name: str, default_value: str, json_name: str, tag: int, + documentation: str, options: list): + self.location = location + self.label = label + self.element_type = element_type + self.name = name + self.default_value = default_value + self.json_name = json_name + self.tag = tag + self.documentation = documentation + self.options = options + + def 
to_schema(self): + result: list = list() + append_documentation(result, self.documentation) + + if self.label: + result.append(f"{self.label.name.to_english_lower_case()} ") + + result.append(f"{self.element_type} {self.name} = {self.tag}") + + options_with_default = self.options_with_special_values() + if options_with_default and len(options_with_default) > 0: + result.append(' ') + append_options(result, options_with_default) + result.append(";\n") + + """ + Both `default` and `json_name` are defined in the schema like options but they are actually + not options themselves as they're missing from `google.protobuf.FieldOptions`. + """ + + def options_with_special_values(self) -> list: + + options = self.options.copy() + + if self.default_value: + proto_type = ProtoType.get2(self.element_type) + options.append(OptionElement("default", proto_type.to_kind(), self.default_value, False)) + if self.json_name: + self.options.append(OptionElement("json_name", OptionElement.Kind.STRING, self.json_name, False)) + + return options + +# Only non-repeated scalar types and Enums support default values. 
diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py new file mode 100644 index 000000000..4529a9c67 --- /dev/null +++ b/karapace/protobuf/kotlin_wrapper.py @@ -0,0 +1,47 @@ + + +def check(q: bool, message: str): + if not q: + raise IllegalStateException(message) + + +def require(q: bool, message: str): + if not q: + raise IllegalArgumentException(message) + + +class IllegalStateException(Exception): + def __init__(self, message="IllegalStateException"): + self.message = message + super().__init__(self.message) + + +class IllegalArgumentException(Exception): + def __init__(self, message="IllegalArgumentException"): + self.message = message + super().__init__(self.message) + + +class String(str): + pass + + +class Any(object): + pass + + +class StringBuilder(list): + + def append_indented(self: list, value: str): + lines = value.split("\n") + if len(lines) > 1 and not lines[-1]: + lines = lines.pop() + + for line in lines: + self.append(" ") + self.append(line) + self.append("\n") + + +class OptionsList(list): + pass diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py new file mode 100644 index 000000000..83703a864 --- /dev/null +++ b/karapace/protobuf/location.py @@ -0,0 +1,57 @@ +class Location: + """ Locates a .proto file, or a self.position within a .proto file, on the file system """ + + base: str + path: str + line: int + column: int + + def __init__(self, base: str, path: str, line: int = -1, column: int = -1): + """ str - The base directory of this location; + path - The path to this location relative to [base] + line - The line number of this location, or -1 for no specific line number + column - The column on the line of this location, or -1 for no specific column + """ + self.base = base + self.path = path + self.line = line + self.column = column + + def at(self, line: int, column: int): + return Location(self.base, self.path, line, column) + + def without_base(self): + """ Returns a copy of this 
location with an empty base. """ + return Location("", self.path, self.line, self.column) + + def with_path_only(self): + """ Returns a copy of this location including only its path. """ + return Location("", self.path, -1, -1) + + def to_string(self) -> str: + result = "" + if self.base: + result += self.base + "/" + + result += self.path + + if self.line != -1: + result += ":" + result += self.line + if self.column != -1: + result += ":" + result += self.column + + return result + + @staticmethod + def get(*args, **kwds): + if len(args) == 1: # (path) + path = args[0] + return Location.get("", path) + if len(args) == 2: # (base,path) + path: str = args[1] + base: str = args[0] + if base.endswith("/"): + base = base[:-1] + return Location(base, path) diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py new file mode 100644 index 000000000..72f17c5f7 --- /dev/null +++ b/karapace/protobuf/option_element.py @@ -0,0 +1,129 @@ +from enum import Enum +from karapace.protobuf.kotlin_wrapper import * +from karapace.protobuf.utils import append_indented + + +class OptionsList(list): + pass + + +class ListOptionElement(list): + pass + + +class OptionElement: + class Kind(Enum): + STRING = 1 + BOOLEAN = 2 + NUMBER = 3 + ENUM = 4 + MAP = 5 + LIST = 6 + OPTION = 7 + + name: str + kind: Kind + value = None + """ If true, this [OptionElement] is a custom option. 
""" + is_parenthesized: bool + + def __init__(self, name: str, kind: Kind, value, is_parenthesized: bool): + self.name = name + self.kind = kind + self.value = value + self.is_parenthesized = is_parenthesized + self.formattedName = f"({self.name})" if is_parenthesized else self.name + + def to_schema(self) -> str: + aline = { + self.kind == self.Kind.STRING: f"{self.formattedName} = \"{self.value}\"", + self.kind in [self.Kind.BOOLEAN, self.Kind.NUMBER, self.Kind.ENUM]: f"{self.formattedName} = {self.value}", + self.kind == self.Kind.OPTION: f"{self.formattedName}.{self.value.to_schema()}", + self.kind == self.Kind.MAP: list([f"{self.formattedName} = {{\n", + self.format_option_map(self.value), + "}" + ]), + self.kind == self.Kind.LIST: list([f"{self.formattedName} = ", + self.append_options(self.value) + ]) + }[True] + if type(aline) is list: + return "".join(aline) + else: + return aline + + def to_schema_declaration(self): + return f"option {self.to_schema()};\n" + + def append_options(self, options: list): + data: list = list() + count = len(options) + if count == 1: + data.append('[') + data.append(options[0].to_schema()) + data.append(']') + return "".join(data) + + data.append("[\n") + for i in range(0, count): + if i < count - 1: + endl = "," + else: + endl = "" + append_indented(data, options[i].to_schema() + endl) + data.append(']') + return "".join(data) + + def format_option_map(self, value: dict) -> str: + keys = list(value.keys()) + last_index = len(keys) - 1 + result: StringBuilder = StringBuilder() + for index in range(len(keys)): + endl = "," if (index != last_index) else "" + result.append_indented(f"{keys[index]}: {self.format_option_map_value(value[keys[index]])}{endl}") + return "".join(result) + + def format_option_map_value(self, value) -> str: + aline = { + type(value) is str: f"\"{value}\"", + type(value) is dict: list(["{\n", + self.format_option_map_value(value), + "}" + ]), + type(value) is list: list(["[\n", + 
self.format_list_map_value(value), + "]" + ]) + }[True] + + if type(aline) is list: + return "".join(aline) + if type(aline) is str: + return aline + return value + + def format_list_map_value(self, value) -> str: + keys = value.keys() + last_index = len(value) - 1 + result: StringBuilder = StringBuilder() + for index in range(len(keys)): + endl = "," if (index != last_index) else "" + result.append_indented(f"{self.format_option_map_value(value[keys[index]])}{endl}") + return "".join(result) + + # TODO: REMOVE WHEN ALL CLEAN + """ companion object { + internal PACKED_OPTION_ELEMENT = + OptionElement("packed", BOOLEAN, value = "true", is_parenthesized = false) + + @JvmOverloads + def create( + name: String, + kind: Kind, + value: Any, + is_parenthesized: Boolean = false + ) = OptionElement(name, kind, value, is_parenthesized) + } +} + + """ diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py new file mode 100644 index 000000000..f03103761 --- /dev/null +++ b/karapace/protobuf/option_reader.py @@ -0,0 +1,156 @@ +from karapace.protobuf.syntax_reader import SyntaxReader +from karapace.protobuf.option_element import OptionElement + + +class KindAndValue: + kind: OptionElement.Kind + value: object + + def __init__(self, kind: OptionElement.Kind, value: object): + self.kind = kind + self.valuer = value + + +class OptionReader: + reader: SyntaxReader + + def __init__(self, reader: SyntaxReader): + self.reader = reader + + """ + Reads options enclosed in '[' and ']' if they are present and returns them. Returns an empty + list if no options are present. + """ + + def read_options(self) -> list: + if not self.reader.peek_char('['): + return list() + result: list = list() + while True: + result.append(self.read_option('=')) + + # Check for closing ']' + if self.reader.peek_char(']'): + break + + # Discard optional ','. 
+ self.reader.expect(self.reader.peek_char(','), "Expected ',' or ']") + return result + + """ Reads a option containing a name, an '=' or ':', and a value. """ + + def read_option(self, key_value_separator: str) -> OptionElement: + is_extension = (self.reader.peek_char() == '[') + is_parenthesized = (self.reader.peek_char() == '(') + name = self.reader.read_name() # Option name. + if is_extension: + name = f"[{name}]" + + sub_names: list = list() + c = self.reader.read_char() + if c == '.': + # Read nested field name. For example "baz" in "(foo.bar).baz = 12". + sub_names = self.reader.read_name().split(".") + c = self.reader.read_char() + + if key_value_separator == ':' and c == '{': + # In text format, values which are maps can omit a separator. Backtrack so it can be re-read. + self.reader.push_back('{') + else: + self.reader.expect(c == key_value_separator, f"expected '{key_value_separator}' in option") + + kind_and_value = self.read_kind_and_value() + kind = kind_and_value.kind + value = kind_and_value.value + sub_names.reverse() + for sub_name in sub_names: + value = OptionElement(sub_name, kind, value, False) + kind = OptionElement.Kind.OPTION + return OptionElement(name, kind, value, is_parenthesized) + + """ Reads a value that can be a map, list, string, number, boolean or enum. 
""" + + def read_kind_and_value(self) -> KindAndValue: + peeked = self.reader.peek_char() + if peeked == '{': + return KindAndValue(OptionElement.Kind.MAP, self.read_map('{', '}', ':')) + if peeked == '[': + return KindAndValue(OptionElement.Kind.LIST, self.read_list()) + if peeked == '"' or peeked == "'": + return KindAndValue(OptionElement.Kind.STRING, self.reader.read_string()) + + if peeked.is_digit() or peeked == '-': + return KindAndValue(OptionElement.Kind.NUMBER, self.reader.read_word()) + + word = self.reader.read_word() + if word == "true": + return KindAndValue(OptionElement.Kind.BOOLEAN, "true") + if word == "false": + return KindAndValue(OptionElement.Kind.BOOLEAN, "false") + return KindAndValue(OptionElement.Kind.ENUM, word) + + """ + Returns a map of string keys and values. This is similar to a JSON object, with ':' and '}' + surrounding the map, ':' separating keys from values, and ',' or ';' separating entries. + """ + + def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) -> dict: + if self.reader.read_char() != open_brace: + raise AssertionError() + result: dict = dict() + while True: + if self.reader.peek_char(close_brace): + # If we see the close brace, finish immediately. This handles :}/[] and ,}/,] cases. + return result + + option = self.read_option(key_value_separator) + name = option.name + value = option.value + if value is OptionElement: + nested = result[name] + if not nested: + nested = dict() + result[name] = nested + nested[value.name] = value.value + else: + # Add the value(s) to any previous values with the same key + previous = result[name] + if not previous: + result[name] = value + elif type(previous) is list: # Add to previous List + self.add_to_list(previous, value) + else: + new_list: list = list() + new_list.append(previous) + self.add_to_list(new_list, value) + result[name] = new_list + # Discard optional separator. 
+ self.reader.peek_char(',') or self.reader.peek_char(';') + + """ Adds an object or objects to a List. """ + + def add_to_list(self, _list: list, value: object): + if type(value) is list: + for v in list(value): + _list.append(v) + else: + _list.append(value) + + """ + * Returns a list of values. This is similar to JSON with '[' and ']' surrounding the list and ',' + * separating values. + """ + + def read_list(self) -> list: + self.reader.require('[') + result: list = list() + while True: + # If we see the close brace, finish immediately. This handles [] and ,] cases. + if self.reader.peek_char(']'): + return result + + result.append(self.read_kind_and_value().value) + + if self.reader.peek_char(','): + continue + self.reader.expect(self.reader.peek_char() == ']', "expected ',' or ']'") diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py new file mode 100644 index 000000000..ef8516481 --- /dev/null +++ b/karapace/protobuf/proto_file_element.py @@ -0,0 +1,94 @@ +from karapace.protobuf.location import Location +from karapace.protobuf.syntax import Syntax + + +class ProtoFileElement: + location: Location + package_name: str + syntax: Syntax + imports: list + public_imports: list + types: list + services: list + extend_declarations: list + options: list + + def __init__(self, location: Location, + package_name: str = None, + syntax: Syntax = None, + imports=None, + public_imports=None, + types=None, + services=None, + extend_declarations=None, + options=None): + + if options is None: + options = list() + if extend_declarations is None: + extend_declarations = list() + if services is None: + services = list() + if types is None: + types = list() + if public_imports is None: + public_imports = [] + if imports is None: + imports = [] + self.location = location + self.package_name = package_name + self.syntax = syntax + self.imports = imports + self.public_imports = public_imports + self.types = types + self.services = services 
+ self.extend_declarations = extend_declarations + self.options = options + + def to_schema(self): + strings: list = ["// Proto schema formatted by Wire, do not edit.\n", "// Source: ", + str(self.location.with_path_only()), "\n"] + if self.syntax: + strings.append("\n") + strings.append("syntax = \"") + strings.append(str(self.syntax)) + strings.append("\";\n") + + if self.package_name: + strings.append("\n") + strings.append("package " + str(self.package_name) + ";\n") + + if (self.imports and len(self.imports)) or (self.public_imports and len(self.public_imports)): + strings.append("\n") + + for file in self.imports: + strings.append("import \"" + str(file) + "\";\n") + + for file in self.public_imports: + strings.append("import public \"" + str(file) + "\";\n") + + if self.options and len(self.options): + strings.append("\n") + for option in self.options: + strings.append(str(option.to_schema_declaration())) + + if self.types and len(self.types): + for type_element in self.types: + strings.append("\n") + strings.append(str(type_element.to_schema)) + + if self.extend_declarations and len(self.extend_declarations): + for extend_declaration in self.extend_declarations: + strings.append("\n") + strings.append(extend_declaration.to_schema()) + + if self.services and len(self.extend_declarations): + for service in self.services: + strings.append("\n") + strings.append(str(service.to_schema)) + + return "".join(strings) + + @staticmethod + def empty(path): + return ProtoFileElement(Location.get(path)) diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py new file mode 100644 index 000000000..1c70f610d --- /dev/null +++ b/karapace/protobuf/proto_parser.py @@ -0,0 +1,200 @@ +from builtins import str + +from enum import Enum + +from typing import List, Any, Union + +from io import StringIO + +from karapace.protobuf.location import Location +from karapace.protobuf.option_reader import OptionReader +from karapace.protobuf.proto_file_element 
import ProtoFileElement +from karapace.protobuf.syntax import Syntax +from karapace.protobuf.syntax_reader import SyntaxReader +from karapace.protobuf.service_element import ServiceElement +from karapace.protobuf.exception import error +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.extend_element import ExtendElement + + +from enum import Enum + + + +class TypeElement : + location: Location + name: str + documentation: str + options: list + nested_types: list + def to_schema(self): + pass + + +class SyntaxReader: + pass + + +class Context : + FILE = "file" + + + + + +class ProtoParser: + location: Location + reader: SyntaxReader + public_imports: list + imports: list + nested_types: list + services: list + extends_list: list + options: list + declaration_count: int = 0 + syntax: Syntax = None + package_name: str = None + prefix: str = "" + + def __int__(self, location: Location, data:str): + self.reader = SyntaxReader(data, location) + + def read_proto_file(self) -> ProtoFileElement: + while True: + documentation = self.reader.read_documentation() + if self.reader.exhausted(): + return ProtoFileElement(self.location, self.package_name, self.syntax, self.imports, + self.public_imports, self.nested_types, self.services, self.extends_list, + self.options) + declaration = self.read_declaration(documentation, Context.FILE) + if type(declaration) is TypeElement : + # TODO: must add check for execption + duplicate = next((x for x in iter(self.nested_types) if x.name == declaration.name), None) + if duplicate : + error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") + self.nested_types.append(declaration) + + if type(declaration) is ServiceElement : + duplicate = next((x for x in iter(self.services) if x.name == declaration.name), None) + if duplicate : + error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") + self.services.append(declaration) + + if 
type(declaration) is OptionElement: + self.options.append(declaration) + + if type(declaration) is ExtendElement : + self.extends_list.append(declaration) + + + + def read_declaration(self, documentation: str, context: Context): + self.declaration_count += 1 + index = self.declaration_count + + # Skip unnecessary semicolons, occasionally used after a nested message declaration. + if self.reader.peek_char(';') : return None + + + location = self.reader.location() + label = self.reader.read_word() + + # TODO(benoit) Let's better parse the proto keywords. We are pretty weak when field/constants + # are named after any of the label we check here. + + result = None + if label == "package" and context.permits_package() : + self.package_name = self.reader.readName() + self.prefix = f"{self.package_name}." + self.reader.require(';') + return result + elif label == "import" and context.permits_import() : + import_string = self.reader.read_string() + if import_string == "public" : + self.public_imports.append(self.reader.read_string()) + + else : + self.imports.append(import_string) + self.reader.require(';') + return result + elif label == "syntax" and context.permits_syntax() : + self.reader.expect(self.syntax == None, location, "too many syntax definitions" ) + self.reader.require('=') + self.reader.expect(index == 0, location ,"'syntax' element must be the first declaration in a file") + + syntax_string = self.reader.read_quoted_string() + try : + syntax = Syntax(syntax_string) + except Exception as e: + # TODO: } catch (e: IllegalArgumentException) { ??? 
+ self.reader.unexpected(str(e), location) + self.reader.require(';') + return result + elif label == "option" : + result = OptionReader(self.reader).read_option('=') + self.reader.require(';') + return result + elif label == "reserved" : + return self.read_reserved(location, documentation) + elif label == "message" and context.permits_message() : + return self.read_message(location, documentation) + elif label == "enum" and context.permits_enum() : + return self.read_enum_element(location, documentation) + elif label == "service" and context.permits_service() : + return self.read_service(location, documentation) + elif label == "extend" and context.permits_extend() : + return self.read_extend(location, documentation) + elif label == "rpc" and context.permits_rpc() : + return self.seread_rpc(location, documentation) + elif label == "oneof" and context.permits_one_of() : + return self.read_one_of(documentation) + elif label == "extensions" and context.permits_extensions() : + return self.read_extensions(location, documentation) + elif context == Context.MESSAGE or context == Context.EXTEND : + return self.read_field(documentation, location, label) + elif context == Context.ENUM : + return self.read_enum_constant(documentation, location, label) + else : + self.reader.unexpected("unexpected label: $label", location) + + + """ Reads a message declaration. """ + def read_message( self, location: Location, documentation: String )-> MessageElement : + name :str = self.reader.readName() + fields:list = list() + one_ofs:list = list() + nestedTypes:list = list() + extensions :list = list() + options :list = list() + reserveds :list = list() + groups :list = list() + + previousPrefix = self.prefix + self.prefix = f"{self.prefix}{name}." 
+ + self.reader.require('{') + while True : + nested_documentation = self.reader.read_documentation() + if self.reader.peek_char('}'): + break + + declared = self.read_declaration(nested_documentation, Context.MESSAGE) : + type_declared = type(declared) + if type_declared is FieldElement : + fields.append(declared) + elif type_declared is OneOfElement : + one_ofs.append(declared) + elif type_declared is GroupElement : + groups.append(declared) + elif type_declared is TypeElement : + nestedTypes.append(declared) + elif type_declared is ExtensionsElement : + extensions.append(declared) + elif type_declared is OptionElement : + options.append(declared) + # Extend declarations always add in a global scope regardless of nesting. + elif type_declared is ExtendElement : + self.extends_list.append(declared) + elif type_declared is ReservedElement : + reserveds.append(declared) + \ No newline at end of file diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py new file mode 100644 index 000000000..9618f590b --- /dev/null +++ b/karapace/protobuf/proto_type.py @@ -0,0 +1,192 @@ +""" +Names a protocol buffer message, enumerated type, service, map, or a scalar. This class models a +fully-qualified name using the protocol buffer package. +""" + +from karapace.protobuf.kotlin_wrapper import check, require +from karapace.protobuf.option_element import OptionElement + + +class ProtoType: + is_scalar: bool + string: str + is_map: bool + """ The type of the map's keys. Only present when [is_map] is True. """ + key_type: object # ProtoType + + """ The type of the map's values. Only present when [is_map] is True. """ + value_type: object # ProtoType + + @property + def simple_name(self) -> str: + dot = self.string.rfind(".") + return self.string[dot + 1] + + """ Creates a scalar or message type. 
""" + + def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None): + + self.BOOL = ProtoType(True, "bool") + self.BYTES = ProtoType(True, "bytes") + self.DOUBLE = ProtoType(True, "double") + self.FLOAT = ProtoType(True, "float") + self.FIXED32 = ProtoType(True, "fixed32") + self.FIXED64 = ProtoType(True, "fixed64") + self.INT32 = ProtoType(True, "int32") + self.INT64 = ProtoType(True, "int64") + self.SFIXED32 = ProtoType(True, "sfixed32") + self.SFIXED64 = ProtoType(True, "sfixed64") + self.SINT32 = ProtoType(True, "sint32") + self.SINT64 = ProtoType(True, "sint64") + self.STRING = ProtoType(True, "string") + self.UINT32 = ProtoType(True, "uint32") + self.UINT64 = ProtoType(True, "uint64") + self.ANY = ProtoType(False, "google.protobuf.Any") + self.DURATION = ProtoType(False, "google.protobuf.Duration") + self.TIMESTAMP = ProtoType(False, "google.protobuf.Timestamp") + self.EMPTY = ProtoType(False, "google.protobuf.Empty") + self.STRUCT_MAP = ProtoType(False, "google.protobuf.Struct") + self.STRUCT_VALUE = ProtoType(False, "google.protobuf.Value") + self.STRUCT_NULL = ProtoType(False, "google.protobuf.NullValue") + self.STRUCT_LIST = ProtoType(False, "google.protobuf.ListValue") + self.DOUBLE_VALUE = ProtoType(False, "google.protobuf.DoubleValue") + self.FLOAT_VALUE = ProtoType(False, "google.protobuf.FloatValue") + self.INT64_VALUE = ProtoType(False, "google.protobuf.Int64Value") + self.UINT64_VALUE = ProtoType(False, "google.protobuf.UInt64Value") + self.INT32_VALUE = ProtoType(False, "google.protobuf.Int32Value") + self.UINT32_VALUE = ProtoType(False, "google.protobuf.UInt32Value") + self.BOOL_VALUE = ProtoType(False, "google.protobuf.BoolValue") + self.STRING_VALUE = ProtoType(False, "google.protobuf.StringValue") + self.BYTES_VALUE = ProtoType(False, "google.protobuf.BytesValue") + + self.SCALAR_TYPES_ = [self.BOOL, + self.BYTES, + self.DOUBLE, + self.FLOAT, + self.FIXED32, + self.FIXED64, + self.INT32, + self.INT64, + self.SFIXED32, + 
self.SFIXED64, + self.SINT32, + self.SINT64, + self.STRING, + self.UINT32, + self.UINT64 + ] + + self.SCALAR_TYPES: dict = dict() + + for a in self.SCALAR_TYPES_: + self.SCALAR_TYPES[a.string] = a + + self.NUMERIC_SCALAR_TYPES: tuple = ( + self.DOUBLE, + self.FLOAT, + self.FIXED32, + self.FIXED64, + self.INT32, + self.INT64, + self.SFIXED32, + self.SFIXED64, + self.SINT32, + self.SINT64, + self.UINT32, + self.UINT64 + ) + + if key_type is None and value_type is None: + self.is_scalar = is_scalar + self.string = string + self.is_map = False + self.key_type = None + self.value_type = None + else: + if key_type.is_scalar() and key_type != self.BYTES and key_type != self.DOUBLE and key_type != self.FLOAT: + self.is_scalar = False + self.string = string + self.is_map = True + self.key_type = key_type # TODO restrict what's allowed here + self.value_type = value_type + else: + # TODO: must be IllegalArgumentException + raise Exception("map key must be non-byte, non-floating point scalar: $key_type") + + @staticmethod + def to_kind(self) -> OptionElement.Kind: + return { + "bool": OptionElement.Kind.BOOLEAN, + "string": OptionElement.Kind.STRING, + "bytes": OptionElement.Kind.STRING, + "double": OptionElement.Kind.STRING, + "float": OptionElement.Kind.STRING, + "fixed32": OptionElement.Kind.STRING, + "fixed64": OptionElement.Kind.STRING, + "int32": OptionElement.Kind.STRING, + "int64": OptionElement.Kind.STRING, + "sfixed32": OptionElement.Kind.STRING, + "sfixed64": OptionElement.Kind.STRING, + "sint32": OptionElement.Kind.STRING, + "sint64": OptionElement.Kind.STRING, + "uint32": OptionElement.Kind.STRING, + "uint64": OptionElement.Kind.NUMBER + }.get(self.simple_name, OptionElement.Kind.ENUM) + + """ Returns the enclosing type, or null if self type is not nested in another type. 
""" + + @property + def enclosing_type_or_package(self) -> str: + dot = self.string.rfind(".") + return None if (dot == -1) else self.string[:dot] + + """ + Returns a string like "type.googleapis.com/packagename.messagename" or null if self type is + a scalar or a map. Note that self returns a non-null string for enums because it doesn't know + if the named type is a message or an enum. + """ + + @property + def type_url(self) -> str: + return None if self.is_scalar or self.is_map else f"type.googleapis.com/{self.string}" + + def nested_type(self, name: str) -> object: # ProtoType + + check(not self.is_scalar, "scalar cannot have a nested type") + check(not self.is_map, "map cannot have a nested type") + require(name and name.rfind(".") == -1 and len(name) != 0, f"unexpected name: {name}") + + return ProtoType(False, f"{self.string}.{name}") + + def __eq__(self, other): + return type(other) is ProtoType and self.string == other.string + + def __ne__(self, other): + return type(other) is not ProtoType or self.string != other.string + + def to_string(self) -> str: + return self.string + + def hash_code(self) -> int: + return hash(self.string) + + def get(self, enclosing_type_or_package: str, type_name: str) -> object: + return self.get2(f"{enclosing_type_or_package}.{type_name}") if enclosing_type_or_package else self.get2( + type_name) + + def get2(self, name: str) -> object: + scalar = self.SCALAR_TYPES[name] + if scalar: + return scalar + require(name and len(name) != 0 and name.rfind("#") == -1, f"unexpected name: {name}") + if name.startswith("map<") and name.endswith(">"): + comma = name.rfind(",") + require(comma != -1, f"expected ',' in map type: {name}") + key = self.get2(name[4:comma].strip()) + value = self.get2(name[comma + 1:len(name)].strip()) + return ProtoType(False, name, key, value) + return ProtoType(False, name) + + @staticmethod + def get3(key_type: object, value_type: object, name: str): + return ProtoType(False, name, key_type, value_type) diff 
--git a/karapace/protobuf/protobuf_syntax.py b/karapace/protobuf/protobuf_syntax.py new file mode 100644 index 000000000..b97a1b72d --- /dev/null +++ b/karapace/protobuf/protobuf_syntax.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class ProtobufSyntax(Enum): + pass diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py new file mode 100644 index 000000000..add24b575 --- /dev/null +++ b/karapace/protobuf/service_element.py @@ -0,0 +1,37 @@ +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, append_indented +from karapace.protobuf.kotlin_wrapper import * +from karapace.protobuf.option_element import OptionsList + + +class ServiceElement: + location: Location + name: str + documentation: str + rpcs: list + options: OptionsList + + def __init__(self, location: Location, name: str, documentation: str, rpcs: list, options: OptionsList + ): + self.location = location + self.name = name + self.documentation = documentation + self.rpcs = rpcs + self.options = options + + def to_schema(self): + result: list = list() + append_documentation(result, self.documentation) + result.append(f"service {self.name} {{") + if self.options: + result.append("\n") + for option in self.options: + append_indented(result, option.to_schema_declaration()) + + if self.rpcs: + result.append('\n') + for rpc in self.rpcs: + append_indented(result, rpc.to_schema()) + + result.append("}\n") + return result diff --git a/karapace/protobuf/syntax.py b/karapace/protobuf/syntax.py new file mode 100644 index 000000000..c43c86449 --- /dev/null +++ b/karapace/protobuf/syntax.py @@ -0,0 +1,6 @@ +from enum import Enum + + +class Syntax(Enum): + PROTO_2 = "proto2" + PROTO_3 = "proto3" diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py new file mode 100644 index 000000000..6e398b7b6 --- /dev/null +++ b/karapace/protobuf/syntax_reader.py @@ -0,0 +1,396 @@ +from karapace.protobuf.location 
import Location +from karapace.protobuf.exception import IllegalStateException +from karapace.protobuf.exception import ProtobufParserRuntimeException + + +def hex_digit(c: str) -> int: + if ord(c) in range(ord('0'), ord('9')): + return ord(c) - ord('0') + if ord(c) in range(ord('a'), ord('f')): + return ord('a') + 10 + if ord(c) in range(ord('A'), ord('F')): + return ord(c) - ord('A') + 10 + return -1 + + +def min_of(a:int, b:int)->int : + return a if a bool: + return self.pos == len(self.data) + + """ Reads a non-whitespace character """ + + def read_char(self): + char = self.peek_char() + self.pos += 1 + return char + + """ Reads a non-whitespace character 'c' """ + + def require(self, c: str): + self.expect(self.read_char() == c, f"expected '{c}'") + + """ + Peeks a non-whitespace character and returns it. The only difference between this and + [read_char] is that this doesn't consume the char. + """ + + def peek_char(self, ch: str = None): + if ch: + if self.peek_char() == ch: + self.pos += 1 + return True + else: + return False + else: + self.skip_whitespace(True) + self.expect(self.pos < len(self.data), "unexpected end of file") + return self.data[self.pos] + + """ Push back the most recently read character. """ + + def push_back(self, ch: str): + if self.data[self.pos - 1] == ch: + self.pos -= 1 + + """ Reads a quoted or unquoted string and returns it. """ + + def read_string(self) -> str: + self.skip_whitespace(True) + if self.peek_char() in ["\"", "'"]: + return self.read_quoted_string() + + else: + return self.read_word() + + def read_numeric_escape_8_3(self) -> int: + self.pos -= 1 + return self.read_numeric_escape(8, 3) + + def read_quoted_string(self) -> str: + start_quote = self.read_char() + if start_quote != '"' and start_quote != '\'': + raise IllegalStateException(f" quote expected") + + result: list = [] + + while self.pos < len(self.data): + self.pos += 1 + c = self.data[self.pos] + if c == start_quote: + """ Adjacent strings are concatenated. 
+ Consume new quote and continue reading. """ + if self.peek_char() == '"' or self.peek_char() == "'": + start_quote = self.read_char() + continue + return "".join(result) + if c == "\\": + self.expect(self.pos < len(self.data), "unexpected end of file") + self.pos += 1 + c = self.data[self.pos] + c = { + 'a': "\u0007", # Alert. + 'b': "\b", # Backspace. + 'f': "\u000c", # Form feed. + 'n': "\n", # Newline. + 'r': "\r", # Carriage return. + 't': "\t", # Horizontal tab. + 'v': "\u000b", # Vertical tab. + 'x': self.read_numeric_escape(16, 2), + 'X': self.read_numeric_escape(16, 2), + '0': self.read_numeric_escape_8_3(), + '1': self.read_numeric_escape_8_3(), + '2': self.read_numeric_escape_8_3(), + '3': self.read_numeric_escape_8_3(), + '4': self.read_numeric_escape_8_3(), + '5': self.read_numeric_escape_8_3(), + '6': self.read_numeric_escape_8_3(), + '7': self.read_numeric_escape_8_3() + }.get(c) + + result.append(c) + if c == "\n": + self.newline() + + self.unexpected("unterminated string") + + def read_numeric_escape(self, radix: int, length: int) -> int: + value = -1 + end_pos = min_of(self.pos + length, len(self.data)) + while self.pos < end_pos: + digit = hex_digit(self.data[self.pos]) + if digit == -1 or digit >= radix: + break + + if value < 0: + value = digit + else: + value = value * radix + digit + self.pos += 1 + + self.expect(value >= 0, "expected a digit after \\x or \\X") + return chr(value) + + """ Reads a (paren-wrapped), [square-wrapped] or naked symbol name. """ + + def read_name(self) -> str: + c = self.peek_char() + if c == '(': + self.pos += 1 + result = self.read_word() + self.expect(self.read_char() == ')', "expected ')'") + return result + if c == '[': + self.pos += 1 + result = self.read_word() + self.expect(self.read_char() == ']', "expected ']'") + return result + return self.read_word() + + """ Reads a scalar, map, or type name. 
""" + + def read_data_type(self) -> str: + name = self.read_word() + return self.read_self.data_type(name) + + """ Reads a scalar, map, or type name with `name` as a prefix word. """ + + def read_data_type(self, name: str) -> str: + if name == "map": + + self.expect(self.read_char() == '<', "expected '<'") + key_type = self.read_self.data_type() + + self.expect(self.read_char() == ',', "expected ','") + value_type = self.read_self.data_type() + + self.expect(self.read_char() == '>', "expected '>'") + return f"map<{key_type}, {value_type}>" + else: + return name + + """ Reads a non-empty word and returns it. """ + + def read_word(self) -> str: + self.skip_whitespace(True) + start = self.pos + while self.pos < len(self.data): + c = self.data[self.pos] + if c in range('a', 'z') or c in range('A', 'Z') or c in range('0', '9') or c in ['_', '-', '.']: + self.pos += 1 + else: + break + self.expect(start < self.pos, "expected a word") + return self.data[start:self.pos - start].decode() + + """ Reads an integer and returns it. """ + + def read_int(self) -> int: + tag: str = self.read_word() + try: + radix = 10 + if tag.startswith("0x") or tag.startswith("0X"): + tag = tag[len("0x"):] + radix = 16 + return int(tag, radix) + except: + self.unexpected(f"expected an integer but was {tag}") + + """ Like skip_whitespace(), but this returns a string containing all comment text. By convention, + comments before a declaration document that declaration. """ + + def read_documentation(self) -> str: + result: str = None + while True: + self.skip_whitespace(False) + if self.pos == len(self.data) or self.data[self.pos] != '/': + if result: + return result + else: + "" + comment = self.read_comment() + if result: + result = f"{result}\n{comment}" + else: + result = "$result\n$comment" + + """ Reads a comment and returns its body. 
""" + + def read_comment(self) -> str: + if self.pos == len(self.data) or self.data[self.pos] != '/': + raise IllegalStateException() + + self.pos += 1 + tval = -1 + if self.pos < len(self.data): + self.pos += 1 + tval = int(self.data[self.pos]) + + if tval == int('*'): + result: list + start_of_line = True + while self.pos + 1 < len(self.data): + c: str = self.data[self.pos] + + if c == '*' and self.data[self.pos + 1] == '/': + self.pos += 2 + return "".join(result).strip() + + if c == "\n": + result.append("\n") + self.newline() + start_of_line = True + + if not start_of_line: + result.append(c) + + if c == "*": + if self.data[self.pos + 1] == ' ': + self.pos += 1 # Skip a single leading space, if present. + start_of_line = False + if not c.isspace(): + result.append(c) + start_of_line = False + self.pos += 1 + self.unexpected("unterminated comment") + + if tval == int('/'): + if self.pos < len(self.data) and self.data[self.pos] == ' ': + self.pos += 1 # Skip a single leading space, if present. + start = self.pos + while self.pos < len(self.data): + self.pos += 1 + c = self.data[self.pos] + if c == "\n": + self.newline() + break + return self.data[start:self.pos - 1 - start].decode() + self.unexpected("unexpected '/'") + + def try_append_trailing_documentation(self, documentation: str) -> str: + """ Search for a '/' character ignoring spaces and tabs.""" + while self.pos < len(self.data): + if self.data[self.pos] in [' ', "\t"]: + self.pos += 1 + + if self.data[self.pos] == '/': + self.pos += 1 + break + """ Not a whitespace or comment-starting character. Return original documentation. """ + return documentation + bval = (self.pos < len(self.data) and (self.data[self.pos] == '/' or self.data[self.pos] == '*')) + # Backtrack to start of comment. + if not bval: self.pos -= 1 + self.expect(bval, "expected '//' or '/*'") + is_star = self.data[self.pos] == '*' + + self.pos += 1 + + # Skip a single leading space, if present. 
+ if self.pos < len(self.data) and self.data[self.pos] == ' ': + self.pos += 1 + + start = self.pos + end: int + + if is_star: + """ Consume star comment until it closes on the same line.""" + while True: + self.expect(self.pos < len(self.data), "trailing comment must be closed") + if self.data[self.pos] == '*' and self.pos + 1 < len(self.data) and self.data[self.pos + 1] == '/': + end = self.pos - 1 # The character before '*'. + self.pos += 2 # Skip to the character after '/'. + break + self.pos += 1 + + """ Ensure nothing follows a trailing star comment.""" + while self.pos < len(self.data): + self.pos += 1 + c = self.data[self.pos] + if c == "\n": + self.newline() + break + + self.expect(c == " " or c == "\t", "no syntax may follow trailing comment") + + else: + """ Consume comment until newline. """ + while True: + if self.pos == len(self.data): + end = self.pos - 1 + break + self.pos += 1 + c = self.data[self.pos] + if c == "\n": + self.newline() + end = self.pos - 2 # Account for stepping past the newline. + break + + """ Remove trailing whitespace.""" + while end > start and (self.data[end] == " " or self.data[end] == "\t"): + end -= 1 + + if end == start: + return documentation + + trailing_documentation = self.data[start:end - start + 1] + if not documentation.strip: + return trailing_documentation + return f"{documentation}\n{trailing_documentation}" + + """ + Skips whitespace characters and optionally comments. When this returns, either + self.pos == self.data.length or a non-whitespace character. + """ + + def skip_whitespace(self, skip_comments: bool): + while self.pos < len(self.data): + c = self.data[self.pos] + if c == " " or c == "\t" or c == "\r" or c == "\n": + self.pos += 1 + if c == "\n": + self.newline() + if skip_comments and c == "/": + self.read_comment() + + return + + """ Call this every time a '\n' is encountered. 
""" + + def newline(self): + self.line += 1 + self.line_start = self.pos + + def location(self) -> Location: + return self.location.at(self.line + 1, self.pos - self.line_start + 1) + + def expect(self, condition: bool, message: str): + location = self.location() + if not condition: + self.unexpected(message, location) + + def expect_with_location(self, condition: bool, location: Location, message: str): + if not condition: + self.unexpected(message, location) + + def unexpected(self, message: str, location: Location = None): + if not location: + location = self.location() + raise ProtobufParserRuntimeException(f"Syntax error in {location.to_string()}: {message}") diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 2cf297248..245672259 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -1,3 +1,134 @@ +import builtins + + def protobuf_encode(a: str) -> str: # TODO: PROTOBUF return a + + +def append_documentation(data: list, documentation: str): + if not documentation: + return + documentation.split() + lines: list = documentation.split("\n") + + if len(lines) > 1 and lines[-1]: + lines = lines.pop() + + for line in lines: + data.append("# ") + data.append(line) + data.append("\n") + + +def append_options(data: list, options: list): + count = len(options) + if count == 1: + data.append('[') + data.append(options[0].to_schema()) + data.append(']') + return + + data.append("[\n") + for i in range(0, count): + if i < count - 1: + endl = "," + else: + endl = "" + append_indented(data, options[i].to_schema() + endl) + data.append(']') + + +def append_indented(data: list, value: str): + lines = value.split("\n") + if len(lines) > 1 and not lines[-1]: + lines = lines.pop() + + for line in lines: + data.append(" ") + data.append(line) + data.append("\n") + + +MIN_TAG_VALUE = 1 +MAX_TAG_VALUE = ((1 << 29) & 0xffffffffffffffff) - 1 # 536,870,911 + +RESERVED_TAG_VALUE_START = 19000 +RESERVED_TAG_VALUE_END = 19999 + +""" True if 
the supplied value is in the valid tag range and not reserved. """ + + +class MyInt(int): + def is_valid_tag(self) -> bool: + return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or ( + RESERVED_TAG_VALUE_END + 1 <= self <= MAX_TAG_VALUE + 1) + + +builtins.int = MyInt + +# TODO: remove following text if not implemented + +""" internal expect fun Char.isDigit(): bool + +internal expect fun str.toEnglishLowerCase(): str + +expect interface MutableQueue : MutableCollection { + fun poll(): T? +} + +internal expect fun mutableQueueOf(): MutableQueue + +# TODO internal and friend for wire-compiler: https:#youtrack.jetbrains.com/issue/KT-34102 + + * Replace types in this schema which are present in [typesToStub] with empty shells that have no + * outward references. This has to be done in this module so that we can access the internal + * constructor to avoid re-linking. + +fun Schema.withStubs(typesToStub: Set): Schema { + if (typesToStub.isEmpty(): + return this + } + return Schema(protoFiles.map { protoFile -> + protoFile.copy( + types = protoFile.types.map { type -> + if (type.type in typesToStub) type.asStub() else type + }, + services = protoFile.services.map { service -> + if (service.type in typesToStub) service.asStub() else service + } + ) + }) +} + + Return a copy of this type with all possible type references removed. +private fun Type.asStub(): Type = when { + # Don't stub the built-in protobuf types which model concepts like options. 
+ type.tostr().startsWith("google.protobuf.") -> this + + this is MessageType -> copy( + declaredFields = emptyList(), + extensionFields = mutableListOf(), + nestedTypes = nestedTypes.map { it.asStub() }, + options = Options(Options.MESSAGE_OPTIONS, emptyList()) + ) + + this is EnumType -> copy( + constants = emptyList(), + options = Options(Options.ENUM_OPTIONS, emptyList()) + ) + + this is EnclosingType -> copy( + nestedTypes = nestedTypes.map { it.asStub() } + ) + + else -> throw AssertionError("Unknown type $type") +} + + Return a copy of this service with all possible type references removed. +private fun Service.asStub() = copy( + rpcs = emptyList(), + options = Options(Options.SERVICE_OPTIONS, emptyList()) +) + +""" From da4fa3be214fc3929b7e15c0fda87415508cd628 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 30 Apr 2021 15:06:30 +0300 Subject: [PATCH 010/168] beta version of prot_parser class (no dependencies) --- karapace/protobuf/enum_constant_element.py | 37 ++ karapace/protobuf/enum_element.py | 41 ++ karapace/protobuf/extensions_element.py | 40 ++ karapace/protobuf/field_element.py | 4 +- karapace/protobuf/group_element.py | 38 ++ karapace/protobuf/kotlin_wrapper.py | 23 +- karapace/protobuf/location.py | 2 +- karapace/protobuf/message_element.py | 76 +++ karapace/protobuf/one_of_element.py | 35 ++ karapace/protobuf/option_element.py | 17 +- karapace/protobuf/option_reader.py | 5 +- karapace/protobuf/proto_parser.py | 608 ++++++++++++++++++--- karapace/protobuf/proto_type.py | 168 +++--- karapace/protobuf/reserved_document.py | 37 ++ karapace/protobuf/rpc_element.py | 53 ++ karapace/protobuf/service_element.py | 6 +- karapace/protobuf/syntax_reader.py | 72 ++- karapace/protobuf/type_element.py | 12 + 18 files changed, 1050 insertions(+), 224 deletions(-) create mode 100644 karapace/protobuf/enum_constant_element.py create mode 100644 karapace/protobuf/enum_element.py create mode 100644 karapace/protobuf/extensions_element.py create mode 
100644 karapace/protobuf/group_element.py create mode 100644 karapace/protobuf/message_element.py create mode 100644 karapace/protobuf/one_of_element.py create mode 100644 karapace/protobuf/reserved_document.py create mode 100644 karapace/protobuf/rpc_element.py create mode 100644 karapace/protobuf/type_element.py diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py new file mode 100644 index 000000000..826581577 --- /dev/null +++ b/karapace/protobuf/enum_constant_element.py @@ -0,0 +1,37 @@ +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, append_options + + +class EnumConstantElement: + location: Location + name: str + tag: int + documentation: str + options: list = list() + + def __init__(self, + location: Location, + name: str, + tag: int, + documentation: str, + options: list, + ): + self.location = location + self.name = name + + self.tag = tag + self.options = options + if not documentation: + self.documentation = "" + else: + self.documentation = documentation + + def to_schema(self) -> str: + result: list = list() + append_documentation(result, self.documentation) + result.append(f"{self.name} = {self.tag}") + if self.options and len(self.options): + result.append(" ") + append_options(result, self.options) + result.append(";\n") + return "".join(result) diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py new file mode 100644 index 000000000..42cbce9c6 --- /dev/null +++ b/karapace/protobuf/enum_element.py @@ -0,0 +1,41 @@ +from karapace.protobuf.type_element import TypeElement +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, append_indented + + +class EnumElement(TypeElement): + constants: list = list() + + def __init__(self, + location: Location, + name: str, + documentation: str, + options: list, + constants: list + ): + self.location = location + self.name = 
name + self.documentation = documentation + self.options = options + self.constants = constants + # Enums do not allow nested type declarations. + self.nested_types = list() + + def to_schema(self) -> str: + result: list = list() + append_documentation(result, self.documentation) + result.append(f"enum {self.name} {{") + + if self.options and len(self.options) or self.constants and len(self.constants): + result.append("\n") + + if self.options and len(self.options): + for option in self.options: + append_indented(result, option.to_schema_declaration()) + + if self.constants and len(self.constants): + for constant in self.constants: + append_indented(result, constant.to_schema()) + + result.append("}\n") + return "".join(result) diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py new file mode 100644 index 000000000..c7be3e7bb --- /dev/null +++ b/karapace/protobuf/extensions_element.py @@ -0,0 +1,40 @@ +from karapace.protobuf.kotlin_wrapper import IntRange +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, MAX_TAG_VALUE + + +class ExtensionsElement: + location: Location + documentation: str = "" + """ An [Int] or [IntRange] tag. """ + values: list + + def __init__(self, location: Location, documentation: str, values: list): + self.location = location + self.documentation = documentation + self.values = values + + def to_schema(self) -> str: + result: list = [] + append_documentation(result, self.documentation) + result.append("extensions ") + + for index in range(len(self.values)): + value = self.values[index] + if index > 0: + result.append(", ") + if value is int: + result.append(value) + # TODO: maybe replace Kotlin IntRange by list? 
+ elif value is IntRange: + result.append(f"{value[0]} to ") + last_value = value[len(value) - 1] + if last_value < MAX_TAG_VALUE: + result.append(last_value) + else: + result.append("max") + else: + raise AssertionError() + + result.append(";\n") + return "".join(result) diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 20d4687fb..b0f5a5f60 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -1,3 +1,5 @@ +from typing import Union + from karapace.protobuf.field import Field from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement @@ -16,7 +18,7 @@ class FieldElement: documentation: str = "", options: list = list() - def __init__(self, location: Location, label: Field.Label, element_type: str, + def __init__(self, location: Location, label: Union[None, Field.Label], element_type: str, name: str, default_value: str, json_name: str, tag: int, documentation: str, options: list): self.location = location diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py new file mode 100644 index 000000000..7cedae517 --- /dev/null +++ b/karapace/protobuf/group_element.py @@ -0,0 +1,38 @@ +from typing import Union + +from karapace.protobuf.field import Field +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, append_indented + + +class GroupElement: + label: Field.Label + location: Location + name: str + tag: int + documentation: str = "" + fields: list = list() + + def __init__(self, label: Union[None, Field.Label], location: Location, name: str, tag: int, documentation: str, + fields: list): + self.label = label + self.location = location + self.name = name + self.tag = tag + self.documentation = documentation + self.fields = fields + + def to_schema(self) -> str: + result: list = [] + append_documentation(result, self.documentation) + + # TODO: compare lower() to 
lowercase() and toLowerCase(Locale.US) Kotlin + if self.label: + result.append(f"{str(self.label.name).lower()} ") + result.append(f"group {self.name} = {self.tag} {{") + if self.fields and len(self.fields): + result.append("\n") + for field in self.fields: + append_indented(result, field.to_schema()) + result.append("}\n") + return "".join(result) diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index 4529a9c67..db2c97d43 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -1,5 +1,3 @@ - - def check(q: bool, message: str): if not q: raise IllegalStateException(message) @@ -10,12 +8,21 @@ def require(q: bool, message: str): raise IllegalArgumentException(message) +def options_to_list(a: list) -> list: + # TODO + return a + + class IllegalStateException(Exception): def __init__(self, message="IllegalStateException"): self.message = message super().__init__(self.message) +class IntRange(list): + pass + + class IllegalArgumentException(Exception): def __init__(self, message="IllegalArgumentException"): self.message = message @@ -45,3 +52,15 @@ def append_indented(self: list, value: str): class OptionsList(list): pass + + +class KotlinRange: + min: object + max: object + + def __init__(self, min, max): + self.min = min + self.max = max + + def __str__(self) -> str: + return f"{self.min}..{self.max}" diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index 83703a864..e2efc3f38 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -28,7 +28,7 @@ def with_path_only(self): """ Returns a copy of this location including only its path. 
""" return Location("", self.path, -1, -1) - def to_string(self) -> str: + def __str__(self) -> str: result = "" if self.base: result += self.base + "/" diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py new file mode 100644 index 000000000..eb7701a77 --- /dev/null +++ b/karapace/protobuf/message_element.py @@ -0,0 +1,76 @@ +from karapace.protobuf.type_element import TypeElement +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, append_indented + + +class MessageElement(TypeElement): + reserveds: list = [] + fields: list = [] + one_ofs: list = [] + extensions: list = [] + groups: list = [] + + def __init__(self, + location: Location, + name: str, + documentation: str, + nested_types: list, + options: list, + reserveds: list, + fields: list, + one_ofs: list, + extensions: list, + groups: list, + ): + self.location = location + self.name = name + self.documentation = documentation + self.nested_types = nested_types + self.options = options + self.reserveds = reserveds + self.fields = fields + self.one_ofs = one_ofs + self.extensions = extensions + self.groups = groups + + def to_schema(self) -> str: + result: list = list() + append_documentation(result, self.documentation) + result.append(f"message {self.name} {{") + if self.reserveds and len(self.reserveds): + result.append("\n") + for reserved in self.reserveds: + append_indented(result, reserved.to_schema()) + + if self.options and len(self.options): + result.append("\n") + for option in self.options: + append_indented(result, option.to_schema_declaration()) + + if self.fields and len(self.fields): + for field in self.fields: + result.append("\n") + append_indented(result, field.to_schema()) + + if self.one_ofs and len(self.one_ofs): + for one_of in self.one_ofs: + result.append("\n") + append_indented(result, one_of.to_schema()) + + if self.groups and len(self.groups): + for group in self.groups: + result.append("\n") + 
append_indented(result, group.to_schema()) + + if self.extensions and len(self.extensions): + result.append("\n") + for extension in self.extensions: + append_indented(result, extension.to_schema()) + + if self.nested_types and len(self.nested_types): + result.append("\n") + for nested_type in self.nested_types: + append_indented(result, nested_type.to_schema()) + + result.append("}\n") + return "".join(result) diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py new file mode 100644 index 000000000..efee5f887 --- /dev/null +++ b/karapace/protobuf/one_of_element.py @@ -0,0 +1,35 @@ +from karapace.protobuf.utils import append_documentation, append_indented + + +class OneOfElement: + name: str + documentation: str = "" + fields: list = list() + groups: list = list() + options: list = list() + + def __init__(self, name: str, documentation: str, fields: list, groups: list, options: list): + self.name = name + self.documentation = documentation + self.fields = fields + self.groups = groups + self.options = options + + def to_schema(self) -> str: + result: list = list() + append_documentation(result, self.documentation) + result.append(f"oneof {self.name} {{") + if self.options and len(self.options): + for option in self.options: + append_indented(result, option.to_schema_declaration()) + + if self.fields and len(self.fields): + result.append("\n") + for field in self.fields: + append_indented(result, field.to_schema()) + if self.groups and len(self.groups): + result.append("\n") + for group in self.groups: + append_indented(result, group.to_schema()) + result.append("}\n") + return "".join(result) diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 72f17c5f7..764ee425e 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -3,10 +3,6 @@ from karapace.protobuf.utils import append_indented -class OptionsList(list): - pass - - class ListOptionElement(list): 
pass @@ -40,12 +36,12 @@ def to_schema(self) -> str: self.kind in [self.Kind.BOOLEAN, self.Kind.NUMBER, self.Kind.ENUM]: f"{self.formattedName} = {self.value}", self.kind == self.Kind.OPTION: f"{self.formattedName}.{self.value.to_schema()}", self.kind == self.Kind.MAP: list([f"{self.formattedName} = {{\n", - self.format_option_map(self.value), - "}" - ]), + self.format_option_map(self.value), + "}" + ]), self.kind == self.Kind.LIST: list([f"{self.formattedName} = ", - self.append_options(self.value) - ]) + self.append_options(self.value) + ]) }[True] if type(aline) is list: return "".join(aline) @@ -55,7 +51,8 @@ def to_schema(self) -> str: def to_schema_declaration(self): return f"option {self.to_schema()};\n" - def append_options(self, options: list): + @staticmethod + def append_options(options: list): data: list = list() count = len(options) if count == 1: diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index f03103761..53988fcb5 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -1,3 +1,5 @@ +from typing import Union + from karapace.protobuf.syntax_reader import SyntaxReader from karapace.protobuf.option_element import OptionElement @@ -129,7 +131,8 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) """ Adds an object or objects to a List. 
""" - def add_to_list(self, _list: list, value: object): + @staticmethod + def add_to_list(_list: list, value: Union[list, str]): if type(value) is list: for v in list(value): _list.append(v) diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 1c70f610d..5cc8f7923 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -1,45 +1,68 @@ from builtins import str - from enum import Enum +from typing import Union -from typing import List, Any, Union - -from io import StringIO - +from karapace.protobuf.enum_constant_element import EnumConstantElement +from karapace.protobuf.enum_element import EnumElement +from karapace.protobuf.exception import error +from karapace.protobuf.extend_element import ExtendElement +from karapace.protobuf.extensions_element import ExtensionsElement +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.group_element import GroupElement +from karapace.protobuf.kotlin_wrapper import options_to_list, KotlinRange from karapace.protobuf.location import Location +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.one_of_element import OneOfElement +from karapace.protobuf.option_element import OptionElement from karapace.protobuf.option_reader import OptionReader from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.protobuf.reserved_document import ReservedElement +from karapace.protobuf.rpc_element import RpcElement +from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax from karapace.protobuf.syntax_reader import SyntaxReader -from karapace.protobuf.service_element import ServiceElement -from karapace.protobuf.exception import error -from karapace.protobuf.option_element import OptionElement -from karapace.protobuf.extend_element import ExtendElement +from karapace.protobuf.type_element import 
TypeElement +from karapace.protobuf.utils import MAX_TAG_VALUE -from enum import Enum +class Context(Enum): + FILE = 1 + MESSAGE = 2 + ENUM = 3 + RPC = 4 + EXTEND = 5 + SERVICE = 6 + def permits_package(self) -> bool: + return self == Context.FILE + def permits_syntax(self) -> bool: + return self == Context.FILE -class TypeElement : - location: Location - name: str - documentation: str - options: list - nested_types: list - def to_schema(self): - pass + def permits_import(self) -> bool: + return self == Context.FILE + def permits_extensions(self) -> bool: + return self == Context.MESSAGE -class SyntaxReader: - pass + def permits_rpc(self) -> bool: + return self == Context.SERVICE + def permits_one_of(self) -> bool: + return self == Context.MESSAGE -class Context : - FILE = "file" + def permits_message(self) -> bool: + return self == Context.FILE or self == Context.MESSAGE + def permits_service(self) -> bool: + return self == Context.FILE or self == Context.MESSAGE + def permits_enum(self) -> bool: + return self == Context.FILE or self == Context.MESSAGE + def permits_extend(self) -> bool: + return self == Context.FILE or self == Context.MESSAGE class ProtoParser: @@ -51,50 +74,48 @@ class ProtoParser: services: list extends_list: list options: list - declaration_count: int = 0 + declaration_count: int syntax: Syntax = None package_name: str = None prefix: str = "" - def __int__(self, location: Location, data:str): + def __int__(self, location: Location, data: str): self.reader = SyntaxReader(data, location) def read_proto_file(self) -> ProtoFileElement: while True: - documentation = self.reader.read_documentation() + documentation = self.reader.read_documentation() if self.reader.exhausted(): return ProtoFileElement(self.location, self.package_name, self.syntax, self.imports, self.public_imports, self.nested_types, self.services, self.extends_list, self.options) declaration = self.read_declaration(documentation, Context.FILE) - if type(declaration) is TypeElement : 
+ if type(declaration) is TypeElement: # TODO: must add check for execption duplicate = next((x for x in iter(self.nested_types) if x.name == declaration.name), None) - if duplicate : + if duplicate: error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") self.nested_types.append(declaration) - if type(declaration) is ServiceElement : + if type(declaration) is ServiceElement: duplicate = next((x for x in iter(self.services) if x.name == declaration.name), None) - if duplicate : + if duplicate: error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") self.services.append(declaration) if type(declaration) is OptionElement: - self.options.append(declaration) + self.options.append(declaration) - if type(declaration) is ExtendElement : + if type(declaration) is ExtendElement: self.extends_list.append(declaration) - - def read_declaration(self, documentation: str, context: Context): self.declaration_count += 1 index = self.declaration_count # Skip unnecessary semicolons, occasionally used after a nested message declaration. - if self.reader.peek_char(';') : return None - + if self.reader.peek_char(';'): + return None location = self.reader.location() label = self.reader.read_word() @@ -103,98 +124,505 @@ def read_declaration(self, documentation: str, context: Context): # are named after any of the label we check here. result = None - if label == "package" and context.permits_package() : - self.package_name = self.reader.readName() + if label == "package" and context.permits_package(): + self.package_name = self.reader.read_name() self.prefix = f"{self.package_name}." 
self.reader.require(';') return result - elif label == "import" and context.permits_import() : + elif label == "import" and context.permits_import(): import_string = self.reader.read_string() - if import_string == "public" : + if import_string == "public": self.public_imports.append(self.reader.read_string()) - - else : + + else: self.imports.append(import_string) self.reader.require(';') return result - elif label == "syntax" and context.permits_syntax() : - self.reader.expect(self.syntax == None, location, "too many syntax definitions" ) - self.reader.require('=') - self.reader.expect(index == 0, location ,"'syntax' element must be the first declaration in a file") + elif label == "syntax" and context.permits_syntax(): + self.reader.expect_with_location(not self.syntax, location, "too many syntax definitions") + self.reader.require("=") + self.reader.expect_with_location(index == 0, location, + "'syntax' element must be the first declaration in a file") syntax_string = self.reader.read_quoted_string() - try : - syntax = Syntax(syntax_string) + try: + Syntax(syntax_string) except Exception as e: # TODO: } catch (e: IllegalArgumentException) { ??? 
self.reader.unexpected(str(e), location) - self.reader.require(';') + self.reader.require(";") return result - elif label == "option" : - result = OptionReader(self.reader).read_option('=') - self.reader.require(';') + elif label == "option": + result = OptionReader(self.reader).read_option("=") + self.reader.require(";") return result - elif label == "reserved" : + elif label == "reserved": return self.read_reserved(location, documentation) - elif label == "message" and context.permits_message() : + elif label == "message" and context.permits_message(): return self.read_message(location, documentation) - elif label == "enum" and context.permits_enum() : + elif label == "enum" and context.permits_enum(): return self.read_enum_element(location, documentation) - elif label == "service" and context.permits_service() : + elif label == "service" and context.permits_service(): return self.read_service(location, documentation) - elif label == "extend" and context.permits_extend() : + elif label == "extend" and context.permits_extend(): return self.read_extend(location, documentation) - elif label == "rpc" and context.permits_rpc() : - return self.seread_rpc(location, documentation) - elif label == "oneof" and context.permits_one_of() : + elif label == "rpc" and context.permits_rpc(): + return self.read_rpc(location, documentation) + elif label == "oneof" and context.permits_one_of(): return self.read_one_of(documentation) - elif label == "extensions" and context.permits_extensions() : + elif label == "extensions" and context.permits_extensions(): return self.read_extensions(location, documentation) - elif context == Context.MESSAGE or context == Context.EXTEND : + elif context == Context.MESSAGE or context == Context.EXTEND: return self.read_field(documentation, location, label) - elif context == Context.ENUM : + elif context == Context.ENUM: return self.read_enum_constant(documentation, location, label) - else : + else: self.reader.unexpected("unexpected label: $label", 
location) - """ Reads a message declaration. """ - def read_message( self, location: Location, documentation: String )-> MessageElement : - name :str = self.reader.readName() - fields:list = list() - one_ofs:list = list() - nestedTypes:list = list() - extensions :list = list() - options :list = list() - reserveds :list = list() - groups :list = list() - - previousPrefix = self.prefix + + def read_message(self, location: Location, documentation: str) -> MessageElement: + name: str = self.reader.read_name() + fields: list = list() + one_ofs: list = list() + nested_types: list = list() + extensions: list = list() + options: list = list() + reserveds: list = list() + groups: list = list() + + previous_prefix = self.prefix self.prefix = f"{self.prefix}{name}." - self.reader.require('{') - while True : + self.reader.require("{") + while True: nested_documentation = self.reader.read_documentation() - if self.reader.peek_char('}'): + if self.reader.peek_char("}"): break - - declared = self.read_declaration(nested_documentation, Context.MESSAGE) : + declared = self.read_declaration(nested_documentation, Context.MESSAGE) type_declared = type(declared) - if type_declared is FieldElement : + if type_declared is FieldElement: fields.append(declared) - elif type_declared is OneOfElement : + elif type_declared is OneOfElement: one_ofs.append(declared) - elif type_declared is GroupElement : + elif type_declared is GroupElement: groups.append(declared) - elif type_declared is TypeElement : - nestedTypes.append(declared) - elif type_declared is ExtensionsElement : + elif type_declared is TypeElement: + nested_types.append(declared) + elif type_declared is ExtensionsElement: extensions.append(declared) - elif type_declared is OptionElement : + elif type_declared is OptionElement: options.append(declared) # Extend declarations always add in a global scope regardless of nesting. 
- elif type_declared is ExtendElement : + elif type_declared is ExtendElement: self.extends_list.append(declared) - elif type_declared is ReservedElement : + elif type_declared is ReservedElement: reserveds.append(declared) - \ No newline at end of file + + self.prefix = previous_prefix + + return MessageElement( + location, + name, + documentation, + nested_types, + options, + reserveds, + fields, + one_ofs, + extensions, + groups, + ) + + """ Reads an extend declaration. """ + + def read_extend(self, location: Location, documentation: str) -> ExtendElement: + name = self.reader.read_name() + fields: list = [] + self.reader.require("{") + while True: + nested_documentation = self.reader.read_documentation() + if self.reader.peek_char("}"): + break + + declared = self.read_declaration(nested_documentation, Context.EXTEND) + if declared is FieldElement: + fields.append(declared) + # TODO: add else clause to catch unexpected declarations. + else: + pass + + return ExtendElement( + location, + name, + documentation, + fields, + ) + + """ Reads a service declaration and returns it. """ + + def read_service(self, location: Location, documentation: str) -> ServiceElement: + name = self.reader.read_name() + rpcs = list() + options: list = list() + self.reader.require('{') + while True: + rpc_documentation = self.reader.read_documentation() + if self.reader.peek_char("}"): + break + declared = self.read_declaration(rpc_documentation, Context.SERVICE) + if declared is RpcElement: + rpcs.append(declared) + elif declared is OptionElement: + options.append(declared) + # TODO: add else clause to catch unexpected declarations. + else: + pass + + return ServiceElement( + location, + name, + documentation, + rpcs, + options, + ) + + """ Reads an enumerated atype declaration and returns it. 
""" + + def read_enum_element(self, location: Location, documentation: str) -> EnumElement: + name = self.reader.read_name() + constants: list = list() + options: list = list() + declared = None + self.reader.require("{") + while True: + value_documentation = self.reader.read_documentation() + if self.reader.peek_char("}"): + break + declared = self.read_declaration(value_documentation, Context.ENUM) + + if declared is EnumConstantElement: + constants.append(declared) + elif declared is OptionElement: + options.append(declared) + # TODO: add else clause to catch unexpected declarations. + else: + pass + + return EnumElement(location, name, documentation, options, constants) + + def read_field(self, documentation: str, location: Location, word: str): + label: Union[None, Field.Label] + atype: str + if word == "required": + self.reader.expect_with_location(self.syntax != Syntax.PROTO_3, location, + "'required' label forbidden in proto3 field declarations") + label = Field.Label.REQUIRED + atype = self.reader.read_data_type() + elif word == "optional": + label = Field.Label.OPTIONAL + atype = self.reader.read_data_type() + + elif word == "repeated": + label = Field.Label.REPEATED + atype = self.reader.read_data_type() + else: + self.reader.expect_with_location(self.syntax == Syntax.PROTO_3 or + (word == "map" and self.reader.peek_char() == "<"), + location, + f"unexpected label: {word}") + + label = None + atype = self.reader.read_data_type_by_name(word) + + self.reader.expect_with_location(not atype.startswith("map<") or not label, + location, + "'map' atype cannot have label" + ) + if atype == "group ": + return self.read_group(location, documentation, label) + else: + return self.read_field_with_label(location, documentation, label, atype) + + """ Reads an field declaration and returns it. 
""" + + def read_field_with_label(self, + location: Location, + documentation: str, + label: Union[None, Field.Label], + atype: str + ) -> FieldElement: + + name = self.reader.read_name() + self.reader.require('=') + tag = self.reader.read_int() + + # Mutable copy to extract the default value, and add packed if necessary. + options: list = OptionReader(self.reader).read_options() + + default_value = self.strip_default(options) + json_name = self.strip_json_name(options) + self.reader.require(';') + + documentation = self.reader.try_append_trailing_documentation(documentation) + + return FieldElement( + location, + label, + atype, + name, + default_value, + json_name, + tag, + documentation, + options_to_list(options), + ) + + """ Defaults aren't options. """ + + def strip_default(self, options: list) -> str: + return self.strip_value("default", options) + + """ `json_name` isn't an option. """ + + def strip_json_name(self, options: list) -> str: + return self.strip_value("json_name", options) + + """ + This finds an option named [name], removes, and returns it. + Returns None if no [name] option is present. 
+ """ + + @staticmethod + def strip_value(name: str, options: list) -> str: + result: Union[None, str] = None + for element in options[:]: + if element.name == name: + options.remove(element) + result = str(element.value) + return result + + def read_one_of(self, documentation: str) -> OneOfElement: + name: str = self.reader.read_name() + fields: list = list() + groups: list = list() + options: list = list() + + self.reader.require("{") + while True: + nested_documentation = self.reader.read_documentation() + if self.reader.peek_char("}"): + break + + location = self.reader.location() + atype = self.reader.read_data_type() + if atype == "group": + groups.append(self.read_group(location, nested_documentation, None)) + elif atype == "option": + options.append(OptionReader(self.reader).read_option("=")) + self.reader.require(";") + else: + fields.append(self.read_field_with_label(location, nested_documentation, None, atype)) + + return OneOfElement( + name, + documentation, + fields, + groups, + options, + ) + + def read_group( + self, + location: Location, + documentation: str, + label: Union[None, Field.Label], + ) -> GroupElement: + name = self.reader.read_word() + self.reader.require("=") + tag = self.reader.read_int() + fields: list = list() + self.reader.require("{") + + while True: + nested_documentation = self.reader.read_documentation() + if self.reader.peek_char("}"): + break + + field_location = self.reader.location() + field_label = self.reader.read_word() + field = self.read_field(nested_documentation, field_location, field_label) + if field is FieldElement: + fields.append(field) + else: + self.reader.unexpected(f"expected field declaration, was {field}") + + return GroupElement( + label, + location, + name, + tag, + documentation, + fields + ) + + """ Reads a reserved tags and names list like "reserved 10, 12 to 14, 'foo';". 
""" + + def read_reserved(self, location: Location, documentation: str) -> ReservedElement: + values: list = list() + while True: + ch = self.reader.peek_char() + if ch in ["\"", "'"]: + values.append(self.reader.read_quoted_string()) + else: + tag_start = self.reader.read_int() + ch = self.reader.peek_char() + if ch in [",", ";"]: + values.append(tag_start) + else: + self.reader.expect_with_location(self.reader.read_word() == "to", location, + "expected ',', ';', or 'to'") + tag_end = self.reader.read_int() + values.append(KotlinRange(tag_start, tag_end)) + + ch = self.reader.read_char() + if ch == ";": + break + elif ch == ",": + continue + else: + self.reader.unexpected("expected ',' or ';'") + a = False + if values and len(values): + a = True + + self.reader.expect_with_location(a, location, + "'reserved' must have at least one field name or tag") + my_documentation = self.reader.try_append_trailing_documentation(documentation) + + return ReservedElement( + location, + my_documentation, + values + ) + + """ Reads extensions like "extensions 101;" or "extensions 101 to max;". """ + + def read_extensions(self, + location: Location, + documentation: str + ) -> ExtensionsElement: + values: list = list() + while True: + start: int = self.reader.read_int() + ch = self.reader.peek_char() + end: int + if ch in [",", ";"]: + values.append(start) + else: + self.reader.expect_with_location(self.reader.read_word() == "to", location, "expected ',', ';' or 'to'") + s = self.reader.read_word() + if s == "max": + end = MAX_TAG_VALUE + else: + end = int(s) + values.append(KotlinRange(start, end)) + + ch = self.reader.read_char() + if ch == ";": + break + elif ch == ",": + continue + else: + self.reader.unexpected("expected ',' or ';'") + + return ExtensionsElement( + location, + documentation, + values + ) + + """ Reads an enum constant like "ROCK = 0;". The label is the constant name. 
""" + + def read_enum_constant(self, documentation: str, location: Location, label: str) -> EnumConstantElement: + self.reader.require('=') + tag = self.reader.read_int() + + options: list = OptionReader(self.reader).read_options() + self.reader.require(';') + + documentation = self.reader.try_append_trailing_documentation(documentation) + + return EnumConstantElement( + location, + label, + tag, + documentation, + options, + ) + + """ Reads an rpc and returns it. """ + + def read_rpc(self, location: Location, documentation: str) -> RpcElement: + name = self.reader.read_name() + + self.reader.require('(') + request_streaming = False + + word = self.reader.read_word() + if word == "stream": + request_streaming = True + request_type = self.reader.read_data_type() + else: + request_type = self.reader.read_data_type_by_name(word) + + self.reader.require(')') + + self.reader.expect_with_location(self.reader.read_word() == "returns", location, "expected 'returns'") + + self.reader.require('(') + response_streaming = False + + word = self.reader.read_word() + if word == "stream": + response_streaming = True + response_type = self.reader.read_data_type() + else: + response_type = self.reader.read_data_type_by_name(word) + + self.reader.require(')') + + options: list = list() + if self.reader.peek_char('{'): + while True: + rpc_documentation = self.reader.read_documentation() + if self.reader.peek_char('}'): + break + + declared = self.read_declaration(rpc_documentation, Context.RPC) + if declared is OptionElement: + options.append(declared) + # TODO: add else clause to catch unexpected declarations. + else: + pass + + else: + self.reader.require(';') + + return RpcElement( + location, + name, + documentation, + request_type, + response_type, + request_streaming, + response_streaming, + options + ) + + """ Parse a named `.proto` schema. 
""" + + @staticmethod + def parse(location: Location, data: str) -> ProtoFileElement: + proto_parser = ProtoParser(location, data) + return proto_parser.read_proto_file() diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 9618f590b..3adf994e5 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -3,10 +3,18 @@ fully-qualified name using the protocol buffer package. """ + from karapace.protobuf.kotlin_wrapper import check, require from karapace.protobuf.option_element import OptionElement +def static_init(cls): + if getattr(cls, "static_init", None): + cls.static_init() + return cls + + +@static_init class ProtoType: is_scalar: bool string: str @@ -22,80 +30,82 @@ def simple_name(self) -> str: dot = self.string.rfind(".") return self.string[dot + 1] + @classmethod + def static_init(cls): + cls.BOOL = ProtoType(True, "bool") + cls.BYTES = ProtoType(True, "bytes") + cls.DOUBLE = ProtoType(True, "double") + cls.FLOAT = ProtoType(True, "float") + cls.FIXED32 = ProtoType(True, "fixed32") + cls.FIXED64 = ProtoType(True, "fixed64") + cls.INT32 = ProtoType(True, "int32") + cls.INT64 = ProtoType(True, "int64") + cls.SFIXED32 = ProtoType(True, "sfixed32") + cls.SFIXED64 = ProtoType(True, "sfixed64") + cls.SINT32 = ProtoType(True, "sint32") + cls.SINT64 = ProtoType(True, "sint64") + cls.STRING = ProtoType(True, "string") + cls.UINT32 = ProtoType(True, "uint32") + cls.UINT64 = ProtoType(True, "uint64") + cls.ANY = ProtoType(False, "google.protobuf.Any") + cls.DURATION = ProtoType(False, "google.protobuf.Duration") + cls.TIMESTAMP = ProtoType(False, "google.protobuf.Timestamp") + cls.EMPTY = ProtoType(False, "google.protobuf.Empty") + cls.STRUCT_MAP = ProtoType(False, "google.protobuf.Struct") + cls.STRUCT_VALUE = ProtoType(False, "google.protobuf.Value") + cls.STRUCT_NULL = ProtoType(False, "google.protobuf.NullValue") + cls.STRUCT_LIST = ProtoType(False, "google.protobuf.ListValue") + cls.DOUBLE_VALUE = 
ProtoType(False, "google.protobuf.DoubleValue") + cls.FLOAT_VALUE = ProtoType(False, "google.protobuf.FloatValue") + cls.INT64_VALUE = ProtoType(False, "google.protobuf.Int64Value") + cls.UINT64_VALUE = ProtoType(False, "google.protobuf.UInt64Value") + cls.INT32_VALUE = ProtoType(False, "google.protobuf.Int32Value") + cls.UINT32_VALUE = ProtoType(False, "google.protobuf.UInt32Value") + cls.BOOL_VALUE = ProtoType(False, "google.protobuf.BoolValue") + cls.STRING_VALUE = ProtoType(False, "google.protobuf.StringValue") + cls.BYTES_VALUE = ProtoType(False, "google.protobuf.BytesValue") + + cls.SCALAR_TYPES_ = [cls.BOOL, + cls.BYTES, + cls.DOUBLE, + cls.FLOAT, + cls.FIXED32, + cls.FIXED64, + cls.INT32, + cls.INT64, + cls.SFIXED32, + cls.SFIXED64, + cls.SINT32, + cls.SINT64, + cls.STRING, + cls.UINT32, + cls.UINT64 + ] + + cls.SCALAR_TYPES: dict = dict() + + for a in cls.SCALAR_TYPES_: + cls.SCALAR_TYPES[a.string] = a + + cls.NUMERIC_SCALAR_TYPES: tuple = ( + cls.DOUBLE, + cls.FLOAT, + cls.FIXED32, + cls.FIXED64, + cls.INT32, + cls.INT64, + cls.SFIXED32, + cls.SFIXED64, + cls.SINT32, + cls.SINT64, + cls.UINT32, + cls.UINT64 + ) + """ Creates a scalar or message type. 
""" def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None): - self.BOOL = ProtoType(True, "bool") - self.BYTES = ProtoType(True, "bytes") - self.DOUBLE = ProtoType(True, "double") - self.FLOAT = ProtoType(True, "float") - self.FIXED32 = ProtoType(True, "fixed32") - self.FIXED64 = ProtoType(True, "fixed64") - self.INT32 = ProtoType(True, "int32") - self.INT64 = ProtoType(True, "int64") - self.SFIXED32 = ProtoType(True, "sfixed32") - self.SFIXED64 = ProtoType(True, "sfixed64") - self.SINT32 = ProtoType(True, "sint32") - self.SINT64 = ProtoType(True, "sint64") - self.STRING = ProtoType(True, "string") - self.UINT32 = ProtoType(True, "uint32") - self.UINT64 = ProtoType(True, "uint64") - self.ANY = ProtoType(False, "google.protobuf.Any") - self.DURATION = ProtoType(False, "google.protobuf.Duration") - self.TIMESTAMP = ProtoType(False, "google.protobuf.Timestamp") - self.EMPTY = ProtoType(False, "google.protobuf.Empty") - self.STRUCT_MAP = ProtoType(False, "google.protobuf.Struct") - self.STRUCT_VALUE = ProtoType(False, "google.protobuf.Value") - self.STRUCT_NULL = ProtoType(False, "google.protobuf.NullValue") - self.STRUCT_LIST = ProtoType(False, "google.protobuf.ListValue") - self.DOUBLE_VALUE = ProtoType(False, "google.protobuf.DoubleValue") - self.FLOAT_VALUE = ProtoType(False, "google.protobuf.FloatValue") - self.INT64_VALUE = ProtoType(False, "google.protobuf.Int64Value") - self.UINT64_VALUE = ProtoType(False, "google.protobuf.UInt64Value") - self.INT32_VALUE = ProtoType(False, "google.protobuf.Int32Value") - self.UINT32_VALUE = ProtoType(False, "google.protobuf.UInt32Value") - self.BOOL_VALUE = ProtoType(False, "google.protobuf.BoolValue") - self.STRING_VALUE = ProtoType(False, "google.protobuf.StringValue") - self.BYTES_VALUE = ProtoType(False, "google.protobuf.BytesValue") - - self.SCALAR_TYPES_ = [self.BOOL, - self.BYTES, - self.DOUBLE, - self.FLOAT, - self.FIXED32, - self.FIXED64, - self.INT32, - self.INT64, - self.SFIXED32, - 
self.SFIXED64, - self.SINT32, - self.SINT64, - self.STRING, - self.UINT32, - self.UINT64 - ] - - self.SCALAR_TYPES: dict = dict() - - for a in self.SCALAR_TYPES_: - self.SCALAR_TYPES[a.string] = a - - self.NUMERIC_SCALAR_TYPES: tuple = ( - self.DOUBLE, - self.FLOAT, - self.FIXED32, - self.FIXED64, - self.INT32, - self.INT64, - self.SFIXED32, - self.SFIXED64, - self.SINT32, - self.SINT64, - self.UINT32, - self.UINT64 - ) - if key_type is None and value_type is None: self.is_scalar = is_scalar self.string = string @@ -164,26 +174,28 @@ def __eq__(self, other): def __ne__(self, other): return type(other) is not ProtoType or self.string != other.string - def to_string(self) -> str: + def __str__(self) -> str: return self.string def hash_code(self) -> int: return hash(self.string) - def get(self, enclosing_type_or_package: str, type_name: str) -> object: - return self.get2(f"{enclosing_type_or_package}.{type_name}") if enclosing_type_or_package else self.get2( - type_name) + @staticmethod + def get(enclosing_type_or_package: str, type_name: str) -> object: + return ProtoType.get2(f"{enclosing_type_or_package}.{type_name}") \ + if enclosing_type_or_package else ProtoType.get2(type_name) - def get2(self, name: str) -> object: - scalar = self.SCALAR_TYPES[name] + @staticmethod + def get2(name: str) -> object: + scalar = ProtoType.SCALAR_TYPES[name] if scalar: return scalar require(name and len(name) != 0 and name.rfind("#") == -1, f"unexpected name: {name}") if name.startswith("map<") and name.endswith(">"): comma = name.rfind(",") require(comma != -1, f"expected ',' in map type: {name}") - key = self.get2(name[4:comma].strip()) - value = self.get2(name[comma + 1:len(name)].strip()) + key = ProtoType.get2(name[4:comma].strip()) + value = ProtoType.get2(name[comma + 1:len(name)].strip()) return ProtoType(False, name, key, value) return ProtoType(False, name) diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py new file mode 100644 
index 000000000..0e8bf9e96 --- /dev/null +++ b/karapace/protobuf/reserved_document.py @@ -0,0 +1,37 @@ +from karapace.protobuf.kotlin_wrapper import IntRange +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation + + +class ReservedElement: + location: Location + documentation: str + """ A [String] name or [Int] or [IntRange] tag. """ + values: list + + def __init__(self, location: Location, documentation: str, values: list): + self.location = location + self.documentation = documentation + self.values = values + + def to_schema(self) -> str: + result: list = list() + append_documentation(result, self.documentation) + result.append("reserved ") + + for index in range(len(self.values)): + value = self.values[index] + if index > 0: + result.append(", ") + + if value is str: + result.append(f"\"{value}\"") + elif value is int: + result.append(f"{value}") + elif value is IntRange: + last_index = len(value) - 1 + result.append(f"{value[0]} to {value[last_index]}") + else: + raise AssertionError() + result.append(";\n") + return "".join(result) diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py new file mode 100644 index 000000000..b264f0df0 --- /dev/null +++ b/karapace/protobuf/rpc_element.py @@ -0,0 +1,53 @@ +from karapace.protobuf.location import Location +from karapace.protobuf.utils import append_documentation, append_indented + + +class RpcElement: + location: Location + name: str + documentation: str + request_type: str + response_type: str + request_streaming: bool + response_streaming: bool + options: list + + def __init__(self, + location: Location, + name: str, + documentation: str, + request_type: str, + response_type: str, + request_streaming: bool, + response_streaming: bool, + options: list, + ): + self.location = location + self.name = name + self.documentation = documentation + self.request_type = request_type + self.response_type = response_type + self.request_streaming = 
request_streaming + self.response_streaming = response_streaming + self.options = options + + def to_schema(self) -> str: + result: list = list() + append_documentation(result, self.documentation) + result.append(f"rpc {self.name} (") + + if self.request_streaming: + result.append("stream ") + result.append(f"{self.request_type}) returns (") + + if self.response_streaming: + result.append("stream ") + result.append(f"{self.response_type})") + + if self.options and len(self.options): + result.append(" {\n") + for option in self.options: + append_indented(result, option.to_schema_declaration()) + result.append("}") + result.append(";\n") + return "".join(result) diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index add24b575..38c79f442 100644 --- a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -1,7 +1,5 @@ from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented -from karapace.protobuf.kotlin_wrapper import * -from karapace.protobuf.option_element import OptionsList class ServiceElement: @@ -9,9 +7,9 @@ class ServiceElement: name: str documentation: str rpcs: list - options: OptionsList + options: list - def __init__(self, location: Location, name: str, documentation: str, rpcs: list, options: OptionsList + def __init__(self, location: Location, name: str, documentation: str, rpcs: list, options: list ): self.location = location self.name = name diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 6e398b7b6..36c17027c 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -13,13 +13,13 @@ def hex_digit(c: str) -> int: return -1 -def min_of(a:int, b:int)->int : - return a if a int: + return a if a < b else b class SyntaxReader: data: str - location: Location + _location: Location """ Next character to be read """ pos: int = 0 @@ -30,7 +30,7 @@ class 
SyntaxReader: def __init__(self, data: str, location: Location): self.data = data - self.location = location + self._location = location def exhausted(self) -> bool: return self.pos == len(self.data) @@ -80,10 +80,6 @@ def read_string(self) -> str: else: return self.read_word() - def read_numeric_escape_8_3(self) -> int: - self.pos -= 1 - return self.read_numeric_escape(8, 3) - def read_quoted_string(self) -> str: start_quote = self.read_char() if start_quote != '"' and start_quote != '\'': @@ -105,7 +101,8 @@ def read_quoted_string(self) -> str: self.expect(self.pos < len(self.data), "unexpected end of file") self.pos += 1 c = self.data[self.pos] - c = { + + d: str = { 'a': "\u0007", # Alert. 'b': "\b", # Backspace. 'f': "\u000c", # Form feed. @@ -113,17 +110,15 @@ def read_quoted_string(self) -> str: 'r': "\r", # Carriage return. 't': "\t", # Horizontal tab. 'v': "\u000b", # Vertical tab. - 'x': self.read_numeric_escape(16, 2), - 'X': self.read_numeric_escape(16, 2), - '0': self.read_numeric_escape_8_3(), - '1': self.read_numeric_escape_8_3(), - '2': self.read_numeric_escape_8_3(), - '3': self.read_numeric_escape_8_3(), - '4': self.read_numeric_escape_8_3(), - '5': self.read_numeric_escape_8_3(), - '6': self.read_numeric_escape_8_3(), - '7': self.read_numeric_escape_8_3() }.get(c) + if d: + c = d + else: + if c in ['x', 'X']: + c = self.read_numeric_escape(16, 2) + elif ord(c) in range(ord('0'), ord('7')): + self.pos -= 1 + c = self.read_numeric_escape(8, 3) result.append(c) if c == "\n": @@ -131,7 +126,7 @@ def read_quoted_string(self) -> str: self.unexpected("unterminated string") - def read_numeric_escape(self, radix: int, length: int) -> int: + def read_numeric_escape(self, radix: int, length: int) -> str: value = -1 end_pos = min_of(self.pos + length, len(self.data)) while self.pos < end_pos: @@ -168,18 +163,18 @@ def read_name(self) -> str: def read_data_type(self) -> str: name = self.read_word() - return self.read_self.data_type(name) + return 
self.read_data_type_by_name(name) """ Reads a scalar, map, or type name with `name` as a prefix word. """ - def read_data_type(self, name: str) -> str: + def read_data_type_by_name(self, name: str) -> str: if name == "map": self.expect(self.read_char() == '<', "expected '<'") - key_type = self.read_self.data_type() + key_type = self.read_data_type() self.expect(self.read_char() == ',', "expected ','") - value_type = self.read_self.data_type() + value_type = self.read_data_type() self.expect(self.read_char() == '>', "expected '>'") return f"map<{key_type}, {value_type}>" @@ -193,12 +188,14 @@ def read_word(self) -> str: start = self.pos while self.pos < len(self.data): c = self.data[self.pos] - if c in range('a', 'z') or c in range('A', 'Z') or c in range('0', '9') or c in ['_', '-', '.']: + if ord(c) in range(ord('a'), ord('z')) \ + or ord(c) in range(ord('A'), ord('Z')) \ + or ord(c) in range(ord('0'), ord('9')) or c in ['_', '-', '.']: self.pos += 1 else: break self.expect(start < self.pos, "expected a word") - return self.data[start:self.pos - start].decode() + return self.data[start:self.pos - start] """ Reads an integer and returns it. """ @@ -210,21 +207,21 @@ def read_int(self) -> int: tag = tag[len("0x"):] radix = 16 return int(tag, radix) - except: + except Exception: self.unexpected(f"expected an integer but was {tag}") """ Like skip_whitespace(), but this returns a string containing all comment text. By convention, comments before a declaration document that declaration. 
""" def read_documentation(self) -> str: - result: str = None + result = None while True: self.skip_whitespace(False) if self.pos == len(self.data) or self.data[self.pos] != '/': if result: return result else: - "" + return "" comment = self.read_comment() if result: result = f"{result}\n{comment}" @@ -241,10 +238,10 @@ def read_comment(self) -> str: tval = -1 if self.pos < len(self.data): self.pos += 1 - tval = int(self.data[self.pos]) + tval = ord(self.data[self.pos]) - if tval == int('*'): - result: list + if tval == ord('*'): + result: list = list() start_of_line = True while self.pos + 1 < len(self.data): c: str = self.data[self.pos] @@ -271,7 +268,7 @@ def read_comment(self) -> str: self.pos += 1 self.unexpected("unterminated comment") - if tval == int('/'): + if tval == ord('/'): if self.pos < len(self.data) and self.data[self.pos] == ' ': self.pos += 1 # Skip a single leading space, if present. start = self.pos @@ -281,7 +278,7 @@ def read_comment(self) -> str: if c == "\n": self.newline() break - return self.data[start:self.pos - 1 - start].decode() + return self.data[start:self.pos - 1 - start] self.unexpected("unexpected '/'") def try_append_trailing_documentation(self, documentation: str) -> str: @@ -297,7 +294,8 @@ def try_append_trailing_documentation(self, documentation: str) -> str: return documentation bval = (self.pos < len(self.data) and (self.data[self.pos] == '/' or self.data[self.pos] == '*')) # Backtrack to start of comment. 
- if not bval: self.pos -= 1 + if not bval: + self.pos -= 1 self.expect(bval, "expected '//' or '/*'") is_star = self.data[self.pos] == '*' @@ -379,7 +377,7 @@ def newline(self): self.line_start = self.pos def location(self) -> Location: - return self.location.at(self.line + 1, self.pos - self.line_start + 1) + return self._location.at(self.line + 1, self.pos - self.line_start + 1) def expect(self, condition: bool, message: str): location = self.location() @@ -393,4 +391,4 @@ def expect_with_location(self, condition: bool, location: Location, message: str def unexpected(self, message: str, location: Location = None): if not location: location = self.location() - raise ProtobufParserRuntimeException(f"Syntax error in {location.to_string()}: {message}") + raise ProtobufParserRuntimeException(f"Syntax error in {str(location)}: {message}") diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py new file mode 100644 index 000000000..b676250d2 --- /dev/null +++ b/karapace/protobuf/type_element.py @@ -0,0 +1,12 @@ +from karapace.protobuf.location import Location + + +class TypeElement: + location: Location + name: str + documentation: str + options: list + nested_types: list + + def to_schema(self) -> str: + pass From 8ffba9cebffa0e88db78ea03ffc3a7e3c6e10c4e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 1 May 2021 10:47:52 +0300 Subject: [PATCH 011/168] lint issues fuxup --- karapace/protobuf/enum_constant_element.py | 19 +- karapace/protobuf/enum_element.py | 16 +- karapace/protobuf/exception.py | 8 +- karapace/protobuf/field_element.py | 27 ++- karapace/protobuf/group_element.py | 10 +- karapace/protobuf/kotlin_wrapper.py | 15 +- karapace/protobuf/location.py | 10 +- karapace/protobuf/message_element.py | 41 ++-- karapace/protobuf/one_of_element.py | 6 +- karapace/protobuf/option_element.py | 60 ++---- karapace/protobuf/option_reader.py | 78 ++++---- karapace/protobuf/proto_file_element.py | 37 ++-- 
karapace/protobuf/proto_parser.py | 221 +++++++++------------ karapace/protobuf/proto_type.py | 60 ++---- karapace/protobuf/rpc_element.py | 23 ++- karapace/protobuf/service_element.py | 3 +- karapace/protobuf/syntax_reader.py | 150 ++++++-------- karapace/protobuf/utils.py | 71 +------ 18 files changed, 330 insertions(+), 525 deletions(-) diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index 826581577..b04e343a8 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -1,5 +1,5 @@ from karapace.protobuf.location import Location -from karapace.protobuf.utils import append_documentation, append_options +from karapace.protobuf.utils import append_documentation, append_options class EnumConstantElement: @@ -9,13 +9,14 @@ class EnumConstantElement: documentation: str options: list = list() - def __init__(self, - location: Location, - name: str, - tag: int, - documentation: str, - options: list, - ): + def __init__( + self, + location: Location, + name: str, + tag: int, + documentation: str, + options: list, + ): self.location = location self.name = name @@ -30,7 +31,7 @@ def to_schema(self) -> str: result: list = list() append_documentation(result, self.documentation) result.append(f"{self.name} = {self.tag}") - if self.options and len(self.options): + if self.options: result.append(" ") append_options(result, self.options) result.append(";\n") diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 42cbce9c6..05b1e5fa2 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -1,18 +1,12 @@ -from karapace.protobuf.type_element import TypeElement from karapace.protobuf.location import Location +from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented class EnumElement(TypeElement): constants: list = list() - def __init__(self, - 
location: Location, - name: str, - documentation: str, - options: list, - constants: list - ): + def __init__(self, location: Location, name: str, documentation: str, options: list, constants: list): self.location = location self.name = name self.documentation = documentation @@ -26,14 +20,14 @@ def to_schema(self) -> str: append_documentation(result, self.documentation) result.append(f"enum {self.name} {{") - if self.options and len(self.options) or self.constants and len(self.constants): + if self.options or self.constants: result.append("\n") - if self.options and len(self.options): + if self.options: for option in self.options: append_indented(result, option.to_schema_declaration()) - if self.constants and len(self.constants): + if self.constants: for constant in self.constants: append_indented(result, constant.to_schema()) diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index 3b32dbb6c..b74e1efe2 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -7,7 +7,6 @@ class ProtobufParserRuntimeException(Exception): class IllegalStateException(Exception): - def __init__(self, message="IllegalStateException"): self.message = message super().__init__(self.message) @@ -15,14 +14,11 @@ def __init__(self, message="IllegalStateException"): class Error(Exception): """Base class for errors in this module.""" - pass class ProtobufException(Error): - """Generic Avro schema error.""" - pass + """Generic Protobuf schema error.""" class SchemaParseException(ProtobufException): - """Error while parsing a JSON schema descriptor.""" - pass + """Error while parsing a Protobuf schema descriptor.""" diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index b0f5a5f60..10b80722d 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -1,10 +1,9 @@ -from typing import Union - from karapace.protobuf.field import Field from karapace.protobuf.location import 
Location from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.utils import append_documentation, append_options +from typing import Union class FieldElement: @@ -14,13 +13,14 @@ class FieldElement: name: str default_value: str = None json_name: str = None - tag: int = 0, - documentation: str = "", + tag: int = 0 + documentation: str = "" options: list = list() - def __init__(self, location: Location, label: Union[None, Field.Label], element_type: str, - name: str, default_value: str, json_name: str, tag: int, - documentation: str, options: list): + def __init__( + self, location: Location, label: Union[None, Field.Label], element_type: str, name: str, default_value: str, + json_name: str, tag: int, documentation: str, options: list + ): self.location = location self.label = label self.element_type = element_type @@ -41,26 +41,25 @@ def to_schema(self): result.append(f"{self.element_type} {self.name} = {self.tag}") options_with_default = self.options_with_special_values() - if options_with_default and len(options_with_default) > 0: + if options_with_default: result.append(' ') append_options(result, options_with_default) result.append(";\n") - """ - Both `default` and `json_name` are defined in the schema like options but they are actually - not options themselves as they're missing from `google.protobuf.FieldOptions`. - """ - def options_with_special_values(self) -> list: + """ Both `default` and `json_name` are defined in the schema like options but they are actually + not options themselves as they're missing from `google.protobuf.FieldOptions`. 
+ """ options = self.options.copy() if self.default_value: - proto_type = ProtoType.get2(self.element_type) + proto_type: ProtoType = ProtoType.get2(self.element_type) options.append(OptionElement("default", proto_type.to_kind(), self.default_value, False)) if self.json_name: self.options.append(OptionElement("json_name", OptionElement.Kind.STRING, self.json_name, False)) return options + # Only non-repeated scalar types and Enums support default values. diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index 7cedae517..d04e24f8c 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -1,8 +1,7 @@ -from typing import Union - from karapace.protobuf.field import Field from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented +from typing import Union class GroupElement: @@ -13,8 +12,9 @@ class GroupElement: documentation: str = "" fields: list = list() - def __init__(self, label: Union[None, Field.Label], location: Location, name: str, tag: int, documentation: str, - fields: list): + def __init__( + self, label: Union[None, Field.Label], location: Location, name: str, tag: int, documentation: str, fields: list + ): self.label = label self.location = location self.name = name @@ -30,7 +30,7 @@ def to_schema(self) -> str: if self.label: result.append(f"{str(self.label.name).lower()} ") result.append(f"group {self.name} = {self.tag} {{") - if self.fields and len(self.fields): + if self.fields: result.append("\n") for field in self.fields: append_indented(result, field.to_schema()) diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index db2c97d43..e88b762c8 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -33,12 +33,11 @@ class String(str): pass -class Any(object): +class Any: pass class StringBuilder(list): - def append_indented(self: list, value: str): 
lines = value.split("\n") if len(lines) > 1 and not lines[-1]: @@ -55,12 +54,12 @@ class OptionsList(list): class KotlinRange: - min: object - max: object + minimum: int + maximum: int - def __init__(self, min, max): - self.min = min - self.max = max + def __init__(self, minimum, maximum): + self.minimum = minimum + self.maximum = maximum def __str__(self) -> str: - return f"{self.min}..{self.max}" + return f"{self.minimum}..{self.maximum}" diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index e2efc3f38..9abeb2985 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -45,13 +45,17 @@ def __str__(self) -> str: return result @staticmethod - def get(*args, **kwds): + def get(*args): + result = None if len(args) == 1: # (path) path = args[0] - return Location.get("", path) + result = Location.get("", path) if len(args) == 2: # (base,path) path: str = args[1] base: str = args[0] if base.endswith("/"): base = base[:-1] - return Location(base, path) + result = Location(base, path) + else: + raise Exception() + return result diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index eb7701a77..d6f2c1e69 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -1,5 +1,5 @@ -from karapace.protobuf.type_element import TypeElement from karapace.protobuf.location import Location +from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented @@ -10,18 +10,19 @@ class MessageElement(TypeElement): extensions: list = [] groups: list = [] - def __init__(self, - location: Location, - name: str, - documentation: str, - nested_types: list, - options: list, - reserveds: list, - fields: list, - one_ofs: list, - extensions: list, - groups: list, - ): + def __init__( + self, + location: Location, + name: str, + documentation: str, + nested_types: list, + options: list, + reserveds: list, + 
fields: list, + one_ofs: list, + extensions: list, + groups: list, + ): self.location = location self.name = name self.documentation = documentation @@ -37,37 +38,37 @@ def to_schema(self) -> str: result: list = list() append_documentation(result, self.documentation) result.append(f"message {self.name} {{") - if self.reserveds and len(self.reserveds): + if self.reserveds: result.append("\n") for reserved in self.reserveds: append_indented(result, reserved.to_schema()) - if self.options and len(self.options): + if self.options: result.append("\n") for option in self.options: append_indented(result, option.to_schema_declaration()) - if self.fields and len(self.fields): + if self.fields: for field in self.fields: result.append("\n") append_indented(result, field.to_schema()) - if self.one_ofs and len(self.one_ofs): + if self.one_ofs: for one_of in self.one_ofs: result.append("\n") append_indented(result, one_of.to_schema()) - if self.groups and len(self.groups): + if self.groups: for group in self.groups: result.append("\n") append_indented(result, group.to_schema()) - if self.extensions and len(self.extensions): + if self.extensions: result.append("\n") for extension in self.extensions: append_indented(result, extension.to_schema()) - if self.nested_types and len(self.nested_types): + if self.nested_types: result.append("\n") for nested_type in self.nested_types: append_indented(result, nested_type.to_schema()) diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index efee5f887..9a3c323ad 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -19,15 +19,15 @@ def to_schema(self) -> str: result: list = list() append_documentation(result, self.documentation) result.append(f"oneof {self.name} {{") - if self.options and len(self.options): + if self.options: for option in self.options: append_indented(result, option.to_schema_declaration()) - if self.fields and len(self.fields): + if self.fields: 
result.append("\n") for field in self.fields: append_indented(result, field.to_schema()) - if self.groups and len(self.groups): + if self.groups: result.append("\n") for group in self.groups: append_indented(result, group.to_schema()) diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 764ee425e..626bf28ab 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -1,5 +1,6 @@ from enum import Enum -from karapace.protobuf.kotlin_wrapper import * +# from karapace.protobuf.kotlin_wrapper import * +# from karapace.protobuf.kotlin_wrapper import * from karapace.protobuf.utils import append_indented @@ -36,17 +37,13 @@ def to_schema(self) -> str: self.kind in [self.Kind.BOOLEAN, self.Kind.NUMBER, self.Kind.ENUM]: f"{self.formattedName} = {self.value}", self.kind == self.Kind.OPTION: f"{self.formattedName}.{self.value.to_schema()}", self.kind == self.Kind.MAP: list([f"{self.formattedName} = {{\n", - self.format_option_map(self.value), - "}" - ]), + self.format_option_map(self.value), "}"]), self.kind == self.Kind.LIST: list([f"{self.formattedName} = ", - self.append_options(self.value) - ]) + self.append_options(self.value)]) }[True] - if type(aline) is list: + if isinstance(aline, list): return "".join(aline) - else: - return aline + return aline def to_schema_declaration(self): return f"option {self.to_schema()};\n" @@ -74,53 +71,30 @@ def append_options(options: list): def format_option_map(self, value: dict) -> str: keys = list(value.keys()) last_index = len(keys) - 1 - result: StringBuilder = StringBuilder() - for index in range(len(keys)): + result: list = list() + for index, key in enumerate(keys): endl = "," if (index != last_index) else "" - result.append_indented(f"{keys[index]}: {self.format_option_map_value(value[keys[index]])}{endl}") + append_indented(result, f"{key}: {self.format_option_map_value(value[key])}{endl}") return "".join(result) def format_option_map_value(self, value) 
-> str: aline = { - type(value) is str: f"\"{value}\"", - type(value) is dict: list(["{\n", - self.format_option_map_value(value), - "}" - ]), - type(value) is list: list(["[\n", - self.format_list_map_value(value), - "]" - ]) + isinstance(value, str): f"\"{value}\"", + isinstance(value, dict): list(["{\n", self.format_option_map_value(value), "}"]), + isinstance(value, list): list(["[\n", self.format_list_map_value(value), "]"]) }[True] - if type(aline) is list: + if isinstance(aline, list): return "".join(aline) - if type(aline) is str: + if isinstance(aline, str): return aline return value def format_list_map_value(self, value) -> str: keys = value.keys() last_index = len(value) - 1 - result: StringBuilder = StringBuilder() - for index in range(len(keys)): + result: list = list() + for index, key in enumerate(keys): endl = "," if (index != last_index) else "" - result.append_indented(f"{self.format_option_map_value(value[keys[index]])}{endl}") + append_indented(result, f"{self.format_option_map_value(value[key])}{endl}") return "".join(result) - - # TODO: REMOVE WHEN ALL CLEAN - """ companion object { - internal PACKED_OPTION_ELEMENT = - OptionElement("packed", BOOLEAN, value = "true", is_parenthesized = false) - - @JvmOverloads - def create( - name: String, - kind: Kind, - value: Any, - is_parenthesized: Boolean = false - ) = OptionElement(name, kind, value, is_parenthesized) - } -} - - """ diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index 53988fcb5..509de9508 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -1,7 +1,6 @@ -from typing import Union - -from karapace.protobuf.syntax_reader import SyntaxReader from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.syntax_reader import SyntaxReader +from typing import Union class KindAndValue: @@ -19,12 +18,10 @@ class OptionReader: def __init__(self, reader: SyntaxReader): self.reader = reader - """ - 
Reads options enclosed in '[' and ']' if they are present and returns them. Returns an empty - list if no options are present. - """ - def read_options(self) -> list: + """ Reads options enclosed in '[' and ']' if they are present and returns them. Returns an empty + list if no options are present. + """ if not self.reader.peek_char('['): return list() result: list = list() @@ -39,9 +36,9 @@ def read_options(self) -> list: self.reader.expect(self.reader.peek_char(','), "Expected ',' or ']") return result - """ Reads a option containing a name, an '=' or ':', and a value. """ - def read_option(self, key_value_separator: str) -> OptionElement: + """ Reads a option containing a name, an '=' or ':', and a value. + """ is_extension = (self.reader.peek_char() == '[') is_parenthesized = (self.reader.peek_char() == '(') name = self.reader.read_name() # Option name. @@ -70,33 +67,32 @@ def read_option(self, key_value_separator: str) -> OptionElement: kind = OptionElement.Kind.OPTION return OptionElement(name, kind, value, is_parenthesized) - """ Reads a value that can be a map, list, string, number, boolean or enum. """ - def read_kind_and_value(self) -> KindAndValue: + """ Reads a value that can be a map, list, string, number, boolean or enum. 
""" peeked = self.reader.peek_char() + result: KindAndValue if peeked == '{': - return KindAndValue(OptionElement.Kind.MAP, self.read_map('{', '}', ':')) - if peeked == '[': - return KindAndValue(OptionElement.Kind.LIST, self.read_list()) - if peeked == '"' or peeked == "'": - return KindAndValue(OptionElement.Kind.STRING, self.reader.read_string()) - - if peeked.is_digit() or peeked == '-': - return KindAndValue(OptionElement.Kind.NUMBER, self.reader.read_word()) - - word = self.reader.read_word() - if word == "true": - return KindAndValue(OptionElement.Kind.BOOLEAN, "true") - if word == "false": - return KindAndValue(OptionElement.Kind.BOOLEAN, "false") - return KindAndValue(OptionElement.Kind.ENUM, word) - - """ - Returns a map of string keys and values. This is similar to a JSON object, with ':' and '}' - surrounding the map, ':' separating keys from values, and ',' or ';' separating entries. - """ + result = KindAndValue(OptionElement.Kind.MAP, self.read_map('{', '}', ':')) + elif peeked == '[': + result = KindAndValue(OptionElement.Kind.LIST, self.read_list()) + elif peeked in ('"', "'"): + result = KindAndValue(OptionElement.Kind.STRING, self.reader.read_string()) + elif peeked.is_digit() or peeked == '-': + result = KindAndValue(OptionElement.Kind.NUMBER, self.reader.read_word()) + else: + word = self.reader.read_word() + if word == "true": + result = KindAndValue(OptionElement.Kind.BOOLEAN, "true") + elif word == "false": + result = KindAndValue(OptionElement.Kind.BOOLEAN, "false") + else: + result = KindAndValue(OptionElement.Kind.ENUM, word) + return result def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) -> dict: + """ Returns a map of string keys and values. This is similar to a JSON object, with ':' and '}' + surrounding the map, ':' separating keys from values, and ',' or ';' separating entries. 
+ """ if self.reader.read_char() != open_brace: raise AssertionError() result: dict = dict() @@ -119,7 +115,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) previous = result[name] if not previous: result[name] = value - elif type(previous) is list: # Add to previous List + elif isinstance(previous, list): # Add to previous List self.add_to_list(previous, value) else: new_list: list = list() @@ -127,24 +123,22 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) self.add_to_list(new_list, value) result[name] = new_list # Discard optional separator. - self.reader.peek_char(',') or self.reader.peek_char(';') - - """ Adds an object or objects to a List. """ + if not self.reader.peek_char(','): + self.reader.peek_char(';') @staticmethod def add_to_list(_list: list, value: Union[list, str]): - if type(value) is list: + """ Adds an object or objects to a List. """ + if isinstance(value, list): for v in list(value): _list.append(v) else: _list.append(value) - """ - * Returns a list of values. This is similar to JSON with '[' and ']' surrounding the list and ',' - * separating values. - """ - def read_list(self) -> list: + """ Returns a list of values. This is similar to JSON with '[' and ']' surrounding the list and ',' + separating values. 
+ """ self.reader.require('[') result: list = list() while True: diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index ef8516481..bbe0b45bf 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -13,15 +13,18 @@ class ProtoFileElement: extend_declarations: list options: list - def __init__(self, location: Location, - package_name: str = None, - syntax: Syntax = None, - imports=None, - public_imports=None, - types=None, - services=None, - extend_declarations=None, - options=None): + def __init__( + self, + location: Location, + package_name: str = None, + syntax: Syntax = None, + imports=None, + public_imports=None, + types=None, + services=None, + extend_declarations=None, + options=None + ): if options is None: options = list() @@ -46,8 +49,10 @@ def __init__(self, location: Location, self.options = options def to_schema(self): - strings: list = ["// Proto schema formatted by Wire, do not edit.\n", "// Source: ", - str(self.location.with_path_only()), "\n"] + strings: list = [ + "// Proto schema formatted by Wire, do not edit.\n", "// Source: ", + str(self.location.with_path_only()), "\n" + ] if self.syntax: strings.append("\n") strings.append("syntax = \"") @@ -58,7 +63,7 @@ def to_schema(self): strings.append("\n") strings.append("package " + str(self.package_name) + ";\n") - if (self.imports and len(self.imports)) or (self.public_imports and len(self.public_imports)): + if self.imports or self.public_imports: strings.append("\n") for file in self.imports: @@ -67,22 +72,22 @@ def to_schema(self): for file in self.public_imports: strings.append("import public \"" + str(file) + "\";\n") - if self.options and len(self.options): + if self.options: strings.append("\n") for option in self.options: strings.append(str(option.to_schema_declaration())) - if self.types and len(self.types): + if self.types: for type_element in self.types: strings.append("\n") 
strings.append(str(type_element.to_schema)) - if self.extend_declarations and len(self.extend_declarations): + if self.extend_declarations: for extend_declaration in self.extend_declarations: strings.append("\n") strings.append(extend_declaration.to_schema()) - if self.services and len(self.extend_declarations): + if self.services: for service in self.services: strings.append("\n") strings.append(str(service.to_schema)) diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 5cc8f7923..6c3eadd0e 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -1,7 +1,5 @@ from builtins import str from enum import Enum -from typing import Union - from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.exception import error @@ -10,7 +8,7 @@ from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement from karapace.protobuf.group_element import GroupElement -from karapace.protobuf.kotlin_wrapper import options_to_list, KotlinRange +from karapace.protobuf.kotlin_wrapper import KotlinRange, options_to_list from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement from karapace.protobuf.one_of_element import OneOfElement @@ -24,6 +22,7 @@ from karapace.protobuf.syntax_reader import SyntaxReader from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import MAX_TAG_VALUE +from typing import Union class Context(Enum): @@ -53,16 +52,16 @@ def permits_one_of(self) -> bool: return self == Context.MESSAGE def permits_message(self) -> bool: - return self == Context.FILE or self == Context.MESSAGE + return self in [Context.FILE, Context.MESSAGE] def permits_service(self) -> bool: - return self == Context.FILE or self == Context.MESSAGE + return self in [Context.FILE, Context.MESSAGE] def permits_enum(self) -> 
bool: - return self == Context.FILE or self == Context.MESSAGE + return self in [Context.FILE, Context.MESSAGE] def permits_extend(self) -> bool: - return self == Context.FILE or self == Context.MESSAGE + return self in [Context.FILE, Context.MESSAGE] class ProtoParser: @@ -74,39 +73,40 @@ class ProtoParser: services: list extends_list: list options: list - declaration_count: int + declaration_count: int = 0 syntax: Syntax = None package_name: str = None prefix: str = "" - def __int__(self, location: Location, data: str): + def __init__(self, location: Location, data: str): self.reader = SyntaxReader(data, location) def read_proto_file(self) -> ProtoFileElement: while True: documentation = self.reader.read_documentation() if self.reader.exhausted(): - return ProtoFileElement(self.location, self.package_name, self.syntax, self.imports, - self.public_imports, self.nested_types, self.services, self.extends_list, - self.options) + return ProtoFileElement( + self.location, self.package_name, self.syntax, self.imports, self.public_imports, self.nested_types, + self.services, self.extends_list, self.options + ) declaration = self.read_declaration(documentation, Context.FILE) - if type(declaration) is TypeElement: - # TODO: must add check for execption + if isinstance(declaration, TypeElement): + # TODO: must add check for exception duplicate = next((x for x in iter(self.nested_types) if x.name == declaration.name), None) if duplicate: error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") self.nested_types.append(declaration) - if type(declaration) is ServiceElement: + if isinstance(declaration, ServiceElement): duplicate = next((x for x in iter(self.services) if x.name == declaration.name), None) if duplicate: error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") self.services.append(declaration) - if type(declaration) is OptionElement: + if isinstance(declaration, OptionElement): 
self.options.append(declaration) - if type(declaration) is ExtendElement: + if isinstance(declaration, ExtendElement): self.extends_list.append(declaration) def read_declaration(self, documentation: str, context: Context): @@ -124,11 +124,11 @@ def read_declaration(self, documentation: str, context: Context): # are named after any of the label we check here. result = None + # pylint no-else-return if label == "package" and context.permits_package(): self.package_name = self.reader.read_name() self.prefix = f"{self.package_name}." self.reader.require(';') - return result elif label == "import" and context.permits_import(): import_string = self.reader.read_string() if import_string == "public": @@ -137,51 +137,51 @@ def read_declaration(self, documentation: str, context: Context): else: self.imports.append(import_string) self.reader.require(';') - return result elif label == "syntax" and context.permits_syntax(): self.reader.expect_with_location(not self.syntax, location, "too many syntax definitions") self.reader.require("=") - self.reader.expect_with_location(index == 0, location, - "'syntax' element must be the first declaration in a file") + self.reader.expect_with_location( + index == 0, location, "'syntax' element must be the first declaration in a file" + ) syntax_string = self.reader.read_quoted_string() try: Syntax(syntax_string) - except Exception as e: - # TODO: } catch (e: IllegalArgumentException) { ??? 
+ except ValueError as e: self.reader.unexpected(str(e), location) self.reader.require(";") - return result elif label == "option": result = OptionReader(self.reader).read_option("=") self.reader.require(";") - return result elif label == "reserved": - return self.read_reserved(location, documentation) + result = self.read_reserved(location, documentation) elif label == "message" and context.permits_message(): - return self.read_message(location, documentation) + result = self.read_message(location, documentation) elif label == "enum" and context.permits_enum(): - return self.read_enum_element(location, documentation) + result = self.read_enum_element(location, documentation) elif label == "service" and context.permits_service(): - return self.read_service(location, documentation) + result = self.read_service(location, documentation) elif label == "extend" and context.permits_extend(): - return self.read_extend(location, documentation) + result = self.read_extend(location, documentation) elif label == "rpc" and context.permits_rpc(): - return self.read_rpc(location, documentation) + result = self.read_rpc(location, documentation) elif label == "oneof" and context.permits_one_of(): - return self.read_one_of(documentation) + result = self.read_one_of(documentation) elif label == "extensions" and context.permits_extensions(): - return self.read_extensions(location, documentation) - elif context == Context.MESSAGE or context == Context.EXTEND: - return self.read_field(documentation, location, label) + result = self.read_extensions(location, documentation) + elif context in [Context.MESSAGE, Context.EXTEND]: + result = self.read_field(documentation, location, label) elif context == Context.ENUM: - return self.read_enum_constant(documentation, location, label) + result = self.read_enum_constant(documentation, location, label) else: - self.reader.unexpected("unexpected label: $label", location) + result = None - """ Reads a message declaration. 
""" + if not result: + self.reader.unexpected("unexpected label: $label", location) + return result def read_message(self, location: Location, documentation: str) -> MessageElement: + """ Reads a message declaration. """ name: str = self.reader.read_name() fields: list = list() one_ofs: list = list() @@ -200,23 +200,23 @@ def read_message(self, location: Location, documentation: str) -> MessageElement if self.reader.peek_char("}"): break declared = self.read_declaration(nested_documentation, Context.MESSAGE) - type_declared = type(declared) - if type_declared is FieldElement: + + if isinstance(declared, FieldElement): fields.append(declared) - elif type_declared is OneOfElement: + elif isinstance(declared, OneOfElement): one_ofs.append(declared) - elif type_declared is GroupElement: + elif isinstance(declared, GroupElement): groups.append(declared) - elif type_declared is TypeElement: + elif isinstance(declared, TypeElement): nested_types.append(declared) - elif type_declared is ExtensionsElement: + elif isinstance(declared, ExtensionsElement): extensions.append(declared) - elif type_declared is OptionElement: + elif isinstance(declared, OptionElement): options.append(declared) # Extend declarations always add in a global scope regardless of nesting. - elif type_declared is ExtendElement: + elif isinstance(declared, ExtendElement): self.extends_list.append(declared) - elif type_declared is ReservedElement: + elif isinstance(declared, ReservedElement): reserveds.append(declared) self.prefix = previous_prefix @@ -234,9 +234,8 @@ def read_message(self, location: Location, documentation: str) -> MessageElement groups, ) - """ Reads an extend declaration. """ - def read_extend(self, location: Location, documentation: str) -> ExtendElement: + """ Reads an extend declaration. 
""" name = self.reader.read_name() fields: list = [] self.reader.require("{") @@ -259,9 +258,8 @@ def read_extend(self, location: Location, documentation: str) -> ExtendElement: fields, ) - """ Reads a service declaration and returns it. """ - def read_service(self, location: Location, documentation: str) -> ServiceElement: + """ Reads a service declaration and returns it. """ name = self.reader.read_name() rpcs = list() options: list = list() @@ -287,9 +285,8 @@ def read_service(self, location: Location, documentation: str) -> ServiceElement options, ) - """ Reads an enumerated atype declaration and returns it. """ - def read_enum_element(self, location: Location, documentation: str) -> EnumElement: + """ Reads an enumerated atype declaration and returns it. """ name = self.reader.read_name() constants: list = list() options: list = list() @@ -315,8 +312,9 @@ def read_field(self, documentation: str, location: Location, word: str): label: Union[None, Field.Label] atype: str if word == "required": - self.reader.expect_with_location(self.syntax != Syntax.PROTO_3, location, - "'required' label forbidden in proto3 field declarations") + self.reader.expect_with_location( + self.syntax != Syntax.PROTO_3, location, "'required' label forbidden in proto3 field declarations" + ) label = Field.Label.REQUIRED atype = self.reader.read_data_type() elif word == "optional": @@ -327,32 +325,25 @@ def read_field(self, documentation: str, location: Location, word: str): label = Field.Label.REPEATED atype = self.reader.read_data_type() else: - self.reader.expect_with_location(self.syntax == Syntax.PROTO_3 or - (word == "map" and self.reader.peek_char() == "<"), - location, - f"unexpected label: {word}") + self.reader.expect_with_location( + self.syntax == Syntax.PROTO_3 or (word == "map" and self.reader.peek_char() == "<"), location, + f"unexpected label: {word}" + ) label = None atype = self.reader.read_data_type_by_name(word) - self.reader.expect_with_location(not 
atype.startswith("map<") or not label, - location, - "'map' atype cannot have label" - ) + self.reader.expect_with_location( + not atype.startswith("map<") or not label, location, "'map' atype cannot have label" + ) if atype == "group ": return self.read_group(location, documentation, label) - else: - return self.read_field_with_label(location, documentation, label, atype) - - """ Reads an field declaration and returns it. """ - - def read_field_with_label(self, - location: Location, - documentation: str, - label: Union[None, Field.Label], - atype: str - ) -> FieldElement: + return self.read_field_with_label(location, documentation, label, atype) + def read_field_with_label( + self, location: Location, documentation: str, label: Union[None, Field.Label], atype: str + ) -> FieldElement: + """ Reads an field declaration and returns it. """ name = self.reader.read_name() self.reader.require('=') tag = self.reader.read_int() @@ -378,23 +369,19 @@ def read_field_with_label(self, options_to_list(options), ) - """ Defaults aren't options. """ - def strip_default(self, options: list) -> str: + """ Defaults aren't options. """ return self.strip_value("default", options) - """ `json_name` isn't an option. """ - def strip_json_name(self, options: list) -> str: + """ `json_name` isn't an option. """ return self.strip_value("json_name", options) - """ - This finds an option named [name], removes, and returns it. - Returns None if no [name] option is present. - """ - @staticmethod def strip_value(name: str, options: list) -> str: + """ This finds an option named [name], removes, and returns it. + Returns None if no [name] option is present. 
+ """ result: Union[None, str] = None for element in options[:]: if element.name == name: @@ -433,10 +420,10 @@ def read_one_of(self, documentation: str) -> OneOfElement: ) def read_group( - self, - location: Location, - documentation: str, - label: Union[None, Field.Label], + self, + location: Location, + documentation: str, + label: Union[None, Field.Label], ) -> GroupElement: name = self.reader.read_word() self.reader.require("=") @@ -457,18 +444,10 @@ def read_group( else: self.reader.unexpected(f"expected field declaration, was {field}") - return GroupElement( - label, - location, - name, - tag, - documentation, - fields - ) - - """ Reads a reserved tags and names list like "reserved 10, 12 to 14, 'foo';". """ + return GroupElement(label, location, name, tag, documentation, fields) def read_reserved(self, location: Location, documentation: str) -> ReservedElement: + """ Reads a reserved tags and names list like "reserved 10, 12 to 14, 'foo';". """ values: list = list() while True: ch = self.reader.peek_char() @@ -480,12 +459,12 @@ def read_reserved(self, location: Location, documentation: str) -> ReservedEleme if ch in [",", ";"]: values.append(tag_start) else: - self.reader.expect_with_location(self.reader.read_word() == "to", location, - "expected ',', ';', or 'to'") + self.reader.expect_with_location(self.reader.read_word() == "to", location, "expected ',', ';', or 'to'") tag_end = self.reader.read_int() values.append(KotlinRange(tag_start, tag_end)) ch = self.reader.read_char() + # pylint: disable=no-else-break if ch == ";": break elif ch == ",": @@ -493,25 +472,16 @@ def read_reserved(self, location: Location, documentation: str) -> ReservedEleme else: self.reader.unexpected("expected ',' or ';'") a = False - if values and len(values): + if values: a = True - self.reader.expect_with_location(a, location, - "'reserved' must have at least one field name or tag") + self.reader.expect_with_location(a, location, "'reserved' must have at least one field name or 
tag") my_documentation = self.reader.try_append_trailing_documentation(documentation) - return ReservedElement( - location, - my_documentation, - values - ) - - """ Reads extensions like "extensions 101;" or "extensions 101 to max;". """ + return ReservedElement(location, my_documentation, values) - def read_extensions(self, - location: Location, - documentation: str - ) -> ExtensionsElement: + def read_extensions(self, location: Location, documentation: str) -> ExtensionsElement: + """ Reads extensions like "extensions 101;" or "extensions 101 to max;". """ values: list = list() while True: start: int = self.reader.read_int() @@ -529,6 +499,7 @@ def read_extensions(self, values.append(KotlinRange(start, end)) ch = self.reader.read_char() + # pylint: disable=no-else-break if ch == ";": break elif ch == ",": @@ -536,15 +507,10 @@ def read_extensions(self, else: self.reader.unexpected("expected ',' or ';'") - return ExtensionsElement( - location, - documentation, - values - ) - - """ Reads an enum constant like "ROCK = 0;". The label is the constant name. """ + return ExtensionsElement(location, documentation, values) def read_enum_constant(self, documentation: str, location: Location, label: str) -> EnumConstantElement: + """ Reads an enum constant like "ROCK = 0;". The label is the constant name. """ self.reader.require('=') tag = self.reader.read_int() @@ -561,9 +527,8 @@ def read_enum_constant(self, documentation: str, location: Location, label: str) options, ) - """ Reads an rpc and returns it. """ - def read_rpc(self, location: Location, documentation: str) -> RpcElement: + """ Reads an rpc and returns it. 
""" name = self.reader.read_name() self.reader.require('(') @@ -610,19 +575,11 @@ def read_rpc(self, location: Location, documentation: str) -> RpcElement: self.reader.require(';') return RpcElement( - location, - name, - documentation, - request_type, - response_type, - request_streaming, - response_streaming, - options + location, name, documentation, request_type, response_type, request_streaming, response_streaming, options ) - """ Parse a named `.proto` schema. """ - @staticmethod def parse(location: Location, data: str) -> ProtoFileElement: + """ Parse a named `.proto` schema. """ proto_parser = ProtoParser(location, data) return proto_parser.read_proto_file() diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 3adf994e5..e8c92cff9 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -3,7 +3,6 @@ fully-qualified name using the protocol buffer package. """ - from karapace.protobuf.kotlin_wrapper import check, require from karapace.protobuf.option_element import OptionElement @@ -21,7 +20,6 @@ class ProtoType: is_map: bool """ The type of the map's keys. Only present when [is_map] is True. """ key_type: object # ProtoType - """ The type of the map's values. Only present when [is_map] is True. 
""" value_type: object # ProtoType @@ -65,22 +63,10 @@ def static_init(cls): cls.STRING_VALUE = ProtoType(False, "google.protobuf.StringValue") cls.BYTES_VALUE = ProtoType(False, "google.protobuf.BytesValue") - cls.SCALAR_TYPES_ = [cls.BOOL, - cls.BYTES, - cls.DOUBLE, - cls.FLOAT, - cls.FIXED32, - cls.FIXED64, - cls.INT32, - cls.INT64, - cls.SFIXED32, - cls.SFIXED64, - cls.SINT32, - cls.SINT64, - cls.STRING, - cls.UINT32, - cls.UINT64 - ] + cls.SCALAR_TYPES_ = [ + cls.BOOL, cls.BYTES, cls.DOUBLE, cls.FLOAT, cls.FIXED32, cls.FIXED64, cls.INT32, cls.INT64, cls.SFIXED32, + cls.SFIXED64, cls.SINT32, cls.SINT64, cls.STRING, cls.UINT32, cls.UINT64 + ] cls.SCALAR_TYPES: dict = dict() @@ -88,24 +74,12 @@ def static_init(cls): cls.SCALAR_TYPES[a.string] = a cls.NUMERIC_SCALAR_TYPES: tuple = ( - cls.DOUBLE, - cls.FLOAT, - cls.FIXED32, - cls.FIXED64, - cls.INT32, - cls.INT64, - cls.SFIXED32, - cls.SFIXED64, - cls.SINT32, - cls.SINT64, - cls.UINT32, - cls.UINT64 + cls.DOUBLE, cls.FLOAT, cls.FIXED32, cls.FIXED64, cls.INT32, cls.INT64, cls.SFIXED32, cls.SFIXED64, cls.SINT32, + cls.SINT64, cls.UINT32, cls.UINT64 ) - """ Creates a scalar or message type. """ - def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None): - + """ Creates a scalar or message type. """ if key_type is None and value_type is None: self.is_scalar = is_scalar self.string = string @@ -123,7 +97,6 @@ def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None) # TODO: must be IllegalArgumentException raise Exception("map key must be non-byte, non-floating point scalar: $key_type") - @staticmethod def to_kind(self) -> OptionElement.Kind: return { "bool": OptionElement.Kind.BOOLEAN, @@ -143,21 +116,18 @@ def to_kind(self) -> OptionElement.Kind: "uint64": OptionElement.Kind.NUMBER }.get(self.simple_name, OptionElement.Kind.ENUM) - """ Returns the enclosing type, or null if self type is not nested in another type. 
""" - @property def enclosing_type_or_package(self) -> str: + """ Returns the enclosing type, or null if self type is not nested in another type. """ dot = self.string.rfind(".") return None if (dot == -1) else self.string[:dot] - """ - Returns a string like "type.googleapis.com/packagename.messagename" or null if self type is - a scalar or a map. Note that self returns a non-null string for enums because it doesn't know - if the named type is a message or an enum. - """ - @property def type_url(self) -> str: + """ Returns a string like "type.googleapis.com/packagename.messagename" or null if self type is + a scalar or a map. Note that self returns a non-null string for enums because it doesn't know + if the named type is a message or an enum. + """ return None if self.is_scalar or self.is_map else f"type.googleapis.com/{self.string}" def nested_type(self, name: str) -> object: # ProtoType @@ -169,10 +139,10 @@ def nested_type(self, name: str) -> object: # ProtoType return ProtoType(False, f"{self.string}.{name}") def __eq__(self, other): - return type(other) is ProtoType and self.string == other.string + return isinstance(other, ProtoType) and self.string == other.string def __ne__(self, other): - return type(other) is not ProtoType or self.string != other.string + return not isinstance(other, ProtoType) or self.string != other.string def __str__(self) -> str: return self.string @@ -186,7 +156,7 @@ def get(enclosing_type_or_package: str, type_name: str) -> object: if enclosing_type_or_package else ProtoType.get2(type_name) @staticmethod - def get2(name: str) -> object: + def get2(name: str): scalar = ProtoType.SCALAR_TYPES[name] if scalar: return scalar diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py index b264f0df0..ae812f81e 100644 --- a/karapace/protobuf/rpc_element.py +++ b/karapace/protobuf/rpc_element.py @@ -12,16 +12,17 @@ class RpcElement: response_streaming: bool options: list - def __init__(self, - location: Location, - 
name: str, - documentation: str, - request_type: str, - response_type: str, - request_streaming: bool, - response_streaming: bool, - options: list, - ): + def __init__( + self, + location: Location, + name: str, + documentation: str, + request_type: str, + response_type: str, + request_streaming: bool, + response_streaming: bool, + options: list, + ): self.location = location self.name = name self.documentation = documentation @@ -44,7 +45,7 @@ def to_schema(self) -> str: result.append("stream ") result.append(f"{self.response_type})") - if self.options and len(self.options): + if self.options: result.append(" {\n") for option in self.options: append_indented(result, option.to_schema_declaration()) diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index 38c79f442..95c6b0136 100644 --- a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -9,8 +9,7 @@ class ServiceElement: rpcs: list options: list - def __init__(self, location: Location, name: str, documentation: str, rpcs: list, options: list - ): + def __init__(self, location: Location, name: str, documentation: str, rpcs: list, options: list): self.location = location self.name = name self.documentation = documentation diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 36c17027c..bc603cc63 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -1,6 +1,5 @@ +from karapace.protobuf.exception import IllegalStateException, ProtobufParserRuntimeException from karapace.protobuf.location import Location -from karapace.protobuf.exception import IllegalStateException -from karapace.protobuf.exception import ProtobufParserRuntimeException def hex_digit(c: str) -> int: @@ -22,7 +21,6 @@ class SyntaxReader: _location: Location """ Next character to be read """ pos: int = 0 - """ The number of newline characters """ line: int = 0 """ The index of the most recent newline character. 
""" @@ -35,55 +33,46 @@ def __init__(self, data: str, location: Location): def exhausted(self) -> bool: return self.pos == len(self.data) - """ Reads a non-whitespace character """ - def read_char(self): + """ Reads a non-whitespace character """ + char = self.peek_char() self.pos += 1 return char - """ Reads a non-whitespace character 'c' """ - def require(self, c: str): + """ Reads a non-whitespace character 'c' """ self.expect(self.read_char() == c, f"expected '{c}'") - """ - Peeks a non-whitespace character and returns it. The only difference between this and - [read_char] is that this doesn't consume the char. - """ - def peek_char(self, ch: str = None): + """ Peeks a non-whitespace character and returns it. The only difference between this and + [read_char] is that this doesn't consume the char. + """ + if ch: if self.peek_char() == ch: self.pos += 1 return True - else: - return False - else: - self.skip_whitespace(True) - self.expect(self.pos < len(self.data), "unexpected end of file") - return self.data[self.pos] - - """ Push back the most recently read character. """ + self.skip_whitespace(True) + self.expect(self.pos < len(self.data), "unexpected end of file") + return self.data[self.pos] def push_back(self, ch: str): + """ Push back the most recently read character. """ if self.data[self.pos - 1] == ch: self.pos -= 1 - """ Reads a quoted or unquoted string and returns it. """ - def read_string(self) -> str: + """ Reads a quoted or unquoted string and returns it. 
""" self.skip_whitespace(True) - if self.peek_char() in ["\"", "'"]: + if self.peek_char() in ['"', "'"]: return self.read_quoted_string() - - else: - return self.read_word() + return self.read_word() def read_quoted_string(self) -> str: start_quote = self.read_char() - if start_quote != '"' and start_quote != '\'': - raise IllegalStateException(f" quote expected") + if start_quote not in ('"', "'"): + raise IllegalStateException(" quote expected") result: list = [] @@ -91,9 +80,8 @@ def read_quoted_string(self) -> str: self.pos += 1 c = self.data[self.pos] if c == start_quote: - """ Adjacent strings are concatenated. - Consume new quote and continue reading. """ if self.peek_char() == '"' or self.peek_char() == "'": + # Adjacent strings are concatenated. Consume new quote and continue reading. start_quote = self.read_char() continue return "".join(result) @@ -143,9 +131,9 @@ def read_numeric_escape(self, radix: int, length: int) -> str: self.expect(value >= 0, "expected a digit after \\x or \\X") return chr(value) - """ Reads a (paren-wrapped), [square-wrapped] or naked symbol name. """ - def read_name(self) -> str: + """ Reads a (paren-wrapped), [square-wrapped] or naked symbol name. """ + c = self.peek_char() if c == '(': self.pos += 1 @@ -159,17 +147,15 @@ def read_name(self) -> str: return result return self.read_word() - """ Reads a scalar, map, or type name. """ - def read_data_type(self) -> str: + """ Reads a scalar, map, or type name. """ + name = self.read_word() return self.read_data_type_by_name(name) - """ Reads a scalar, map, or type name with `name` as a prefix word. """ - def read_data_type_by_name(self, name: str) -> str: + """ Reads a scalar, map, or type name with `name` as a prefix word. 
""" if name == "map": - self.expect(self.read_char() == '<', "expected '<'") key_type = self.read_data_type() @@ -178,12 +164,10 @@ def read_data_type_by_name(self, name: str) -> str: self.expect(self.read_char() == '>', "expected '>'") return f"map<{key_type}, {value_type}>" - else: - return name - - """ Reads a non-empty word and returns it. """ + return name def read_word(self) -> str: + """ Reads a non-empty word and returns it. """ self.skip_whitespace(True) start = self.pos while self.pos < len(self.data): @@ -197,9 +181,8 @@ def read_word(self) -> str: self.expect(start < self.pos, "expected a word") return self.data[start:self.pos - start] - """ Reads an integer and returns it. """ - def read_int(self) -> int: + """ Reads an integer and returns it. """ tag: str = self.read_word() try: radix = 10 @@ -207,30 +190,30 @@ def read_int(self) -> int: tag = tag[len("0x"):] radix = 16 return int(tag, radix) - except Exception: + except OSError as err: + print("OS error: {0}".format(err)) + except ValueError: self.unexpected(f"expected an integer but was {tag}") - """ Like skip_whitespace(), but this returns a string containing all comment text. By convention, - comments before a declaration document that declaration. """ - def read_documentation(self) -> str: + """ Like skip_whitespace(), but this returns a string containing all comment text. By convention, + comments before a declaration document that declaration. """ + result = None while True: self.skip_whitespace(False) if self.pos == len(self.data) or self.data[self.pos] != '/': if result: return result - else: - return "" + return "" comment = self.read_comment() if result: result = f"{result}\n{comment}" else: result = "$result\n$comment" - """ Reads a comment and returns its body. """ - def read_comment(self) -> str: + """ Reads a comment and returns its body. 
""" if self.pos == len(self.data) or self.data[self.pos] != '/': raise IllegalStateException() @@ -239,36 +222,34 @@ def read_comment(self) -> str: if self.pos < len(self.data): self.pos += 1 tval = ord(self.data[self.pos]) - + result: str = "" if tval == ord('*'): - result: list = list() + buffer: list = list() start_of_line = True while self.pos + 1 < len(self.data): + # pylint: disable=no-else-break c: str = self.data[self.pos] - if c == '*' and self.data[self.pos + 1] == '/': self.pos += 2 - return "".join(result).strip() - - if c == "\n": - result.append("\n") + result = "".join(buffer).strip() + break + elif c == "\n": + buffer.append("\n") self.newline() start_of_line = True - - if not start_of_line: - result.append(c) - - if c == "*": + elif not start_of_line: + buffer.append(c) + elif c == "*": if self.data[self.pos + 1] == ' ': self.pos += 1 # Skip a single leading space, if present. start_of_line = False - if not c.isspace(): - result.append(c) + elif not c.isspace(): + buffer.append(c) start_of_line = False self.pos += 1 - self.unexpected("unterminated comment") - - if tval == ord('/'): + if not result: + self.unexpected("unterminated comment") + elif tval == ord('/'): if self.pos < len(self.data) and self.data[self.pos] == ' ': self.pos += 1 # Skip a single leading space, if present. start = self.pos @@ -278,8 +259,10 @@ def read_comment(self) -> str: if c == "\n": self.newline() break - return self.data[start:self.pos - 1 - start] - self.unexpected("unexpected '/'") + result = self.data[start:self.pos - 1 - start] + if not result: + self.unexpected("unexpected '/'") + return result def try_append_trailing_documentation(self, documentation: str) -> str: """ Search for a '/' character ignoring spaces and tabs.""" @@ -290,7 +273,7 @@ def try_append_trailing_documentation(self, documentation: str) -> str: if self.data[self.pos] == '/': self.pos += 1 break - """ Not a whitespace or comment-starting character. Return original documentation. 
""" + # Not a whitespace or comment-starting character. Return original documentation. return documentation bval = (self.pos < len(self.data) and (self.data[self.pos] == '/' or self.data[self.pos] == '*')) # Backtrack to start of comment. @@ -309,7 +292,7 @@ def try_append_trailing_documentation(self, documentation: str) -> str: end: int if is_star: - """ Consume star comment until it closes on the same line.""" + # Consume star comment until it closes on the same line. while True: self.expect(self.pos < len(self.data), "trailing comment must be closed") if self.data[self.pos] == '*' and self.pos + 1 < len(self.data) and self.data[self.pos + 1] == '/': @@ -317,8 +300,7 @@ def try_append_trailing_documentation(self, documentation: str) -> str: self.pos += 2 # Skip to the character after '/'. break self.pos += 1 - - """ Ensure nothing follows a trailing star comment.""" + # Ensure nothing follows a trailing star comment. while self.pos < len(self.data): self.pos += 1 c = self.data[self.pos] @@ -326,10 +308,10 @@ def try_append_trailing_documentation(self, documentation: str) -> str: self.newline() break - self.expect(c == " " or c == "\t", "no syntax may follow trailing comment") + self.expect(c in [" ", "\t"], "no syntax may follow trailing comment") else: - """ Consume comment until newline. """ + # Consume comment until newline. while True: if self.pos == len(self.data): end = self.pos - 1 @@ -340,8 +322,7 @@ def try_append_trailing_documentation(self, documentation: str) -> str: self.newline() end = self.pos - 2 # Account for stepping past the newline. break - - """ Remove trailing whitespace.""" + # Remove trailing whitespace. while end > start and (self.data[end] == " " or self.data[end] == "\t"): end -= 1 @@ -353,15 +334,13 @@ def try_append_trailing_documentation(self, documentation: str) -> str: return trailing_documentation return f"{documentation}\n{trailing_documentation}" - """ - Skips whitespace characters and optionally comments. 
When this returns, either - self.pos == self.data.length or a non-whitespace character. - """ - def skip_whitespace(self, skip_comments: bool): + """ Skips whitespace characters and optionally comments. When this returns, either + self.pos == self.data.length or a non-whitespace character. + """ while self.pos < len(self.data): c = self.data[self.pos] - if c == " " or c == "\t" or c == "\r" or c == "\n": + if c in [" ", "\t", "\r", "\n"]: self.pos += 1 if c == "\n": self.newline() @@ -370,9 +349,8 @@ def skip_whitespace(self, skip_comments: bool): return - """ Call this every time a '\n' is encountered. """ - def newline(self): + """ Call this every time a '\n' is encountered. """ self.line += 1 self.line_start = self.pos diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 245672259..318e4a72a 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -55,80 +55,13 @@ def append_indented(data: list, value: str): RESERVED_TAG_VALUE_START = 19000 RESERVED_TAG_VALUE_END = 19999 - """ True if the supplied value is in the valid tag range and not reserved. """ class MyInt(int): def is_valid_tag(self) -> bool: - return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or ( - RESERVED_TAG_VALUE_END + 1 <= self <= MAX_TAG_VALUE + 1) + return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or\ + (RESERVED_TAG_VALUE_END + 1 <= self <= MAX_TAG_VALUE + 1) builtins.int = MyInt - -# TODO: remove following text if not implemented - -""" internal expect fun Char.isDigit(): bool - -internal expect fun str.toEnglishLowerCase(): str - -expect interface MutableQueue : MutableCollection { - fun poll(): T? -} - -internal expect fun mutableQueueOf(): MutableQueue - -# TODO internal and friend for wire-compiler: https:#youtrack.jetbrains.com/issue/KT-34102 - - * Replace types in this schema which are present in [typesToStub] with empty shells that have no - * outward references. 
This has to be done in this module so that we can access the internal - * constructor to avoid re-linking. - -fun Schema.withStubs(typesToStub: Set): Schema { - if (typesToStub.isEmpty(): - return this - } - return Schema(protoFiles.map { protoFile -> - protoFile.copy( - types = protoFile.types.map { type -> - if (type.type in typesToStub) type.asStub() else type - }, - services = protoFile.services.map { service -> - if (service.type in typesToStub) service.asStub() else service - } - ) - }) -} - - Return a copy of this type with all possible type references removed. -private fun Type.asStub(): Type = when { - # Don't stub the built-in protobuf types which model concepts like options. - type.tostr().startsWith("google.protobuf.") -> this - - this is MessageType -> copy( - declaredFields = emptyList(), - extensionFields = mutableListOf(), - nestedTypes = nestedTypes.map { it.asStub() }, - options = Options(Options.MESSAGE_OPTIONS, emptyList()) - ) - - this is EnumType -> copy( - constants = emptyList(), - options = Options(Options.ENUM_OPTIONS, emptyList()) - ) - - this is EnclosingType -> copy( - nestedTypes = nestedTypes.map { it.asStub() } - ) - - else -> throw AssertionError("Unknown type $type") -} - - Return a copy of this service with all possible type references removed. 
-private fun Service.asStub() = copy( - rpcs = emptyList(), - options = Options(Options.SERVICE_OPTIONS, emptyList()) -) - -""" From 9ca84d8d14ad09f2f13d7b3ffec5b9d72d94e500 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 6 May 2021 11:57:53 +0300 Subject: [PATCH 012/168] fixup by @hackaugusto suggestions --- karapace/compatibility/__init__.py | 15 ++++++++++++--- karapace/protobuf/schema.py | 2 +- karapace/schema_reader.py | 11 ++++------- karapace/serialization.py | 2 +- tests/integration/test_client.py | 16 +--------------- tests/integration/test_client_protobuf.py | 18 ++++++++++++++++++ tests/schemas/protobuf.py | 11 +++++++++++ tests/utils.py | 4 ---- 8 files changed, 48 insertions(+), 31 deletions(-) create mode 100644 tests/integration/test_client_protobuf.py create mode 100644 tests/schemas/protobuf.py diff --git a/karapace/compatibility/__init__.py b/karapace/compatibility/__init__.py index f939ed342..218bf8403 100644 --- a/karapace/compatibility/__init__.py +++ b/karapace/compatibility/__init__.py @@ -139,11 +139,20 @@ def check_compatibility( writer=old_schema.schema, ) elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: - result = check_protobuf_compatibility(reader=old_schema.schema, writer=new_schema.schema) + result = check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ) elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: - result = check_protobuf_compatibility(reader=new_schema.schema, writer=old_schema.schema) - result = result.merged_with(check_protobuf_compatibility(reader=old_schema.schema, writer=new_schema.schema)) + result = check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + result = result.merged_with(check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + )) else: result = SchemaCompatibilityResult.incompatible( diff --git 
a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 05b66c698..e16951a23 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -4,5 +4,5 @@ class ProtobufSchema: def __init__(self, schema: str): self.schema = schema - def to_json(self): + def __str__(self) -> str: return self.schema diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 16156c459..d96a6b9ac 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -23,7 +23,6 @@ import json import logging -import sys import time log = logging.getLogger(__name__) @@ -89,8 +88,8 @@ def parse_protobuf(schema_str: str): try: return TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) # TypeError - Raised when the user forgets to encode the schema as a string. - except Exception as e: # FIXME: bare except - print("Unexpected error:", sys.exc_info()[0]) + except Exception as e: # FIXME: bare exception + log.exception("Unexpected error:") raise InvalidSchema from e @staticmethod @@ -108,18 +107,16 @@ def to_json(self): return self.schema.schema if isinstance(self.schema, AvroSchema): return self.schema.to_json(names=None) - if isinstance(self.schema, ProtobufSchema): - return self.schema.to_json() return self.schema def __str__(self) -> str: if isinstance(self.schema, ProtobufSchema): - return self.schema.to_json() + return str(self.schema) return json_encode(self.to_json(), compact=True) def __repr__(self): if isinstance(self.schema, ProtobufSchema): - return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" + return f"TypedSchema(type={self.schema_type}, schema={str(self)})" return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" def __eq__(self, other): diff --git a/karapace/serialization.py b/karapace/serialization.py index 4a0276620..9d57786e7 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -72,7 +72,7 @@ def __init__(self, 
schema_registry_url: str = "http://localhost:8081"): async def post_new_schema(self, subject: str, schema: TypedSchema) -> int: if schema.schema_type is SchemaType.PROTOBUF: - payload = {"schema": schema.to_json(), "schemaType": schema.schema_type.value} + payload = {"schema": str(schema), "schemaType": schema.schema_type.value} else: payload = {"schema": json_encode(schema.to_json()), "schemaType": schema.schema_type.value} result = await self.client.post(f"subjects/{quote(subject)}/versions", json=payload) diff --git a/tests/integration/test_client.py b/tests/integration/test_client.py index 89a0e8736..6ce07f85d 100644 --- a/tests/integration/test_client.py +++ b/tests/integration/test_client.py @@ -1,6 +1,6 @@ from karapace.schema_reader import SchemaType, TypedSchema from karapace.serialization import SchemaRegistryClient -from tests.utils import new_random_name, schema_avro_json, schema_protobuf_plain +from tests.utils import new_random_name, schema_avro_json async def test_remote_client(registry_async_client): @@ -15,17 +15,3 @@ async def test_remote_client(registry_async_client): stored_id, stored_schema = await reg_cli.get_latest_schema(subject) assert stored_id == sc_id assert stored_schema == schema_avro - - -async def test_remote_client_protobuf(registry_async_client): - schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain) - reg_cli = SchemaRegistryClient() - reg_cli.client = registry_async_client - subject = new_random_name("subject") - sc_id = await reg_cli.post_new_schema(subject, schema_protobuf) - assert sc_id >= 0 - stored_schema = await reg_cli.get_schema_for_id(sc_id) - assert stored_schema == schema_protobuf, f"stored schema {stored_schema} is not {schema_protobuf}" - stored_id, stored_schema = await reg_cli.get_latest_schema(subject) - assert stored_id == sc_id - assert stored_schema == schema_protobuf diff --git a/tests/integration/test_client_protobuf.py b/tests/integration/test_client_protobuf.py new file mode 
100644 index 000000000..862991389 --- /dev/null +++ b/tests/integration/test_client_protobuf.py @@ -0,0 +1,18 @@ +from karapace.schema_reader import SchemaType, TypedSchema +from karapace.serialization import SchemaRegistryClient +from tests.schemas.protobuf import schema_protobuf_plain +from tests.utils import new_random_name + + +async def test_remote_client_protobuf(registry_async_client): + schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain) + reg_cli = SchemaRegistryClient() + reg_cli.client = registry_async_client + subject = new_random_name("subject") + sc_id = await reg_cli.post_new_schema(subject, schema_protobuf) + assert sc_id >= 0 + stored_schema = await reg_cli.get_schema_for_id(sc_id) + assert stored_schema == schema_protobuf, f"stored schema {stored_schema} is not {schema_protobuf}" + stored_id, stored_schema = await reg_cli.get_latest_schema(subject) + assert stored_id == sc_id + assert stored_schema == schema_protobuf diff --git a/tests/schemas/protobuf.py b/tests/schemas/protobuf.py new file mode 100644 index 000000000..e8e1a0bb8 --- /dev/null +++ b/tests/schemas/protobuf.py @@ -0,0 +1,11 @@ +schema_protobuf_plain = """ +syntax = "proto3"; +package com.codingharbour.protobuf; + +option java_outer_classname = "SimpleMessageProtos"; +message SimpleMessage { + string content = 1; + string date_time = 2; + string content2 = 3; +} +""" diff --git a/tests/utils.py b/tests/utils.py index d194015f6..3a17cc931 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -42,10 +42,6 @@ }] }) -schema_protobuf_plain = "syntax = \"proto3\";\npackage com.codingharbour.protobuf;\n\noption java_outer_classname = \""\ - "SimpleMessageProtos\";\n\nmessage SimpleMessage {\n string content = 1;\n"\ - " string date_time = 2;\n string content2 = 3;\n}\n" - test_objects_jsonschema = [{"foo": 100}, {"foo": 200}] test_objects_avro = [ From adb145cd65f111d2ebf52f1ae25765260616386f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 19 May 2021 
17:08:22 +0300 Subject: [PATCH 013/168] Ported first part of tests from Wire project. ProtoParser code debugged by this tests --- karapace/protobuf/enum_constant_element.py | 2 +- karapace/protobuf/enum_element.py | 4 +- karapace/protobuf/exception.py | 6 + karapace/protobuf/extend_element.py | 2 +- karapace/protobuf/extensions_element.py | 4 +- karapace/protobuf/field_element.py | 26 +- karapace/protobuf/kotlin_wrapper.py | 35 +- karapace/protobuf/location.py | 9 +- karapace/protobuf/message_element.py | 16 +- karapace/protobuf/option_element.py | 25 +- karapace/protobuf/option_reader.py | 4 +- karapace/protobuf/proto_file_element.py | 68 ++- karapace/protobuf/proto_parser.py | 93 +-- karapace/protobuf/proto_type.py | 70 +-- karapace/protobuf/reserved_document.py | 6 +- karapace/protobuf/syntax.py | 5 + karapace/protobuf/syntax_reader.py | 63 +- karapace/protobuf/type_element.py | 8 + karapace/protobuf/utils.py | 19 +- tests/unit/test_proto_parser.py | 670 +++++++++++++++++++++ 20 files changed, 940 insertions(+), 195 deletions(-) create mode 100644 tests/unit/test_proto_parser.py diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index b04e343a8..b071c11b8 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -7,7 +7,7 @@ class EnumConstantElement: name: str tag: int documentation: str - options: list = list() + options: list = [] def __init__( self, diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 05b1e5fa2..7e346ee45 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -4,7 +4,7 @@ class EnumElement(TypeElement): - constants: list = list() + constants: list = [] def __init__(self, location: Location, name: str, documentation: str, options: list, constants: list): self.location = location @@ -13,7 +13,7 @@ def __init__(self, location: Location, name: str, documentation: str, options: 
l self.options = options self.constants = constants # Enums do not allow nested type declarations. - self.nested_types = list() + self.nested_types = [] def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index b74e1efe2..4a0a337fa 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -12,6 +12,12 @@ def __init__(self, message="IllegalStateException"): super().__init__(self.message) +class IllegalArgumentException(Exception): + def __init__(self, message="IllegalArgumentException"): + self.message = message + super().__init__(self.message) + + class Error(Exception): """Base class for errors in this module.""" diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index 0aecd8e8a..69be4771f 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -21,7 +21,7 @@ def to_schema(self): if self.fields: result.append("\n") for field in self.fields: - append_indented(result, field.to_schema_declaration()) + append_indented(result, field.to_schema()) result.append("}\n") return result diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index c7be3e7bb..72ebcd90d 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -23,10 +23,10 @@ def to_schema(self) -> str: value = self.values[index] if index > 0: result.append(", ") - if value is int: + if isinstance(value, int): result.append(value) # TODO: maybe replace Kotlin IntRange by list? 
- elif value is IntRange: + elif isinstance(value, IntRange): result.append(f"{value[0]} to ") last_value = value[len(value) - 1] if last_value < MAX_TAG_VALUE: diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 10b80722d..3fe85fa1d 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -3,7 +3,6 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.utils import append_documentation, append_options -from typing import Union class FieldElement: @@ -15,11 +14,19 @@ class FieldElement: json_name: str = None tag: int = 0 documentation: str = "" - options: list = list() + options: list = [] def __init__( - self, location: Location, label: Union[None, Field.Label], element_type: str, name: str, default_value: str, - json_name: str, tag: int, documentation: str, options: list + self, + location: Location, + label: Field.Label = None, + element_type: str = None, + name: str = None, + default_value: str = None, + json_name: str = None, + tag: int = None, + documentation: str = None, + options: list = None ): self.location = location self.label = label @@ -29,14 +36,17 @@ def __init__( self.json_name = json_name self.tag = tag self.documentation = documentation - self.options = options + if not options: + self.options = [] + else: + self.options = options def to_schema(self): result: list = list() append_documentation(result, self.documentation) if self.label: - result.append(f"{self.label.name.to_english_lower_case()} ") + result.append(f"{self.label.name.lower()} ") result.append(f"{self.element_type} {self.name} = {self.tag}") @@ -44,7 +54,9 @@ def to_schema(self): if options_with_default: result.append(' ') append_options(result, options_with_default) - result.append(";\n") + result.append(";\n") + + return "".join(result) def options_with_special_values(self) -> list: """ Both `default` and `json_name` are defined 
in the schema like options but they are actually diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index e88b762c8..e1dc9eb5a 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -1,8 +1,31 @@ +from karapace.protobuf.exception import IllegalArgumentException, IllegalStateException + + def check(q: bool, message: str): if not q: raise IllegalStateException(message) +def trim_margin(s: str) -> str: + lines = s.split("\n") + new_lines = list() + + for line in lines: + idx = line.find("|") + if idx < 0: + new_lines.append(line) + else: + new_lines.append(line[idx + 1:].rstrip()) + + if not new_lines[0].strip(): + del new_lines[0] + + if not new_lines[-1].strip(): + del new_lines[-1] + + return "\n".join(new_lines) + + def require(q: bool, message: str): if not q: raise IllegalArgumentException(message) @@ -13,22 +36,10 @@ def options_to_list(a: list) -> list: return a -class IllegalStateException(Exception): - def __init__(self, message="IllegalStateException"): - self.message = message - super().__init__(self.message) - - class IntRange(list): pass -class IllegalArgumentException(Exception): - def __init__(self, message="IllegalArgumentException"): - self.message = message - super().__init__(self.message) - - class String(str): pass diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index 9abeb2985..55fe1f999 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -37,10 +37,10 @@ def __str__(self) -> str: if self.line != -1: result += ":" - result += self.line + result += str(self.line) if self.column != -1: result += ":" - result += self.column + result += str(self.column) return result @@ -55,7 +55,6 @@ def get(*args): base: str = args[0] if base.endswith("/"): base = base[:-1] - result = Location(base, path) - else: - raise Exception() + result = Location(base, path) + return result diff --git a/karapace/protobuf/message_element.py 
b/karapace/protobuf/message_element.py index d6f2c1e69..a954cc693 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -14,14 +14,14 @@ def __init__( self, location: Location, name: str, - documentation: str, - nested_types: list, - options: list, - reserveds: list, - fields: list, - one_ofs: list, - extensions: list, - groups: list, + documentation: str = None, + nested_types: list = None, + options: list = None, + reserveds: list = None, + fields: list = None, + one_ofs: list = None, + extensions: list = None, + groups: list = None, ): self.location = location self.name = name diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 626bf28ab..63c8f02ed 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -24,7 +24,7 @@ class Kind(Enum): """ If true, this [OptionElement] is a custom option. """ is_parenthesized: bool - def __init__(self, name: str, kind: Kind, value, is_parenthesized: bool): + def __init__(self, name: str, kind: Kind, value, is_parenthesized: bool = None): self.name = name self.kind = kind self.value = value @@ -32,15 +32,18 @@ def __init__(self, name: str, kind: Kind, value, is_parenthesized: bool): self.formattedName = f"({self.name})" if is_parenthesized else self.name def to_schema(self) -> str: - aline = { - self.kind == self.Kind.STRING: f"{self.formattedName} = \"{self.value}\"", - self.kind in [self.Kind.BOOLEAN, self.Kind.NUMBER, self.Kind.ENUM]: f"{self.formattedName} = {self.value}", - self.kind == self.Kind.OPTION: f"{self.formattedName}.{self.value.to_schema()}", - self.kind == self.Kind.MAP: list([f"{self.formattedName} = {{\n", - self.format_option_map(self.value), "}"]), - self.kind == self.Kind.LIST: list([f"{self.formattedName} = ", - self.append_options(self.value)]) - }[True] + aline = None + if self.kind == self.Kind.STRING: + aline = f"{self.formattedName} = \"{self.value}\"" + elif self.kind in 
[self.Kind.BOOLEAN, self.Kind.NUMBER, self.Kind.ENUM]: + aline = f"{self.formattedName} = {self.value}" + elif self.kind == self.Kind.OPTION: + aline = f"{self.formattedName}.{self.value.to_schema()}" + elif self.kind == self.Kind.MAP: + aline = [f"{self.formattedName} = {{\n", self.format_option_map(self.value), "}"] + elif self.kind == self.Kind.LIST: + aline = [f"{self.formattedName} = ", self.append_options(self.value)] + if isinstance(aline, list): return "".join(aline) return aline @@ -60,7 +63,7 @@ def append_options(options: list): data.append("[\n") for i in range(0, count): - if i < count - 1: + if i < count: endl = "," else: endl = "" diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index 509de9508..5f63c5f71 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -9,7 +9,7 @@ class KindAndValue: def __init__(self, kind: OptionElement.Kind, value: object): self.kind = kind - self.valuer = value + self.value = value class OptionReader: @@ -104,7 +104,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) option = self.read_option(key_value_separator) name = option.name value = option.value - if value is OptionElement: + if isinstance(value, OptionElement): nested = result[name] if not nested: nested = dict() diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index bbe0b45bf..86d2b8f00 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -14,29 +14,29 @@ class ProtoFileElement: options: list def __init__( - self, - location: Location, - package_name: str = None, - syntax: Syntax = None, - imports=None, - public_imports=None, - types=None, - services=None, - extend_declarations=None, - options=None + self, + location: Location, + package_name: str = None, + syntax: Syntax = None, + imports=None, + public_imports=None, + types=None, + services=None, + 
extend_declarations=None, + options=None ): - if options is None: - options = list() - if extend_declarations is None: - extend_declarations = list() - if services is None: - services = list() - if types is None: - types = list() - if public_imports is None: + if not options: + options = [] + if not extend_declarations: + extend_declarations = [] + if not services: + services = [] + if not types: + types = [] + if not public_imports: public_imports = [] - if imports is None: + if not imports: imports = [] self.location = location self.package_name = package_name @@ -80,20 +80,42 @@ def to_schema(self): if self.types: for type_element in self.types: strings.append("\n") - strings.append(str(type_element.to_schema)) + strings.append(str(type_element.to_schema())) if self.extend_declarations: for extend_declaration in self.extend_declarations: strings.append("\n") - strings.append(extend_declaration.to_schema()) + strings.append(str(extend_declaration.to_schema())) if self.services: for service in self.services: strings.append("\n") - strings.append(str(service.to_schema)) + strings.append(str(service.to_schema())) return "".join(strings) @staticmethod def empty(path): return ProtoFileElement(Location.get(path)) + + # TODO: there maybe be faster comparison workaround + def __eq__(self, other: 'ProtoFileElement'): + a = self.to_schema() + b = other.to_schema() + # sys.stderr.write("\n\nTESTA=[") + # sys.stderr.write(a) + # sys.stderr.write("]\n\nTESTB=[") + # sys.stderr.write(b) + # sys.stderr.write("]\n\n") + + return a == b + + # return str(self.location) == str(other.location) and \ + # self.package_name == other.package_name and \ + # str(self.syntax) == str(other.syntax) and \ + # str(self.imports) == str(other.imports) and \ + # str(self.public_imports) == str(self.public_imports) and \ + # str(self.types) == str(self.types) and \ + # str(self.services) == str(self.services) and \ + # str(self.extend_declarations) == str(self.extend_declarations) and \ + # 
str(self.options) == str(self.options) diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 6c3eadd0e..5376cb857 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -2,7 +2,7 @@ from enum import Enum from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement -from karapace.protobuf.exception import error +from karapace.protobuf.exception import error, IllegalArgumentException from karapace.protobuf.extend_element import ExtendElement from karapace.protobuf.extensions_element import ExtensionsElement from karapace.protobuf.field import Field @@ -67,18 +67,31 @@ def permits_extend(self) -> bool: class ProtoParser: location: Location reader: SyntaxReader - public_imports: list - imports: list - nested_types: list - services: list - extends_list: list - options: list + public_imports: list = [] + imports: list = [] + nested_types: list = [] + services: list = [] + extends_list: list = [] + options: list = [] declaration_count: int = 0 syntax: Syntax = None package_name: str = None prefix: str = "" + data: str def __init__(self, location: Location, data: str): + self.location = location + self.imports = [] + self.nested_types = [] + self.services = [] + self.extends_list = [] + self.options = [] + self.declaration_count = 0 + self.syntax = None + self.package_name = None + self.prefix = "" + self.data = data + self.public_imports = [] self.reader = SyntaxReader(data, location) def read_proto_file(self) -> ProtoFileElement: @@ -91,27 +104,28 @@ def read_proto_file(self) -> ProtoFileElement: ) declaration = self.read_declaration(documentation, Context.FILE) if isinstance(declaration, TypeElement): - # TODO: must add check for exception + # TODO: add check for exception? 
duplicate = next((x for x in iter(self.nested_types) if x.name == declaration.name), None) if duplicate: error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") self.nested_types.append(declaration) - if isinstance(declaration, ServiceElement): + elif isinstance(declaration, ServiceElement): duplicate = next((x for x in iter(self.services) if x.name == declaration.name), None) if duplicate: error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") self.services.append(declaration) - if isinstance(declaration, OptionElement): + elif isinstance(declaration, OptionElement): self.options.append(declaration) - if isinstance(declaration, ExtendElement): + elif isinstance(declaration, ExtendElement): self.extends_list.append(declaration) def read_declaration(self, documentation: str, context: Context): - self.declaration_count += 1 + index = self.declaration_count + self.declaration_count += 1 # Skip unnecessary semicolons, occasionally used after a nested message declaration. 
if self.reader.peek_char(';'): @@ -146,10 +160,11 @@ def read_declaration(self, documentation: str, context: Context): syntax_string = self.reader.read_quoted_string() try: - Syntax(syntax_string) - except ValueError as e: + self.syntax = Syntax(syntax_string) + except IllegalArgumentException as e: self.reader.unexpected(str(e), location) self.reader.require(";") + result = None elif label == "option": result = OptionReader(self.reader).read_option("=") self.reader.require(";") @@ -174,22 +189,19 @@ def read_declaration(self, documentation: str, context: Context): elif context == Context.ENUM: result = self.read_enum_constant(documentation, location, label) else: - result = None - - if not result: - self.reader.unexpected("unexpected label: $label", location) + self.reader.unexpected(f"unexpected label: {label}", location) return result def read_message(self, location: Location, documentation: str) -> MessageElement: """ Reads a message declaration. """ name: str = self.reader.read_name() - fields: list = list() - one_ofs: list = list() - nested_types: list = list() - extensions: list = list() - options: list = list() - reserveds: list = list() - groups: list = list() + fields: list = [] + one_ofs: list = [] + nested_types: list = [] + extensions: list = [] + options: list = [] + reserveds: list = [] + groups: list = [] previous_prefix = self.prefix self.prefix = f"{self.prefix}{name}." @@ -245,7 +257,7 @@ def read_extend(self, location: Location, documentation: str) -> ExtendElement: break declared = self.read_declaration(nested_documentation, Context.EXTEND) - if declared is FieldElement: + if isinstance(declared, FieldElement): fields.append(declared) # TODO: add else clause to catch unexpected declarations. 
else: @@ -269,9 +281,9 @@ def read_service(self, location: Location, documentation: str) -> ServiceElement if self.reader.peek_char("}"): break declared = self.read_declaration(rpc_documentation, Context.SERVICE) - if declared is RpcElement: + if isinstance(declared, RpcElement): rpcs.append(declared) - elif declared is OptionElement: + elif isinstance(declared, OptionElement): options.append(declared) # TODO: add else clause to catch unexpected declarations. else: @@ -298,14 +310,13 @@ def read_enum_element(self, location: Location, documentation: str) -> EnumEleme break declared = self.read_declaration(value_documentation, Context.ENUM) - if declared is EnumConstantElement: - constants.append(declared) - elif declared is OptionElement: - options.append(declared) - # TODO: add else clause to catch unexpected declarations. - else: - pass - + if isinstance(declared, EnumConstantElement): + constants.append(declared) + elif isinstance(declared, OptionElement): + options.append(declared) + # TODO: add else clause to catch unexpected declarations. 
+ else: + pass return EnumElement(location, name, documentation, options, constants) def read_field(self, documentation: str, location: Location, word: str): @@ -333,10 +344,8 @@ def read_field(self, documentation: str, location: Location, word: str): label = None atype = self.reader.read_data_type_by_name(word) - self.reader.expect_with_location( - not atype.startswith("map<") or not label, location, "'map' atype cannot have label" - ) - if atype == "group ": + self.reader.expect_with_location(not atype.startswith("map<") or not label, location, "'map' type cannot have label") + if atype == "group": return self.read_group(location, documentation, label) return self.read_field_with_label(location, documentation, label, atype) @@ -439,7 +448,7 @@ def read_group( field_location = self.reader.location() field_label = self.reader.read_word() field = self.read_field(nested_documentation, field_location, field_label) - if field is FieldElement: + if isinstance(field, FieldElement): fields.append(field) else: self.reader.unexpected(f"expected field declaration, was {field}") @@ -565,7 +574,7 @@ def read_rpc(self, location: Location, documentation: str) -> RpcElement: break declared = self.read_declaration(rpc_documentation, Context.RPC) - if declared is OptionElement: + if isinstance(declared, OptionElement): options.append(declared) # TODO: add else clause to catch unexpected declarations. 
else: diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index e8c92cff9..5228b2803 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -26,42 +26,42 @@ class ProtoType: @property def simple_name(self) -> str: dot = self.string.rfind(".") - return self.string[dot + 1] + return self.string[dot + 1:] @classmethod def static_init(cls): - cls.BOOL = ProtoType(True, "bool") - cls.BYTES = ProtoType(True, "bytes") - cls.DOUBLE = ProtoType(True, "double") - cls.FLOAT = ProtoType(True, "float") - cls.FIXED32 = ProtoType(True, "fixed32") - cls.FIXED64 = ProtoType(True, "fixed64") - cls.INT32 = ProtoType(True, "int32") - cls.INT64 = ProtoType(True, "int64") - cls.SFIXED32 = ProtoType(True, "sfixed32") - cls.SFIXED64 = ProtoType(True, "sfixed64") - cls.SINT32 = ProtoType(True, "sint32") - cls.SINT64 = ProtoType(True, "sint64") - cls.STRING = ProtoType(True, "string") - cls.UINT32 = ProtoType(True, "uint32") - cls.UINT64 = ProtoType(True, "uint64") - cls.ANY = ProtoType(False, "google.protobuf.Any") - cls.DURATION = ProtoType(False, "google.protobuf.Duration") - cls.TIMESTAMP = ProtoType(False, "google.protobuf.Timestamp") - cls.EMPTY = ProtoType(False, "google.protobuf.Empty") - cls.STRUCT_MAP = ProtoType(False, "google.protobuf.Struct") - cls.STRUCT_VALUE = ProtoType(False, "google.protobuf.Value") - cls.STRUCT_NULL = ProtoType(False, "google.protobuf.NullValue") - cls.STRUCT_LIST = ProtoType(False, "google.protobuf.ListValue") - cls.DOUBLE_VALUE = ProtoType(False, "google.protobuf.DoubleValue") - cls.FLOAT_VALUE = ProtoType(False, "google.protobuf.FloatValue") - cls.INT64_VALUE = ProtoType(False, "google.protobuf.Int64Value") - cls.UINT64_VALUE = ProtoType(False, "google.protobuf.UInt64Value") - cls.INT32_VALUE = ProtoType(False, "google.protobuf.Int32Value") - cls.UINT32_VALUE = ProtoType(False, "google.protobuf.UInt32Value") - cls.BOOL_VALUE = ProtoType(False, "google.protobuf.BoolValue") - cls.STRING_VALUE = 
ProtoType(False, "google.protobuf.StringValue") - cls.BYTES_VALUE = ProtoType(False, "google.protobuf.BytesValue") + cls.BOOL = cls(True, "bool") + cls.BYTES = cls(True, "bytes") + cls.DOUBLE = cls(True, "double") + cls.FLOAT = cls(True, "float") + cls.FIXED32 = cls(True, "fixed32") + cls.FIXED64 = cls(True, "fixed64") + cls.INT32 = cls(True, "int32") + cls.INT64 = cls(True, "int64") + cls.SFIXED32 = cls(True, "sfixed32") + cls.SFIXED64 = cls(True, "sfixed64") + cls.SINT32 = cls(True, "sint32") + cls.SINT64 = cls(True, "sint64") + cls.STRING = cls(True, "string") + cls.UINT32 = cls(True, "uint32") + cls.UINT64 = cls(True, "uint64") + cls.ANY = cls(False, "google.protobuf.Any") + cls.DURATION = cls(False, "google.protobuf.Duration") + cls.TIMESTAMP = cls(False, "google.protobuf.Timestamp") + cls.EMPTY = cls(False, "google.protobuf.Empty") + cls.STRUCT_MAP = cls(False, "google.protobuf.Struct") + cls.STRUCT_VALUE = cls(False, "google.protobuf.Value") + cls.STRUCT_NULL = cls(False, "google.protobuf.NullValue") + cls.STRUCT_LIST = cls(False, "google.protobuf.ListValue") + cls.DOUBLE_VALUE = cls(False, "google.protobuf.DoubleValue") + cls.FLOAT_VALUE = cls(False, "google.protobuf.FloatValue") + cls.INT64_VALUE = cls(False, "google.protobuf.Int64Value") + cls.UINT64_VALUE = cls(False, "google.protobuf.UInt64Value") + cls.INT32_VALUE = cls(False, "google.protobuf.Int32Value") + cls.UINT32_VALUE = cls(False, "google.protobuf.UInt32Value") + cls.BOOL_VALUE = cls(False, "google.protobuf.BoolValue") + cls.STRING_VALUE = cls(False, "google.protobuf.StringValue") + cls.BYTES_VALUE = cls(False, "google.protobuf.BytesValue") cls.SCALAR_TYPES_ = [ cls.BOOL, cls.BYTES, cls.DOUBLE, cls.FLOAT, cls.FIXED32, cls.FIXED64, cls.INT32, cls.INT64, cls.SFIXED32, @@ -80,7 +80,7 @@ def static_init(cls): def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None): """ Creates a scalar or message type. 
""" - if key_type is None and value_type is None: + if not key_type and not value_type: self.is_scalar = is_scalar self.string = string self.is_map = False @@ -95,7 +95,7 @@ def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None) self.value_type = value_type else: # TODO: must be IllegalArgumentException - raise Exception("map key must be non-byte, non-floating point scalar: $key_type") + raise Exception(f"map key must be non-byte, non-floating point scalar: {key_type}") def to_kind(self) -> OptionElement.Kind: return { diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 0e8bf9e96..8f116891f 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -24,11 +24,11 @@ def to_schema(self) -> str: if index > 0: result.append(", ") - if value is str: + if isinstance(value, str): result.append(f"\"{value}\"") - elif value is int: + elif isinstance(value, int): result.append(f"{value}") - elif value is IntRange: + elif isinstance(value, IntRange): last_index = len(value) - 1 result.append(f"{value[0]} to {value[last_index]}") else: diff --git a/karapace/protobuf/syntax.py b/karapace/protobuf/syntax.py index c43c86449..c8bba80e5 100644 --- a/karapace/protobuf/syntax.py +++ b/karapace/protobuf/syntax.py @@ -1,6 +1,11 @@ from enum import Enum +from karapace.protobuf.exception import IllegalArgumentException class Syntax(Enum): PROTO_2 = "proto2" PROTO_3 = "proto3" + + @classmethod + def _missing_(cls, string): + raise IllegalArgumentException(f"unexpected syntax: {string}") diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index bc603cc63..dca8b51f3 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -1,13 +1,13 @@ -from karapace.protobuf.exception import IllegalStateException, ProtobufParserRuntimeException +from karapace.protobuf.exception import IllegalStateException from 
karapace.protobuf.location import Location def hex_digit(c: str) -> int: - if ord(c) in range(ord('0'), ord('9')): + if ord(c) in range(ord('0'), ord('9') + 1): return ord(c) - ord('0') - if ord(c) in range(ord('a'), ord('f')): + if ord(c) in range(ord('a'), ord('f') + 1): return ord('a') + 10 - if ord(c) in range(ord('A'), ord('F')): + if ord(c) in range(ord('A'), ord('F') + 1): return ord(c) - ord('A') + 10 return -1 @@ -27,6 +27,9 @@ class SyntaxReader: line_start: int = 0 def __init__(self, data: str, location: Location): + self.pos = 0 + self.line = 0 + self.line_start = 0 self.data = data self._location = location @@ -53,6 +56,7 @@ def peek_char(self, ch: str = None): if self.peek_char() == ch: self.pos += 1 return True + return False self.skip_whitespace(True) self.expect(self.pos < len(self.data), "unexpected end of file") return self.data[self.pos] @@ -77,8 +81,8 @@ def read_quoted_string(self) -> str: result: list = [] while self.pos < len(self.data): - self.pos += 1 c = self.data[self.pos] + self.pos += 1 if c == start_quote: if self.peek_char() == '"' or self.peek_char() == "'": # Adjacent strings are concatenated. Consume new quote and continue reading. @@ -87,9 +91,8 @@ def read_quoted_string(self) -> str: return "".join(result) if c == "\\": self.expect(self.pos < len(self.data), "unexpected end of file") - self.pos += 1 c = self.data[self.pos] - + self.pos += 1 d: str = { 'a': "\u0007", # Alert. 'b': "\b", # Backspace. 
@@ -104,7 +107,7 @@ def read_quoted_string(self) -> str: else: if c in ['x', 'X']: c = self.read_numeric_escape(16, 2) - elif ord(c) in range(ord('0'), ord('7')): + elif ord(c) in range(ord('0'), ord('7') + 1): self.pos -= 1 c = self.read_numeric_escape(8, 3) @@ -172,14 +175,14 @@ def read_word(self) -> str: start = self.pos while self.pos < len(self.data): c = self.data[self.pos] - if ord(c) in range(ord('a'), ord('z')) \ - or ord(c) in range(ord('A'), ord('Z')) \ - or ord(c) in range(ord('0'), ord('9')) or c in ['_', '-', '.']: + if ord(c) in range(ord('a'), ord('z') + 1) \ + or ord(c) in range(ord('A'), ord('Z') + 1) \ + or ord(c) in range(ord('0'), ord('9') + 1) or c in ['_', '-', '.']: self.pos += 1 else: break self.expect(start < self.pos, "expected a word") - return self.data[start:self.pos - start] + return self.data[start:self.pos] def read_int(self) -> int: """ Reads an integer and returns it. """ @@ -210,7 +213,7 @@ def read_documentation(self) -> str: if result: result = f"{result}\n{comment}" else: - result = "$result\n$comment" + result = f"{comment}" def read_comment(self) -> str: """ Reads a comment and returns its body. """ @@ -220,8 +223,8 @@ def read_comment(self) -> str: self.pos += 1 tval = -1 if self.pos < len(self.data): - self.pos += 1 tval = ord(self.data[self.pos]) + self.pos += 1 result: str = "" if tval == ord('*'): buffer: list = list() @@ -254,12 +257,12 @@ def read_comment(self) -> str: self.pos += 1 # Skip a single leading space, if present. 
start = self.pos while self.pos < len(self.data): - self.pos += 1 c = self.data[self.pos] + self.pos += 1 if c == "\n": self.newline() break - result = self.data[start:self.pos - 1 - start] + result = self.data[start:self.pos - 1] if not result: self.unexpected("unexpected '/'") return result @@ -269,15 +272,15 @@ def try_append_trailing_documentation(self, documentation: str) -> str: while self.pos < len(self.data): if self.data[self.pos] in [' ', "\t"]: self.pos += 1 - - if self.data[self.pos] == '/': + elif self.data[self.pos] == '/': self.pos += 1 break - # Not a whitespace or comment-starting character. Return original documentation. - return documentation + else: + # Not a whitespace or comment-starting character. Return original documentation. + return documentation bval = (self.pos < len(self.data) and (self.data[self.pos] == '/' or self.data[self.pos] == '*')) - # Backtrack to start of comment. if not bval: + # Backtrack to start of comment. self.pos -= 1 self.expect(bval, "expected '//' or '/*'") is_star = self.data[self.pos] == '*' @@ -302,8 +305,8 @@ def try_append_trailing_documentation(self, documentation: str) -> str: self.pos += 1 # Ensure nothing follows a trailing star comment. while self.pos < len(self.data): - self.pos += 1 c = self.data[self.pos] + self.pos += 1 if c == "\n": self.newline() break @@ -316,12 +319,13 @@ def try_append_trailing_documentation(self, documentation: str) -> str: if self.pos == len(self.data): end = self.pos - 1 break - self.pos += 1 c = self.data[self.pos] + self.pos += 1 if c == "\n": self.newline() end = self.pos - 2 # Account for stepping past the newline. break + # Remove trailing whitespace. 
while end > start and (self.data[end] == " " or self.data[end] == "\t"): end -= 1 @@ -329,8 +333,8 @@ def try_append_trailing_documentation(self, documentation: str) -> str: if end == start: return documentation - trailing_documentation = self.data[start:end - start + 1] - if not documentation.strip: + trailing_documentation = self.data[start:end + 1] + if not documentation.strip(): return trailing_documentation return f"{documentation}\n{trailing_documentation}" @@ -344,10 +348,10 @@ def skip_whitespace(self, skip_comments: bool): self.pos += 1 if c == "\n": self.newline() - if skip_comments and c == "/": + elif skip_comments and c == "/": self.read_comment() - - return + else: + return def newline(self): """ Call this every time a '\n' is encountered. """ @@ -369,4 +373,5 @@ def expect_with_location(self, condition: bool, location: Location, message: str def unexpected(self, message: str, location: Location = None): if not location: location = self.location() - raise ProtobufParserRuntimeException(f"Syntax error in {str(location)}: {message}") + w = f"Syntax error in {str(location)}: {message}" + raise IllegalStateException(w) diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index b676250d2..5181154e2 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -10,3 +10,11 @@ class TypeElement: def to_schema(self) -> str: pass + + def __repr__(self): + mytype = type(self) + return f"{mytype}({self.to_schema()})" + + def __str__(self): + mytype = type(self) + return f"{mytype}({self.to_schema()})" diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 318e4a72a..a07064bce 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -1,6 +1,3 @@ -import builtins - - def protobuf_encode(a: str) -> str: # TODO: PROTOBUF return a @@ -31,7 +28,7 @@ def append_options(data: list, options: list): data.append("[\n") for i in range(0, count): - if i < count - 1: + if i < 
count: endl = "," else: endl = "" @@ -42,7 +39,7 @@ def append_options(data: list, options: list): def append_indented(data: list, value: str): lines = value.split("\n") if len(lines) > 1 and not lines[-1]: - lines = lines.pop() + del lines[-1] for line in lines: data.append(" ") @@ -57,11 +54,9 @@ def append_indented(data: list, value: str): RESERVED_TAG_VALUE_END = 19999 """ True if the supplied value is in the valid tag range and not reserved. """ +# class MyInt(int): +# def is_valid_tag(self) -> bool: +# return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or\ +# (RESERVED_TAG_VALUE_END + 1 <= self <= MAX_TAG_VALUE + 1) -class MyInt(int): - def is_valid_tag(self) -> bool: - return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or\ - (RESERVED_TAG_VALUE_END + 1 <= self <= MAX_TAG_VALUE + 1) - - -builtins.int = MyInt +# builtins.int = MyInt diff --git a/tests/unit/test_proto_parser.py b/tests/unit/test_proto_parser.py new file mode 100644 index 000000000..ac34e4372 --- /dev/null +++ b/tests/unit/test_proto_parser.py @@ -0,0 +1,670 @@ +from karapace.protobuf.enum_element import EnumElement +from karapace.protobuf.exception import IllegalStateException +from karapace.protobuf.extend_element import ExtendElement +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.protobuf.proto_parser import ProtoParser +from karapace.protobuf.syntax import Syntax + +import unittest + + +class ProtoParserTest(unittest.TestCase): + location: Location = Location.get("file.proto") + + def test_type_parsing(self, ): + proto: str = """ + |message Types { + | required any f1 = 1; + | required bool f2 = 2; + | required bytes 
f3 = 3; + | required double f4 = 4; + | required float f5 = 5; + | required fixed32 f6 = 6; + | required fixed64 f7 = 7; + | required int32 f8 = 8; + | required int64 f9 = 9; + | required sfixed32 f10 = 10; + | required sfixed64 f11 = 11; + | required sint32 f12 = 12; + | required sint64 f13 = 13; + | required string f14 = 14; + | required uint32 f15 = 15; + | required uint64 f16 = 16; + | map f17 = 17; + | map f18 = 18; + | required arbitrary f19 = 19; + | required nested.nested f20 = 20; + |} + """ + proto: str = trim_margin(proto) + + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="Types", + fields=[ + FieldElement( + location=self.location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="any", + name="f1", + tag=1 + ), + FieldElement( + location=self.location.at(3, 3), + label=Field.Label.REQUIRED, + element_type="bool", + name="f2", + tag=2 + ), + FieldElement( + location=self.location.at(4, 3), + label=Field.Label.REQUIRED, + element_type="bytes", + name="f3", + tag=3 + ), + FieldElement( + location=self.location.at(5, 3), + label=Field.Label.REQUIRED, + element_type="double", + name="f4", + tag=4 + ), + FieldElement( + location=self.location.at(6, 3), + label=Field.Label.REQUIRED, + element_type="float", + name="f5", + tag=5 + ), + FieldElement( + location=self.location.at(7, 3), + label=Field.Label.REQUIRED, + element_type="fixed32", + name="f6", + tag=6 + ), + FieldElement( + location=self.location.at(8, 3), + label=Field.Label.REQUIRED, + element_type="fixed64", + name="f7", + tag=7 + ), + FieldElement( + location=self.location.at(9, 3), + label=Field.Label.REQUIRED, + element_type="int32", + name="f8", + tag=8 + ), + FieldElement( + location=self.location.at(10, 3), + label=Field.Label.REQUIRED, + element_type="int64", + name="f9", + tag=9 + ), + FieldElement( + location=self.location.at(11, 3), + label=Field.Label.REQUIRED, + element_type="sfixed32", + name="f10", + 
tag=10 + ), + FieldElement( + location=self.location.at(12, 3), + label=Field.Label.REQUIRED, + element_type="sfixed64", + name="f11", + tag=11 + ), + FieldElement( + location=self.location.at(13, 3), + label=Field.Label.REQUIRED, + element_type="sint32", + name="f12", + tag=12 + ), + FieldElement( + location=self.location.at(14, 3), + label=Field.Label.REQUIRED, + element_type="sint64", + name="f13", + tag=13 + ), + FieldElement( + location=self.location.at(15, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="f14", + tag=14 + ), + FieldElement( + location=self.location.at(16, 3), + label=Field.Label.REQUIRED, + element_type="uint32", + name="f15", + tag=15 + ), + FieldElement( + location=self.location.at(17, 3), + label=Field.Label.REQUIRED, + element_type="uint64", + name="f16", + tag=16 + ), + FieldElement(location=self.location.at(18, 3), element_type="map", name="f17", tag=17), + FieldElement( + location=self.location.at(19, 3), + element_type="map", + name="f18", + tag=18 + ), + FieldElement( + location=self.location.at(20, 3), + label=Field.Label.REQUIRED, + element_type="arbitrary", + name="f19", + tag=19 + ), + FieldElement( + location=self.location.at(21, 3), + label=Field.Label.REQUIRED, + element_type="nested.nested", + name="f20", + tag=20 + ) + ] + ) + ] + ) + my = ProtoParser.parse(self.location, proto) + self.assertEqual(my, expected) + + def test_map_with_label_throws(self): + with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:15: 'map' type cannot have label"): + ProtoParser.parse(self.location, "message Hey { required map a = 1; }") + self.fail() + + with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:15: 'map' type cannot have label"): + ProtoParser.parse(self.location, "message Hey { optional map a = 1; }") + self.fail() + + with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:15: 'map' type cannot have label"): + 
ProtoParser.parse(self.location, "message Hey { repeated map a = 1; }") + self.fail() + + def test_default_field_option_is_special(self): + """ It looks like an option, but 'default' is special. It's not defined as an option. + """ + proto = """ + |message Message { + | required string a = 1 [default = "b", faulted = "c"]; + |} + |""" + + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="Message", + fields=[ + FieldElement( + location=self.location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="a", + default_value="b", + options=[OptionElement("faulted", OptionElement.Kind.STRING, "c")], + tag=1 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_json_name_option_is_special(self): + """ It looks like an option, but 'json_name' is special. It's not defined as an option. + """ + proto = """ + |message Message { + | required string a = 1 [json_name = "b", faulted = "c"]; + |} + |""" + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="Message", + fields=[ + FieldElement( + location=self.location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="a", + json_name="b", + tag=1, + options=[OptionElement("faulted", OptionElement.Kind.STRING, "c")] + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_single_line_comment(self): + proto = """ + |// Test all the things! + |message Test {} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + element_type = parsed.types[0] + self.assertEqual(element_type.documentation, "Test all the things!") + + def test_multiple_single_line_comments(self): + proto = """ + |// Test all + |// the things! 
+ |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test all + |the things! + """ + expected = trim_margin(expected) + + parsed = ProtoParser.parse(self.location, proto) + element_type = parsed.types[0] + self.assertEqual(element_type.documentation, expected) + + def test_single_line_javadoc_comment(self): + proto = """ + |/** Test */ + |message Test {} + |""" + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + element_type = parsed.types[0] + self.assertEqual(element_type.documentation, "Test") + + def test_multiline_javadoc_comment(self): + proto = """ + |/** + | * Test + | * + | * Foo + | */ + |message Test {} + |""" + proto = trim_margin(proto) + expected = """ + |Test + | + |Foo + """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(self.location, proto) + element_type = parsed.types[0] + self.assertEqual(element_type.documentation, expected) + + def test_multiple_single_line_comments_with_leading_whitespace(self): + proto = """ + |// Test + |// All + |// The + |// Things! + |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test + | All + | The + | Things! + """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(self.location, proto) + element_type = parsed.types[0] + self.assertEqual(element_type.documentation, expected) + + def test_multiline_javadoc_comment_with_leading_whitespace(self): + proto = """ + |/** + | * Test + | * All + | * The + | * Things! + | */ + |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test + | All + | The + | Things! + """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(self.location, proto) + element_type = parsed.types[0] + self.assertEqual(element_type.documentation, expected) + + def test_multiline_javadoc_comment_without_leading_asterisks(self): + # We do not honor leading whitespace when the comment lacks leading asterisks. + proto = """ + |/** + | Test + | All + | The + | Things! 
+ | */ + |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test + |All + |The + |Things! + """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(self.location, proto) + element_type = parsed.types[0] + self.assertEqual(element_type.documentation, expected) + + def test_message_field_trailing_comment(self): + # Trailing message field comment. + proto = """ + |message Test { + | optional string name = 1; // Test all the things! + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + message: MessageElement = parsed.types[0] + field = message.fields[0] + self.assertEqual(field.documentation, "Test all the things!") + + def test_message_field_leading_and_trailing_comment_are_combined(self): + proto = """ + |message Test { + | // Test all... + | optional string name = 1; // ...the things! + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + message: MessageElement = parsed.types[0] + field = message.fields[0] + self.assertEqual(field.documentation, "Test all...\n...the things!") + + def test_trailing_comment_not_assigned_to_following_field(self): + proto = """ + |message Test { + | optional string first_name = 1; // Testing! + | optional string last_name = 2; + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + message: MessageElement = parsed.types[0] + field1 = message.fields[0] + self.assertEqual(field1.documentation, "Testing!") + field2 = message.fields[1] + self.assertEqual(field2.documentation, "") + + def test_enum_value_trailing_comment(self): + proto = """ + |enum Test { + | FOO = 1; // Test all the things! 
+ |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + self.assertEqual(value.documentation, "Test all the things!") + + def test_trailing_singleline_comment(self): + proto = """ + |enum Test { + | FOO = 1; /* Test all the things! */ + | BAR = 2;/*Test all the things!*/ + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + enum_element: EnumElement = parsed.types[0] + c_foo = enum_element.constants[0] + self.assertEqual(c_foo.documentation, "Test all the things!") + c_bar = enum_element.constants[1] + self.assertEqual(c_bar.documentation, "Test all the things!") + + def test_trailing_multiline_comment(self): + proto = """ + |enum Test { + | FOO = 1; /* Test all the + |things! */ + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + self.assertEqual(value.documentation, "Test all the\nthings!") + + def test_trailing_multiline_comment_must_be_last_on_line_throws(self): + proto = """ + |enum Test { + | FOO = 1; /* Test all the things! */ BAR = 2; + |} + """ + proto = trim_margin(proto) + with self.assertRaisesRegex( + IllegalStateException, "Syntax error in file.proto:2:40: no syntax may follow trailing comment" + ): + ProtoParser.parse(self.location, proto) + self.fail() + + def test_invalid_trailing_comment(self): + proto = """ + |enum Test { + | FOO = 1; / + |} + """ + proto = trim_margin(proto) + # try : + # ProtoParser.parse(self.location, proto) + # except IllegalStateException as e : + # if e.message != "Syntax error in file.proto:2:12: expected '//' or '/*'" : + # self.fail() + + with self.assertRaises(IllegalStateException) as re: + # TODO: this test in Kotlin source contains "2:13:" Need compile square.wire and check how it can be? 
+ + ProtoParser.parse(self.location, proto) + self.fail() + self.assertEqual(re.exception.message, "Syntax error in file.proto:2:12: expected '//' or '/*'") + + def test_enum_value_leading_and_trailing_comments_are_combined(self): + proto = """ + |enum Test { + | // Test all... + | FOO = 1; // ...the things! + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(self.location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + self.assertEqual(value.documentation, "Test all...\n...the things!") + + def test_trailing_comment_not_combined_when_empty(self): + """ (Kotlin) Can't use raw strings here; otherwise, the formatter removes the trailing whitespace on line 3. """ + proto = "enum Test {\n" \ + " // Test all...\n" \ + " FOO = 1; // \n" \ + "}" + parsed = ProtoParser.parse(self.location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + self.assertEqual(value.documentation, "Test all...") + + def test_syntax_not_required(self): + proto = "message Foo {}" + parsed = ProtoParser.parse(self.location, proto) + self.assertIsNone(parsed.syntax) + + def test_syntax_specified(self): + proto = """ + |syntax = "proto3"; + |message Foo {} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=self.location.at(2, 1), name="Foo")] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_invalid_syntax_value_throws(self): + proto = """ + |syntax = "proto4"; + |message Foo {} + """ + proto = trim_margin(proto) + with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:1: unexpected syntax: proto4"): + ProtoParser.parse(self.location, proto) + self.fail() + + def test_syntax_not_first_declaration_throws(self): + proto = """ + |message Foo {} + |syntax = "proto3"; + """ + proto = trim_margin(proto) + with self.assertRaisesRegex( + 
IllegalStateException, "Syntax error in file.proto:2:1: 'syntax' element must be the first declaration " + "in a file" + ): + ProtoParser.parse(self.location, proto) + self.fail() + + def test_syntax_may_follow_comments_and_empty_lines(self): + proto = """ + |/* comment 1 */ + |// comment 2 + | + |syntax = "proto3"; + |message Foo {} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=self.location.at(5, 1), name="Foo")] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_proto3_message_fields_do_not_require_labels(self): + proto = """ + |syntax = "proto3"; + |message Message { + | string a = 1; + | int32 b = 2; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[ + MessageElement( + location=self.location.at(2, 1), + name="Message", + fields=[ + FieldElement(location=self.location.at(3, 3), element_type="string", name="a", tag=1), + FieldElement(location=self.location.at(4, 3), element_type="int32", name="b", tag=2) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_proto3_extension_fields_do_not_require_labels(self): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | string a = 1; + | int32 b = 2; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=self.location.at(2, 1), name="Message")], + extend_declarations=[ + ExtendElement( + location=self.location.at(4, 1), + name="Message", + documentation="", + fields=[ + FieldElement(location=self.location.at(5, 3), element_type="string", name="a", tag=1), + FieldElement(location=self.location.at(6, 3), element_type="int32", name="b", tag=2) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + 
def test_proto3_message_fields_allow_optional(self): + proto = """ + |syntax = "proto3"; + |message Message { + | optional string a = 1; + |} + """ + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[ + MessageElement( + location=self.location.at(2, 1), + name="Message", + fields=[ + FieldElement( + location=self.location.at(3, 3), + element_type="string", + name="a", + tag=1, + label=Field.Label.OPTIONAL + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) From ad490fba0b35f0154231232c7cfa086403e46b1b Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 19 May 2021 17:33:17 +0300 Subject: [PATCH 014/168] fixup lint problem --- karapace/protobuf/proto_file_element.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 86d2b8f00..052959e38 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -14,16 +14,16 @@ class ProtoFileElement: options: list def __init__( - self, - location: Location, - package_name: str = None, - syntax: Syntax = None, - imports=None, - public_imports=None, - types=None, - services=None, - extend_declarations=None, - options=None + self, + location: Location, + package_name: str = None, + syntax: Syntax = None, + imports=None, + public_imports=None, + types=None, + services=None, + extend_declarations=None, + options=None ): if not options: From 4b88525dbbec82db7bf0a3d046862e63f5f33150 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 20 May 2021 02:19:37 +0300 Subject: [PATCH 015/168] Add protobuf skeleton (#6) * Add protobuf skeleton * Add skeleton files * remove unfinished tests * fixup lint errors * Changed project structure, and added one test and debugged issues for PR #1 * fixup lint issues * fixup by @hackaugusto suggestions --- 
karapace/compatibility/__init__.py | 27 +++++++++++++++++++++ karapace/compatibility/protobuf/__init__.py | 0 karapace/compatibility/protobuf/checks.py | 11 +++++++++ karapace/kafka_rest_apis/__init__.py | 5 ++-- karapace/protobuf/__init__.py | 0 karapace/protobuf/schema.py | 8 ++++++ karapace/protobuf/utils.py | 3 +++ karapace/schema_reader.py | 27 +++++++++++++++++++++ karapace/schema_registry_apis.py | 3 ++- karapace/serialization.py | 16 +++++++++++- tests/integration/test_client_protobuf.py | 18 ++++++++++++++ tests/schemas/protobuf.py | 11 +++++++++ 12 files changed, 125 insertions(+), 4 deletions(-) create mode 100644 karapace/compatibility/protobuf/__init__.py create mode 100644 karapace/compatibility/protobuf/checks.py create mode 100644 karapace/protobuf/__init__.py create mode 100644 karapace/protobuf/schema.py create mode 100644 karapace/protobuf/utils.py create mode 100644 tests/integration/test_client_protobuf.py create mode 100644 tests/schemas/protobuf.py diff --git a/karapace/compatibility/__init__.py b/karapace/compatibility/__init__.py index 57cc2593b..218bf8403 100644 --- a/karapace/compatibility/__init__.py +++ b/karapace/compatibility/__init__.py @@ -11,6 +11,7 @@ SchemaIncompatibilityType ) from karapace.compatibility.jsonschema.checks import compatibility as jsonschema_compatibility +from karapace.compatibility.protobuf.checks import check_protobuf_schema_compatibility from karapace.schema_reader import SchemaType, TypedSchema import logging @@ -62,6 +63,10 @@ def check_jsonschema_compatibility(reader: Draft7Validator, writer: Draft7Valida return jsonschema_compatibility(reader, writer) +def check_protobuf_compatibility(reader, writer) -> SchemaCompatibilityResult: + return check_protobuf_schema_compatibility(reader, writer) + + def check_compatibility( old_schema: TypedSchema, new_schema: TypedSchema, compatibility_mode: CompatibilityModes ) -> SchemaCompatibilityResult: @@ -127,6 +132,28 @@ def check_compatibility( ) ) + elif 
old_schema.schema_type is SchemaType.PROTOBUF: + if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}: + result = check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}: + result = check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + ) + + elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}: + result = check_protobuf_compatibility( + reader=new_schema.schema, + writer=old_schema.schema, + ) + result = result.merged_with(check_protobuf_compatibility( + reader=old_schema.schema, + writer=new_schema.schema, + )) + else: result = SchemaCompatibilityResult.incompatible( incompat_type=SchemaIncompatibilityType.type_mismatch, diff --git a/karapace/compatibility/protobuf/__init__.py b/karapace/compatibility/protobuf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py new file mode 100644 index 000000000..e761655b2 --- /dev/null +++ b/karapace/compatibility/protobuf/checks.py @@ -0,0 +1,11 @@ +# TODO: PROTOBUF* this functionality must be implemented +from karapace.avro_compatibility import SchemaCompatibilityResult + + +def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: + # TODO: PROTOBUF* for investigation purposes yet + + if writer != reader: + return SchemaCompatibilityResult.compatible() + + return SchemaCompatibilityResult.compatible() diff --git a/karapace/kafka_rest_apis/__init__.py b/karapace/kafka_rest_apis/__init__.py index 3f1d50ef3..05f50beea 100644 --- a/karapace/kafka_rest_apis/__init__.py +++ b/karapace/kafka_rest_apis/__init__.py @@ -25,9 +25,10 @@ RECORD_KEYS = ["key", "value", "partition"] PUBLISH_KEYS = {"records", "value_schema", "value_schema_id", "key_schema", 
"key_schema_id"} RECORD_CODES = [42201, 42202] -KNOWN_FORMATS = {"json", "avro", "binary"} +KNOWN_FORMATS = {"json", "avro", "protobuf", "binary"} OFFSET_RESET_STRATEGIES = {"latest", "earliest"} -SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA} +# TODO: PROTOBUF* check schema mapping +SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA, "protobuf": SchemaType.PROTOBUF} TypedConsumer = namedtuple("TypedConsumer", ["consumer", "serialization_format", "config"]) diff --git a/karapace/protobuf/__init__.py b/karapace/protobuf/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py new file mode 100644 index 000000000..e16951a23 --- /dev/null +++ b/karapace/protobuf/schema.py @@ -0,0 +1,8 @@ +class ProtobufSchema: + schema: str + + def __init__(self, schema: str): + self.schema = schema + + def __str__(self) -> str: + return self.schema diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py new file mode 100644 index 000000000..2cf297248 --- /dev/null +++ b/karapace/protobuf/utils.py @@ -0,0 +1,3 @@ +def protobuf_encode(a: str) -> str: + # TODO: PROTOBUF + return a diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index e51f01421..8b5890b11 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -14,6 +14,7 @@ from kafka.errors import NoBrokersAvailable, NodeNotReadyError, TopicAlreadyExistsError from karapace import constants from karapace.avro_compatibility import parse_avro_schema_definition +from karapace.protobuf.schema import ProtobufSchema from karapace.statsd import StatsClient from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue @@ -38,6 +39,17 @@ def parse_jsonschema_definition(schema_definition: str) -> Draft7Validator: return Draft7Validator(schema) +def parse_protobuf_schema_definition(schema_definition: str) -> ProtobufSchema: + """ Parses and 
validates `schema_definition`. + + Raises: + Nothing yet. + + """ + + return ProtobufSchema(schema_definition) + + class InvalidSchema(Exception): pass @@ -71,12 +83,23 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen except SchemaParseException as e: raise InvalidSchema from e + @staticmethod + def parse_protobuf(schema_str: str): + try: + return TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) + # TypeError - Raised when the user forgets to encode the schema as a string. + except Exception as e: # FIXME: bare exception + log.exception("Unexpected error:") + raise InvalidSchema from e + @staticmethod def parse(schema_type: SchemaType, schema_str: str): # pylint: disable=inconsistent-return-statements if schema_type is SchemaType.AVRO: return TypedSchema.parse_avro(schema_str) if schema_type is SchemaType.JSONSCHEMA: return TypedSchema.parse_json(schema_str) + if schema_type is SchemaType.PROTOBUF: + return TypedSchema.parse_protobuf(schema_str) raise InvalidSchema(f"Unknown parser {schema_type} for {schema_str}") def to_json(self): @@ -87,9 +110,13 @@ def to_json(self): return self.schema def __str__(self) -> str: + if isinstance(self.schema, ProtobufSchema): + return str(self.schema) return json_encode(self.to_json(), compact=True) def __repr__(self): + if isinstance(self.schema, ProtobufSchema): + return f"TypedSchema(type={self.schema_type}, schema={str(self)})" return f"TypedSchema(type={self.schema_type}, schema={json_encode(self.to_json())})" def __eq__(self, other): diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index ec76ce539..c621615c7 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -263,6 +263,7 @@ def send_delete_subject_message(self, subject, version): value = '{{"subject":"{}","version":{}}}'.format(subject, version) return self.send_kafka_message(key, value) + # TODO: PROTOBUF add protobuf 
compatibility_check async def compatibility_check(self, content_type, *, subject, version, request): """Check for schema compatibility""" body = request.json @@ -667,7 +668,7 @@ def _validate_schema_request_body(self, content_type, body) -> None: def _validate_schema_type(self, content_type, body) -> None: schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO.value)) - if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO}: + if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO, SchemaType.PROTOBUF}: self.r( body={ "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, diff --git a/karapace/serialization.py b/karapace/serialization.py index 14cbd6dab..9d57786e7 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -71,7 +71,10 @@ def __init__(self, schema_registry_url: str = "http://localhost:8081"): self.base_url = schema_registry_url async def post_new_schema(self, subject: str, schema: TypedSchema) -> int: - payload = {"schema": json_encode(schema.to_json()), "schemaType": schema.schema_type.value} + if schema.schema_type is SchemaType.PROTOBUF: + payload = {"schema": str(schema), "schemaType": schema.schema_type.value} + else: + payload = {"schema": json_encode(schema.to_json()), "schemaType": schema.schema_type.value} result = await self.client.post(f"subjects/{quote(subject)}/versions", json=payload) if not result.ok: raise SchemaRetrievalError(result.json()) @@ -135,6 +138,7 @@ def get_subject_name(self, topic_name: str, schema: str, subject_type: str, sche namespace = schema_typed.schema.namespace if schema_type is SchemaType.JSONSCHEMA: namespace = schema_typed.to_json().get("namespace", "dummy") + # TODO: PROTOBUF* Seems protobuf does not use namespaces in terms of AVRO return f"{self.subject_name_strategy(topic_name, namespace)}-{subject_type}" async def get_schema_for_subject(self, subject: str) -> TypedSchema: @@ -174,16 +178,22 @@ async def get_schema_for_id(self, schema_id: int) -> TypedSchema: 
def read_value(schema: TypedSchema, bio: io.BytesIO): + if schema.schema_type is SchemaType.AVRO: reader = DatumReader(schema.schema) return reader.read(BinaryDecoder(bio)) if schema.schema_type is SchemaType.JSONSCHEMA: + value = load(bio) try: schema.schema.validate(value) except ValidationError as e: raise InvalidPayload from e return value + if schema.schema_type is SchemaType.PROTOBUF: + # TODO: PROTOBUF* we need use protobuf validator there + value = bio.read() + return value raise ValueError("Unknown schema type") @@ -197,6 +207,10 @@ def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict): except ValidationError as e: raise InvalidPayload from e bio.write(json_encode(value, binary=True)) + elif schema.schema_type is SchemaType.PROTOBUF: + # TODO: PROTOBUF* we need use protobuf validator there + bio.write(value) + else: raise ValueError("Unknown schema type") diff --git a/tests/integration/test_client_protobuf.py b/tests/integration/test_client_protobuf.py new file mode 100644 index 000000000..862991389 --- /dev/null +++ b/tests/integration/test_client_protobuf.py @@ -0,0 +1,18 @@ +from karapace.schema_reader import SchemaType, TypedSchema +from karapace.serialization import SchemaRegistryClient +from tests.schemas.protobuf import schema_protobuf_plain +from tests.utils import new_random_name + + +async def test_remote_client_protobuf(registry_async_client): + schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain) + reg_cli = SchemaRegistryClient() + reg_cli.client = registry_async_client + subject = new_random_name("subject") + sc_id = await reg_cli.post_new_schema(subject, schema_protobuf) + assert sc_id >= 0 + stored_schema = await reg_cli.get_schema_for_id(sc_id) + assert stored_schema == schema_protobuf, f"stored schema {stored_schema} is not {schema_protobuf}" + stored_id, stored_schema = await reg_cli.get_latest_schema(subject) + assert stored_id == sc_id + assert stored_schema == schema_protobuf diff --git 
a/tests/schemas/protobuf.py b/tests/schemas/protobuf.py new file mode 100644 index 000000000..e8e1a0bb8 --- /dev/null +++ b/tests/schemas/protobuf.py @@ -0,0 +1,11 @@ +schema_protobuf_plain = """ +syntax = "proto3"; +package com.codingharbour.protobuf; + +option java_outer_classname = "SimpleMessageProtos"; +message SimpleMessage { + string content = 1; + string date_time = 2; + string content2 = 3; +} +""" From af054c7627faab74c32cc59b3112e8745395fdc5 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 20 May 2021 10:49:23 +0300 Subject: [PATCH 016/168] fixup lint issues after conflict remove --- karapace/protobuf/schema.py | 3 +++ karapace/protobuf/utils.py | 1 + karapace/schema_reader.py | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index e16951a23..8a3e9f7a8 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -6,3 +6,6 @@ def __init__(self, schema: str): def __str__(self) -> str: return self.schema + + def to_json(self): + return self.schema diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index b5d1255bf..a07064bce 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -2,6 +2,7 @@ def protobuf_encode(a: str) -> str: # TODO: PROTOBUF return a + def append_documentation(data: list, documentation: str): if not documentation: return diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index a6d6e9b4b..f49a4be88 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -23,7 +23,6 @@ import json import logging -import sys import time log = logging.getLogger(__name__) From fcfabb352ef794841987afe3afd0de021cfa1c4c Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 25 May 2021 22:39:53 +0300 Subject: [PATCH 017/168] another part of tests added --- karapace/protobuf/extensions_element.py | 14 +- karapace/protobuf/kotlin_wrapper.py | 4 - karapace/protobuf/option_element.py | 41 +- 
karapace/protobuf/option_reader.py | 8 +- karapace/protobuf/proto_file_element.py | 3 + karapace/protobuf/proto_parser.py | 13 +- karapace/protobuf/reserved_document.py | 7 +- karapace/protobuf/syntax_reader.py | 2 +- karapace/protobuf/utils.py | 6 +- tests/unit/test_proto_parser.py | 1158 ++++++++++++++++++++++- 10 files changed, 1213 insertions(+), 43 deletions(-) diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index 72ebcd90d..256c02f13 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -1,4 +1,4 @@ -from karapace.protobuf.kotlin_wrapper import IntRange +from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, MAX_TAG_VALUE @@ -19,18 +19,18 @@ def to_schema(self) -> str: append_documentation(result, self.documentation) result.append("extensions ") - for index in range(len(self.values)): + for index in range(0, len(self.values)): value = self.values[index] if index > 0: result.append(", ") if isinstance(value, int): - result.append(value) + result.append(str(value)) # TODO: maybe replace Kotlin IntRange by list? 
- elif isinstance(value, IntRange): - result.append(f"{value[0]} to ") - last_value = value[len(value) - 1] + elif isinstance(value, KotlinRange): + result.append(f"{value.minimum} to ") + last_value = value.maximum if last_value < MAX_TAG_VALUE: - result.append(last_value) + result.append(str(last_value)) else: result.append("max") else: diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index e1dc9eb5a..978d01def 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -36,10 +36,6 @@ def options_to_list(a: list) -> list: return a -class IntRange(list): - pass - - class String(str): pass diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 63c8f02ed..f0cdd3a7a 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -4,6 +4,15 @@ from karapace.protobuf.utils import append_indented +def try_to_schema(obj: object) -> str: + try: + return obj.to_schema() + except AttributeError: + if isinstance(obj, str): + return obj + raise AttributeError + + class ListOptionElement(list): pass @@ -38,7 +47,7 @@ def to_schema(self) -> str: elif self.kind in [self.Kind.BOOLEAN, self.Kind.NUMBER, self.Kind.ENUM]: aline = f"{self.formattedName} = {self.value}" elif self.kind == self.Kind.OPTION: - aline = f"{self.formattedName}.{self.value.to_schema()}" + aline = f"{self.formattedName}.{try_to_schema(self.value)}" elif self.kind == self.Kind.MAP: aline = [f"{self.formattedName} = {{\n", self.format_option_map(self.value), "}"] elif self.kind == self.Kind.LIST: @@ -57,7 +66,7 @@ def append_options(options: list): count = len(options) if count == 1: data.append('[') - data.append(options[0].to_schema()) + data.append(try_to_schema(options[0])) data.append(']') return "".join(data) @@ -67,7 +76,7 @@ def append_options(options: list): endl = "," else: endl = "" - append_indented(data, options[i].to_schema() + endl) + append_indented(data, 
try_to_schema(options[i]) + endl) data.append(']') return "".join(data) @@ -81,11 +90,13 @@ def format_option_map(self, value: dict) -> str: return "".join(result) def format_option_map_value(self, value) -> str: - aline = { - isinstance(value, str): f"\"{value}\"", - isinstance(value, dict): list(["{\n", self.format_option_map_value(value), "}"]), - isinstance(value, list): list(["[\n", self.format_list_map_value(value), "]"]) - }[True] + aline = value + if isinstance(value, str): + aline = f"\"{value}\"" + elif isinstance(value, dict): + aline = ["{\n", self.format_option_map(value), "}"] + elif isinstance(value, list): + aline = ["[\n", self.format_list_map_value(value), "]"] if isinstance(aline, list): return "".join(aline) @@ -94,10 +105,16 @@ def format_option_map_value(self, value) -> str: return value def format_list_map_value(self, value) -> str: - keys = value.keys() + last_index = len(value) - 1 - result: list = list() - for index, key in enumerate(keys): + result: list = [] + for index, elm in enumerate(value): endl = "," if (index != last_index) else "" - append_indented(result, f"{self.format_option_map_value(value[key])}{endl}") + append_indented(result, f"{self.format_option_map_value(elm)}{endl}") return "".join(result) + + def __repr__(self): + return self.to_schema() + + def __eq__(self, other): + return str(self) == str(other) diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index 5f63c5f71..a6825d1b3 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -45,7 +45,7 @@ def read_option(self, key_value_separator: str) -> OptionElement: if is_extension: name = f"[{name}]" - sub_names: list = list() + sub_names: list = [] c = self.reader.read_char() if c == '.': # Read nested field name. For example "baz" in "(foo.bar).baz = 12". 
@@ -77,7 +77,7 @@ def read_kind_and_value(self) -> KindAndValue: result = KindAndValue(OptionElement.Kind.LIST, self.read_list()) elif peeked in ('"', "'"): result = KindAndValue(OptionElement.Kind.STRING, self.reader.read_string()) - elif peeked.is_digit() or peeked == '-': + elif ord(str(peeked)) in range(ord("0"), ord("9")) or peeked == '-': result = KindAndValue(OptionElement.Kind.NUMBER, self.reader.read_word()) else: word = self.reader.read_word() @@ -105,14 +105,14 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) name = option.name value = option.value if isinstance(value, OptionElement): - nested = result[name] + nested = result.get(name) if not nested: nested = dict() result[name] = nested nested[value.name] = value.value else: # Add the value(s) to any previous values with the same key - previous = result[name] + previous = result.get(name) if not previous: result[name] = value elif isinstance(previous, list): # Add to previous List diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 052959e38..8c2af1d28 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -110,6 +110,9 @@ def __eq__(self, other: 'ProtoFileElement'): return a == b + def __repr__(self): + return self.to_schema() + # return str(self.location) == str(other.location) and \ # self.package_name == other.package_name and \ # str(self.syntax) == str(other.syntax) and \ diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 5376cb857..b39bf80a1 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -572,13 +572,12 @@ def read_rpc(self, location: Location, documentation: str) -> RpcElement: rpc_documentation = self.reader.read_documentation() if self.reader.peek_char('}'): break - - declared = self.read_declaration(rpc_documentation, Context.RPC) - if isinstance(declared, OptionElement): - 
options.append(declared) - # TODO: add else clause to catch unexpected declarations. - else: - pass + declared = self.read_declaration(rpc_documentation, Context.RPC) + if isinstance(declared, OptionElement): + options.append(declared) + # TODO: add else clause to catch unexpected declarations. + else: + pass else: self.reader.require(';') diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 8f116891f..687272b49 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -1,4 +1,4 @@ -from karapace.protobuf.kotlin_wrapper import IntRange +from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation @@ -28,9 +28,8 @@ def to_schema(self) -> str: result.append(f"\"{value}\"") elif isinstance(value, int): result.append(f"{value}") - elif isinstance(value, IntRange): - last_index = len(value) - 1 - result.append(f"{value[0]} to {value[last_index]}") + elif isinstance(value, KotlinRange): + result.append(f"{value.minimum} to {value.maximum}") else: raise AssertionError() result.append(";\n") diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index dca8b51f3..7c22c8937 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -6,7 +6,7 @@ def hex_digit(c: str) -> int: if ord(c) in range(ord('0'), ord('9') + 1): return ord(c) - ord('0') if ord(c) in range(ord('a'), ord('f') + 1): - return ord('a') + 10 + return ord(c) - ord('a') + 10 if ord(c) in range(ord('A'), ord('F') + 1): return ord(c) - ord('A') + 10 return -1 diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index a07064bce..8f46cfe8f 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -6,11 +6,11 @@ def protobuf_encode(a: str) -> str: def append_documentation(data: list, documentation: str): if not documentation: return - 
documentation.split() + lines: list = documentation.split("\n") - if len(lines) > 1 and lines[-1]: - lines = lines.pop() + if len(lines) > 1 and not lines[-1]: + lines.pop() for line in lines: data.append("# ") diff --git a/tests/unit/test_proto_parser.py b/tests/unit/test_proto_parser.py index ac34e4372..82216019d 100644 --- a/tests/unit/test_proto_parser.py +++ b/tests/unit/test_proto_parser.py @@ -1,15 +1,22 @@ +from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.extend_element import ExtendElement +from karapace.protobuf.extensions_element import ExtensionsElement from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement -from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.group_element import GroupElement +from karapace.protobuf.kotlin_wrapper import KotlinRange, trim_margin from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_file_element import ProtoFileElement from karapace.protobuf.proto_parser import ProtoParser +from karapace.protobuf.rpc_element import RpcElement +from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax +from karapace.protobuf.utils import MAX_TAG_VALUE import unittest @@ -668,3 +675,1152 @@ def test_proto3_message_fields_allow_optional(self): ] ) self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_proto3_message_fields_forbid_required(self): + proto = """ + |syntax = "proto3"; + |message Message { + | required string a = 1; + |} + """ + proto = trim_margin(proto) + with self.assertRaisesRegex( + IllegalStateException, "Syntax error in 
file.proto:3:3: 'required' label forbidden in proto3 field " + "declarations" + ): + ProtoParser.parse(self.location, proto) + self.fail() + + def test_proto3_extension_fields_allow_optional(self): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | optional string a = 1; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=self.location.at(2, 1), name="Message")], + extend_declarations=[ + ExtendElement( + location=self.location.at(4, 1), + name="Message", + documentation="", + fields=[ + FieldElement( + location=self.location.at(5, 3), + element_type="string", + name="a", + tag=1, + label=Field.Label.OPTIONAL + ) + ], + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_proto3_extension_fields_forbids_required(self): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | required string a = 1; + |} + """ + proto = trim_margin(proto) + with self.assertRaisesRegex( + IllegalStateException, "Syntax error in file.proto:5:3: 'required' label forbidden in proto3 field " + "declarations" + ): + ProtoParser.parse(self.location, proto) + self.fail() + + def test_proto3_message_fields_permit_repeated(self): + proto = """ + |syntax = "proto3"; + |message Message { + | repeated string a = 1; + |} + """ + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[ + MessageElement( + location=self.location.at(2, 1), + name="Message", + fields=[ + FieldElement( + location=self.location.at(3, 3), + label=Field.Label.REPEATED, + element_type="string", + name="a", + tag=1 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_proto3_extension_fields_permit_repeated(self): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | repeated 
string a = 1; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=self.location.at(2, 1), name="Message")], + extend_declarations=[ + ExtendElement( + location=self.location.at(4, 1), + name="Message", + documentation="", + fields=[ + FieldElement( + location=self.location.at(5, 3), + label=Field.Label.REPEATED, + element_type="string", + name="a", + tag=1 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_parse_message_and_fields(self): + proto = """ + |message SearchRequest { + | required string query = 1; + | optional int32 page_number = 2; + | optional int32 result_per_page = 3; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="SearchRequest", + fields=[ + FieldElement( + location=self.location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="query", + tag=1 + ), + FieldElement( + location=self.location.at(3, 3), + label=Field.Label.OPTIONAL, + element_type="int32", + name="page_number", + tag=2 + ), + FieldElement( + location=self.location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="int32", + name="result_per_page", + tag=3 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_group(self): + proto = """ + |message SearchResponse { + | repeated group Result = 1 { + | required string url = 2; + | optional string title = 3; + | repeated string snippets = 4; + | } + |} + """ + proto = trim_margin(proto) + message = MessageElement( + location=self.location.at(1, 1), + name="SearchResponse", + groups=[ + GroupElement( + location=self.location.at(2, 3), + label=Field.Label.REPEATED, + name="Result", + tag=1, + documentation="", + fields=[ + FieldElement( + location=self.location.at(3, 5), + 
label=Field.Label.REQUIRED, + element_type="string", + name="url", + tag=2 + ), + FieldElement( + location=self.location.at(4, 5), + label=Field.Label.OPTIONAL, + element_type="string", + name="title", + tag=3 + ), + FieldElement( + location=self.location.at(5, 5), + label=Field.Label.REPEATED, + element_type="string", + name="snippets", + tag=4 + ) + ] + ) + ] + ) + expected = ProtoFileElement(location=self.location, types=[message]) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_parse_message_and_one_of(self): + proto = """ + |message SearchRequest { + | required string query = 1; + | oneof page_info { + | int32 page_number = 2; + | int32 result_per_page = 3; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="SearchRequest", + fields=[ + FieldElement( + location=self.location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="query", + tag=1 + ) + ], + one_ofs=[ + OneOfElement( + name="page_info", + documentation="", + fields=[ + FieldElement( + location=self.location.at(4, 5), element_type="int32", name="page_number", tag=2 + ), + FieldElement( + location=self.location.at(5, 5), element_type="int32", name="result_per_page", tag=3 + ) + ], + groups=[], + options=[] + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_parse_message_and_one_of_with_group(self): + proto = """ + |message SearchRequest { + | required string query = 1; + | oneof page_info { + | int32 page_number = 2; + | group Stuff = 3 { + | optional int32 result_per_page = 4; + | optional int32 page_count = 5; + | } + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="SearchRequest", + fields=[ + FieldElement( + location=self.location.at(2, 3), + 
label=Field.Label.REQUIRED, + element_type="string", + name="query", + tag=1 + ) + ], + one_ofs=[ + OneOfElement( + name="page_info", + documentation="", + fields=[ + FieldElement( + location=self.location.at(4, 5), element_type="int32", name="page_number", tag=2 + ) + ], + groups=[ + GroupElement( + label=None, + location=self.location.at(5, 5), + name="Stuff", + tag=3, + documentation="", + fields=[ + FieldElement( + location=self.location.at(6, 7), + label=Field.Label.OPTIONAL, + element_type="int32", + name="result_per_page", + tag=4 + ), + FieldElement( + location=self.location.at(7, 7), + label=Field.Label.OPTIONAL, + element_type="int32", + name="page_count", + tag=5 + ) + ] + ) + ], + options=[] + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_parse_enum(self): + proto = """ + |/** + | * What's on my waffles. + | * Also works on pancakes. + | */ + |enum Topping { + | FRUIT = 1; + | /** Yummy, yummy cream. */ + | CREAM = 2; + | + | // Quebec Maple syrup + | SYRUP = 3; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[ + EnumElement( + location=self.location.at(5, 1), + name="Topping", + documentation="What's on my waffles.\nAlso works on pancakes.", + constants=[ + EnumConstantElement( + location=self.location.at(6, 3), name="FRUIT", tag=1, documentation="", options=[] + ), + EnumConstantElement( + location=self.location.at(8, 3), + name="CREAM", + tag=2, + documentation="Yummy, yummy cream.", + options=[] + ), + EnumConstantElement( + location=self.location.at(11, 3), + name="SYRUP", + tag=3, + documentation="Quebec Maple syrup", + options=[] + ) + ], + options=[] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_parse_enum_with_options(self): + proto = """ + |/** + | * What's on my waffles. + | * Also works on pancakes. 
+ | */ + |enum Topping { + | option(max_choices) = 2; + | + | FRUIT = 1[(healthy) = true]; + | /** Yummy, yummy cream. */ + | CREAM = 2; + | + | // Quebec Maple syrup + | SYRUP = 3; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[ + EnumElement( + location=self.location.at(5, 1), + name="Topping", + documentation="What's on my waffles.\nAlso works on pancakes.", + options=[OptionElement("max_choices", OptionElement.Kind.NUMBER, "2", True)], + constants=[ + EnumConstantElement( + location=self.location.at(8, 3), + name="FRUIT", + tag=1, + documentation="", + options=[OptionElement("healthy", OptionElement.Kind.BOOLEAN, "true", True)] + ), + EnumConstantElement( + location=self.location.at(10, 3), + name="CREAM", + tag=2, + documentation="Yummy, yummy cream.", + options=[] + ), + EnumConstantElement( + location=self.location.at(13, 3), + name="SYRUP", + tag=3, + documentation="Quebec Maple syrup", + options=[] + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_package_declaration(self): + proto = """ + |package google.protobuf; + |option java_package = "com.google.protobuf"; + | + |// The protocol compiler can output a FileDescriptorSet containing the .proto + |// files it parses. + |message FileDescriptorSet { + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + package_name="google.protobuf", + types=[ + MessageElement( + location=self.location.at(6, 1), + name="FileDescriptorSet", + documentation="The protocol compiler can output a FileDescriptorSet containing the .proto\nfiles " + "it parses." 
+ ) + ], + options=[OptionElement("java_package", OptionElement.Kind.STRING, "com.google.protobuf")] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_nesting_in_message(self): + proto = """ + |message FieldOptions { + | optional CType ctype = 1[old_default = STRING, deprecated = true]; + | enum CType { + | STRING = 0[(opt_a) = 1, (opt_b) = 2]; + | }; + | // Clients can define custom options in extensions of this message. See above. + | extensions 500; + | extensions 1000 to max; + |} + """ + proto = trim_margin(proto) + enum_element = EnumElement( + location=self.location.at(3, 3), + name="CType", + documentation="", + constants=[ + EnumConstantElement( + location=self.location.at(4, 5), + name="STRING", + tag=0, + documentation="", + options=[ + OptionElement("opt_a", OptionElement.Kind.NUMBER, "1", True), + OptionElement("opt_b", OptionElement.Kind.NUMBER, "2", True) + ] + ) + ], + options=[] + ) + field = FieldElement( + location=self.location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="CType", + name="ctype", + tag=1, + options=[ + OptionElement("old_default", OptionElement.Kind.ENUM, "STRING"), + OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") + ] + ) + + self.assertEqual(len(field.options), 2) + self.assertTrue(OptionElement("old_default", OptionElement.Kind.ENUM, "STRING") in field.options) + self.assertTrue(OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") in field.options) + + message_element = MessageElement( + location=self.location.at(1, 1), + name="FieldOptions", + fields=[field], + nested_types=[enum_element], + extensions=[ + ExtensionsElement( + location=self.location.at(7, 3), + documentation="Clients can define custom options in extensions of this message. 
See above.", + values=[500] + ), + ExtensionsElement(self.location.at(8, 3), "", [KotlinRange(1000, MAX_TAG_VALUE)]) + ] + ) + expected = ProtoFileElement(location=self.location, types=[message_element]) + actual = ProtoParser.parse(self.location, proto) + self.assertEqual(actual, expected) + + def test_multi_ranges_extensions(self): + proto = """ + |message MeGustaExtensions { + | extensions 1, 5 to 200, 500, 1000 to max; + |} + """ + proto = trim_margin(proto) + message_element = MessageElement( + location=self.location.at(1, 1), + name="MeGustaExtensions", + documentation="", + fields=[], + nested_types=[], + extensions=[ + ExtensionsElement( + location=self.location.at(2, 3), + documentation="", + values=[1] + [KotlinRange(5, 200)] + [500] + [KotlinRange(1000, MAX_TAG_VALUE)] + ) + ] + ) + expected = ProtoFileElement(location=self.location, types=[message_element]) + actual = ProtoParser.parse(self.location, proto) + self.assertEqual(actual, expected) + + def test_option_parentheses(self): + proto = """ + |message Chickens { + | optional bool koka_ko_koka_ko = 1[old_default = true]; + | optional bool coodle_doodle_do = 2[(delay) = 100, old_default = false]; + | optional bool coo_coo_ca_cha = 3[old_default = true, (delay) = 200]; + | optional bool cha_chee_cha = 4; + |} + """ + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="Chickens", + fields=[ + FieldElement( + location=self.location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="koka_ko_koka_ko", + tag=1, + options=[OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true")] + ), + FieldElement( + location=self.location.at(3, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="coodle_doodle_do", + tag=2, + options=[ + OptionElement("delay", OptionElement.Kind.NUMBER, "100", True), + OptionElement("old_default", OptionElement.Kind.BOOLEAN, "false") + ] + ), + 
FieldElement( + location=self.location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="coo_coo_ca_cha", + tag=3, + options=[ + OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true"), + OptionElement("delay", OptionElement.Kind.NUMBER, "200", True) + ] + ), + FieldElement( + location=self.location.at(5, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="cha_chee_cha", + tag=4 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_imports(self): + proto = "import \"src/test/resources/unittest_import.proto\";\n" + expected = ProtoFileElement(location=self.location, imports=["src/test/resources/unittest_import.proto"]) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_public_imports(self): + proto = "import public \"src/test/resources/unittest_import.proto\";\n" + expected = ProtoFileElement(location=self.location, public_imports=["src/test/resources/unittest_import.proto"]) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_extend(self): + proto = """ + |// Extends Foo + |extend Foo { + | optional int32 bar = 126; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + extend_declarations=[ + ExtendElement( + location=self.location.at(2, 1), + name="Foo", + documentation="Extends Foo", + fields=[ + FieldElement( + location=self.location.at(3, 3), + label=Field.Label.OPTIONAL, + element_type="int32", + name="bar", + tag=126 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_extend_in_message(self): + proto = """ + |message Bar { + | extend Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[MessageElement(location=self.location.at(1, 1), name="Bar")], + extend_declarations=[ + ExtendElement( + location=self.location.at(2, 3), 
+ name="Foo", + documentation="", + fields=[ + FieldElement( + location=self.location.at(3, 5), + label=Field.Label.OPTIONAL, + element_type="Bar", + name="bar", + tag=126 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_extend_in_message_with_package(self): + proto = """ + |package kit.kat; + | + |message Bar { + | extend Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + package_name="kit.kat", + types=[MessageElement(location=self.location.at(3, 1), name="Bar")], + extend_declarations=[ + ExtendElement( + location=self.location.at(4, 3), + name="Foo", + documentation="", + fields=[ + FieldElement( + location=self.location.at(5, 5), + label=Field.Label.OPTIONAL, + element_type="Bar", + name="bar", + tag=126 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_fqcn_extend_in_message(self): + proto = """ + |message Bar { + | extend example.Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[MessageElement(location=self.location.at(1, 1), name="Bar")], + extend_declarations=[ + ExtendElement( + location=self.location.at(2, 3), + name="example.Foo", + documentation="", + fields=[ + FieldElement( + location=self.location.at(3, 5), + label=Field.Label.OPTIONAL, + element_type="Bar", + name="bar", + tag=126 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_fqcn_extend_in_message_with_package(self): + proto = """ + |package kit.kat; + | + |message Bar { + | extend example.Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + package_name="kit.kat", + types=[MessageElement(location=self.location.at(3, 1), name="Bar")], + extend_declarations=[ + 
ExtendElement( + location=self.location.at(4, 3), + name="example.Foo", + documentation="", + fields=[ + FieldElement( + location=self.location.at(5, 5), + label=Field.Label.OPTIONAL, + element_type="Bar", + name="bar", + tag=126 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_default_field_with_paren(self): + proto = """ + |message Foo { + | optional string claim_token = 2[(squareup.redacted) = true]; + |} + """ + proto = trim_margin(proto) + field = FieldElement( + location=self.location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="claim_token", + tag=2, + options=[OptionElement("squareup.redacted", OptionElement.Kind.BOOLEAN, "true", True)] + ) + self.assertTrue(len(field.options) == 1) + self.assertTrue(OptionElement("squareup.redacted", OptionElement.Kind.BOOLEAN, "true", True) in field.options) + + message_element = MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=self.location, types=[message_element]) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + # Parse \a, \b, \f, \n, \r, \t, \v, \[0-7]{1-3}, and \[xX]{0-9a-fA-F]{1,2} + def test_default_field_with_string_escapes(self): + proto = r""" + |message Foo { + | optional string name = 1 [ + | x = "\a\b\f\n\r\t\v\1f\01\001\11\011\111\xe\Xe\xE\xE\x41\x41" + | ]; + |} + """ + proto = trim_margin(proto) + field = FieldElement( + location=self.location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="name", + tag=1, + options=[ + OptionElement( + "x", OptionElement.Kind.STRING, + "\u0007\b\u000C\n\r\t\u000b\u0001f\u0001\u0001\u0009\u0009I\u000e\u000e\u000e\u000eAA" + ) + ] + ) + self.assertTrue(len(field.options) == 1) + self.assertTrue( + OptionElement( + "x", OptionElement.Kind.STRING, + "\u0007\b\u000C\n\r\t\u000b\u0001f\u0001\u0001\u0009\u0009I\u000e\u000e\u000e\u000eAA" + ) in field.options + ) + + message_element = 
MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=self.location, types=[message_element]) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_string_with_single_quotes(self): + proto = r""" + |message Foo { + | optional string name = 1[default = 'single\"quotes']; + |} + """ + proto = trim_margin(proto) + + field = FieldElement( + location=self.location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="name", + tag=1, + default_value="single\"quotes" + ) + message_element = MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=self.location, types=[message_element]) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_adjacent_strings_concatenated(self): + proto = """ + |message Foo { + | optional string name = 1 [ + | default = "concat " + | 'these ' + | "please" + | ]; + |} + """ + proto = trim_margin(proto) + + field = FieldElement( + location=self.location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="name", + tag=1, + default_value="concat these please" + ) + message_element = MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=self.location, types=[message_element]) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_invalid_hex_string_escape(self): + proto = r""" + |message Foo { + | optional string name = 1 [default = "\xW"]; + |} + """ + proto = trim_margin(proto) + with self.assertRaises(IllegalStateException) as re: + ProtoParser.parse(self.location, proto) + self.fail() + self.assertIn("expected a digit after \\x or \\X", re.exception.message) + + def test_service(self): + proto = """ + |service SearchService { + | option (default_timeout) = 30; + | + | rpc Search (SearchRequest) returns (SearchResponse); + | rpc Purchase 
(PurchaseRequest) returns (PurchaseResponse) { + | option (squareup.sake.timeout) = 15; + | option (squareup.a.b) = { + | value: [ + | FOO, + | BAR + | ] + | }; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + services=[ + ServiceElement( + location=self.location.at(1, 1), + name="SearchService", + documentation="", + options=[OptionElement("default_timeout", OptionElement.Kind.NUMBER, "30", True)], + rpcs=[ + RpcElement( + location=self.location.at(4, 3), + name="Search", + documentation="", + request_type="SearchRequest", + response_type="SearchResponse", + options=[], + response_streaming=False, + request_streaming=False + ), + RpcElement( + location=self.location.at(5, 3), + name="Purchase", + documentation="", + request_type="PurchaseRequest", + response_type="PurchaseResponse", + options=[ + OptionElement("squareup.sake.timeout", OptionElement.Kind.NUMBER, "15", True), + OptionElement("squareup.a.b", OptionElement.Kind.MAP, {"value": ["FOO", "BAR"]}, True) + ], + request_streaming=False, + response_streaming=False + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_streaming_service(self): + proto = """ + |service RouteGuide { + | rpc GetFeature (Point) returns (Feature) {} + | rpc ListFeatures (Rectangle) returns (stream Feature) {} + | rpc RecordRoute (stream Point) returns (RouteSummary) {} + | rpc RouteChat (stream RouteNote) returns (stream RouteNote) {} + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + services=[ + ServiceElement( + location=self.location.at(1, 1), + name="RouteGuide", + documentation="", + rpcs=[ + RpcElement( + location=self.location.at(2, 3), + name="GetFeature", + documentation="", + request_type="Point", + response_type="Feature", + options=[], + response_streaming=False, + request_streaming=False + ), + RpcElement( + location=self.location.at(3, 3), + name="ListFeatures", + 
documentation="", + request_type="Rectangle", + response_type="Feature", + response_streaming=True, + # TODO: Report Square.Wire there was mistake True instead of False! + request_streaming=False, + options=[] + ), + RpcElement( + location=self.location.at(4, 3), + name="RecordRoute", + documentation="", + request_type="Point", + response_type="RouteSummary", + request_streaming=True, + response_streaming=False, + options=[] + ), + RpcElement( + location=self.location.at(5, 3), + name="RouteChat", + documentation="", + request_type="RouteNote", + response_type="RouteNote", + request_streaming=True, + response_streaming=True, + options=[] + ) + ], + options=[] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_hex_tag(self): + proto = """ + |message HexTag { + | required string hex = 0x10; + | required string uppercase_x_hex = 0X11; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="HexTag", + fields=[ + FieldElement( + location=self.location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="hex", + tag=16 + ), + FieldElement( + location=self.location.at(3, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="uppercase_x_hex", + tag=17 + ) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_structured_option(self): + proto = """ + |message ExoticOptions { + | option (squareup.one) = {name: "Name", class_name:"ClassName"}; + | option (squareup.two.a) = {[squareup.options.type]: EXOTIC}; + | option (squareup.two.b) = {names: ["Foo", "Bar"]}; + |} + """ + # TODO: we do not support it yet + # + # | option (squareup.three) = {x: {y: 1 y: 2 } }; // NOTE: Omitted optional comma + # | option (squareup.four) = {x: {y: {z: 1 }, y: {z: 2 }}}; + # + # + # + proto = trim_margin(proto) + + option_one_map = {"name": "Name", "class_name": 
"ClassName"} + + option_two_a_map = {"[squareup.options.type]": "EXOTIC"} + + option_two_b_map = {"names": ["Foo", "Bar"]} + + # TODO: we do not support it yet + # need create custom dictionary class to support multiple values for one key + # + # option_three_map = {"x": {"y": 1, "y": 2}} + # option_four_map = {"x": ["y": {"z": 1}, "y": {"z": 2}]} + + expected = ProtoFileElement( + location=self.location, + types=[ + MessageElement( + location=self.location.at(1, 1), + name="ExoticOptions", + options=[ + OptionElement("squareup.one", OptionElement.Kind.MAP, option_one_map, True), + OptionElement("squareup.two.a", OptionElement.Kind.MAP, option_two_a_map, True), + OptionElement("squareup.two.b", OptionElement.Kind.MAP, option_two_b_map, True), + # OptionElement("squareup.three", OptionElement.Kind.MAP, option_three_map, True), + # OptionElement("squareup.four", OptionElement.Kind.MAP, option_four_map, True) + ] + ) + ] + ) + self.assertEqual(ProtoParser.parse(self.location, proto), expected) + + def test_options_with_nested_maps_and_trailing_commas(self): + proto = """ + |message StructuredOption { + | optional field.type has_options = 3 [ + | (option_map) = { + | nested_map: {key:"value", key2:["value2a","value2b"]}, + | }, + | (option_string) = ["string1","string2"] + | ]; + |} + """ + proto = trim_margin(proto) + field = FieldElement( + location=self.location.at(2, 5), + label=Field.Label.OPTIONAL, + element_type="field.type", + name="has_options", + tag=3, + options=[ + OptionElement( + "option_map", OptionElement.Kind.MAP, {"nested_map": { + "key": "value", + "key2": ["value2a", "value2b"] + }}, True + ), + OptionElement("option_string", OptionElement.Kind.LIST, ["string1", "string2"], True) + ] + ) + self.assertTrue(len(field.options) == 2) + self.assertTrue( + OptionElement( + "option_map", OptionElement.Kind.MAP, {"nested_map": { + "key": "value", + "key2": ["value2a", "value2b"] + }}, True + ) in field.options + ) + self.assertTrue( + 
OptionElement("option_string", OptionElement.Kind.LIST, ["string1", "string2"], True) in field.options + ) + + expected = MessageElement(location=self.location.at(1, 1), name="StructuredOption", fields=[field]) + proto_file = ProtoFileElement(location=self.location, types=[expected]) + self.assertEqual(ProtoParser.parse(self.location, proto), proto_file) From e460350bf741313e468d079244aaa97a6c5a9984 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 31 May 2021 16:48:32 +0300 Subject: [PATCH 018/168] change project to pytest and add references to square/wire project files --- karapace/protobuf/enum_constant_element.py | 2 + karapace/protobuf/enum_element.py | 3 + karapace/protobuf/extend_element.py | 3 + karapace/protobuf/extensions_element.py | 3 + karapace/protobuf/field.py | 5 +- karapace/protobuf/field_element.py | 3 + karapace/protobuf/group_element.py | 2 + karapace/protobuf/location.py | 4 + karapace/protobuf/message_element.py | 3 + karapace/protobuf/one_of_element.py | 3 + karapace/protobuf/option_element.py | 3 + karapace/protobuf/option_reader.py | 3 + karapace/protobuf/proto_file_element.py | 3 + karapace/protobuf/proto_parser.py | 3 + karapace/protobuf/proto_type.py | 2 + karapace/protobuf/reserved_document.py | 3 + karapace/protobuf/rpc_element.py | 3 + karapace/protobuf/schema.py | 5 + karapace/protobuf/service_element.py | 3 + karapace/protobuf/syntax.py | 3 + karapace/protobuf/syntax_reader.py | 3 + karapace/protobuf/type_element.py | 3 + karapace/protobuf/utils.py | 4 + tests/unit/test_proto_parser.py | 3492 ++++++++++---------- 24 files changed, 1768 insertions(+), 1796 deletions(-) diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index b071c11b8..e669fcc8b 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -1,3 +1,5 @@ +# Ported from square/wire: +# 
wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumConstantElement.kt from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_options diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 7e346ee45..b7b0bf93c 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt + from karapace.protobuf.location import Location from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index 69be4771f..bd8e29115 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtendElement.kt + from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index 256c02f13..77edb5124 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtensionsElement.kt + from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, MAX_TAG_VALUE diff --git a/karapace/protobuf/field.py b/karapace/protobuf/field.py index 5a387da56..bfa3e407f 100644 --- a/karapace/protobuf/field.py +++ b/karapace/protobuf/field.py @@ -1,4 +1,7 @@ -# TODO: ...
+# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Field.kt + +# TODO: We ported only code which is used by schema parser. Full class may be ported if required from enum import Enum diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 3fe85fa1d..7ec3c4c7b 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/FieldElement.kt + from karapace.protobuf.field import Field from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index d04e24f8c..f765a1039 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -1,3 +1,5 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/GroupElement.kt from karapace.protobuf.field import Field from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index 55fe1f999..b0e5f26d4 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -1,3 +1,7 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Location.kt + + class Location: """ Locates a .proto file, or a self.position within a .proto file, on the file system """ diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index a954cc693..c42308eb6 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# 
wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt + from karapace.protobuf.location import Location from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index 9a3c323ad..d36f001e3 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OneOfElement.kt + from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index f0cdd3a7a..43d28ed7c 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OptionElement.kt + from enum import Enum # from karapace.protobuf.kotlin_wrapper import * # from karapace.protobuf.kotlin_wrapper import * diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index a6825d1b3..8fb348f25 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OptionReader.kt + from karapace.protobuf.option_element import OptionElement from karapace.protobuf.syntax_reader import SyntaxReader from typing import Union diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 8c2af1d28..2e0f9f9c5 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# 
wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElement.kt + from karapace.protobuf.location import Location from karapace.protobuf.syntax import Syntax diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index b39bf80a1..801bf86a7 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoParser.kt + from builtins import str from enum import Enum from karapace.protobuf.enum_constant_element import EnumConstantElement diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 5228b2803..10a8875dc 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -1,3 +1,5 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/ProtoType.kt """ Names a protocol buffer message, enumerated type, service, map, or a scalar. This class models a fully-qualified name using the protocol buffer package. 
diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 687272b49..0550d73c3 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ReservedElement.kt + from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py index ae812f81e..a61b160c1 100644 --- a/karapace/protobuf/rpc_element.py +++ b/karapace/protobuf/rpc_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/RpcElement.kt + from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 8a3e9f7a8..7d31c1580 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -1,3 +1,8 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Schema.kt +# Ported partially for required functionality. 
+ + class ProtobufSchema: schema: str diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index 95c6b0136..69cd232fa 100644 --- a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ServiceElement.kt + from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/syntax.py b/karapace/protobuf/syntax.py index c8bba80e5..b31b60a6e 100644 --- a/karapace/protobuf/syntax.py +++ b/karapace/protobuf/syntax.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-runtime/src/commonMain/kotlin/com/squareup/wire/Syntax.kt + from enum import Enum from karapace.protobuf.exception import IllegalArgumentException diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 7c22c8937..1b6e5ced3 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/SyntaxReader.kt + from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.location import Location diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index 5181154e2..3fbd80869 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/TypeElement.kt + from karapace.protobuf.location import Location diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 8f46cfe8f..8e6c70564 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -1,3 +1,7 @@ +# Ported from square/wire: +# 
wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/Util.kt + + def protobuf_encode(a: str) -> str: # TODO: PROTOBUF return a diff --git a/tests/unit/test_proto_parser.py b/tests/unit/test_proto_parser.py index 82216019d..04636b931 100644 --- a/tests/unit/test_proto_parser.py +++ b/tests/unit/test_proto_parser.py @@ -1,3 +1,6 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ProtoParserTest.kt + from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.exception import IllegalStateException @@ -18,1809 +21,1708 @@ from karapace.protobuf.syntax import Syntax from karapace.protobuf.utils import MAX_TAG_VALUE -import unittest - - -class ProtoParserTest(unittest.TestCase): - location: Location = Location.get("file.proto") - - def test_type_parsing(self, ): - proto: str = """ - |message Types { - | required any f1 = 1; - | required bool f2 = 2; - | required bytes f3 = 3; - | required double f4 = 4; - | required float f5 = 5; - | required fixed32 f6 = 6; - | required fixed64 f7 = 7; - | required int32 f8 = 8; - | required int64 f9 = 9; - | required sfixed32 f10 = 10; - | required sfixed64 f11 = 11; - | required sint32 f12 = 12; - | required sint64 f13 = 13; - | required string f14 = 14; - | required uint32 f15 = 15; - | required uint64 f16 = 16; - | map f17 = 17; - | map f18 = 18; - | required arbitrary f19 = 19; - | required nested.nested f20 = 20; - |} - """ - proto: str = trim_margin(proto) - - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="Types", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.REQUIRED, - element_type="any", - name="f1", - tag=1 - ), - FieldElement( - location=self.location.at(3, 3), - label=Field.Label.REQUIRED, - element_type="bool", - name="f2", - 
tag=2 - ), - FieldElement( - location=self.location.at(4, 3), - label=Field.Label.REQUIRED, - element_type="bytes", - name="f3", - tag=3 - ), - FieldElement( - location=self.location.at(5, 3), - label=Field.Label.REQUIRED, - element_type="double", - name="f4", - tag=4 - ), - FieldElement( - location=self.location.at(6, 3), - label=Field.Label.REQUIRED, - element_type="float", - name="f5", - tag=5 - ), - FieldElement( - location=self.location.at(7, 3), - label=Field.Label.REQUIRED, - element_type="fixed32", - name="f6", - tag=6 - ), - FieldElement( - location=self.location.at(8, 3), - label=Field.Label.REQUIRED, - element_type="fixed64", - name="f7", - tag=7 - ), - FieldElement( - location=self.location.at(9, 3), - label=Field.Label.REQUIRED, - element_type="int32", - name="f8", - tag=8 - ), - FieldElement( - location=self.location.at(10, 3), - label=Field.Label.REQUIRED, - element_type="int64", - name="f9", - tag=9 - ), - FieldElement( - location=self.location.at(11, 3), - label=Field.Label.REQUIRED, - element_type="sfixed32", - name="f10", - tag=10 - ), - FieldElement( - location=self.location.at(12, 3), - label=Field.Label.REQUIRED, - element_type="sfixed64", - name="f11", - tag=11 - ), - FieldElement( - location=self.location.at(13, 3), - label=Field.Label.REQUIRED, - element_type="sint32", - name="f12", - tag=12 - ), - FieldElement( - location=self.location.at(14, 3), - label=Field.Label.REQUIRED, - element_type="sint64", - name="f13", - tag=13 - ), - FieldElement( - location=self.location.at(15, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="f14", - tag=14 - ), - FieldElement( - location=self.location.at(16, 3), - label=Field.Label.REQUIRED, - element_type="uint32", - name="f15", - tag=15 - ), - FieldElement( - location=self.location.at(17, 3), - label=Field.Label.REQUIRED, - element_type="uint64", - name="f16", - tag=16 - ), - FieldElement(location=self.location.at(18, 3), element_type="map", name="f17", tag=17), - FieldElement( - 
location=self.location.at(19, 3), - element_type="map", - name="f18", - tag=18 - ), - FieldElement( - location=self.location.at(20, 3), - label=Field.Label.REQUIRED, - element_type="arbitrary", - name="f19", - tag=19 - ), - FieldElement( - location=self.location.at(21, 3), - label=Field.Label.REQUIRED, - element_type="nested.nested", - name="f20", - tag=20 - ) - ] - ) - ] - ) - my = ProtoParser.parse(self.location, proto) - self.assertEqual(my, expected) - - def test_map_with_label_throws(self): - with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:15: 'map' type cannot have label"): - ProtoParser.parse(self.location, "message Hey { required map a = 1; }") - self.fail() - - with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:15: 'map' type cannot have label"): - ProtoParser.parse(self.location, "message Hey { optional map a = 1; }") - self.fail() - - with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:15: 'map' type cannot have label"): - ProtoParser.parse(self.location, "message Hey { repeated map a = 1; }") - self.fail() - - def test_default_field_option_is_special(self): - """ It looks like an option, but 'default' is special. It's not defined as an option. 
+import pytest + +location: Location = Location.get("file.proto") + + +def test_type_parsing(): + proto: str = """ + |message Types { + | required any f1 = 1; + | required bool f2 = 2; + | required bytes f3 = 3; + | required double f4 = 4; + | required float f5 = 5; + | required fixed32 f6 = 6; + | required fixed64 f7 = 7; + | required int32 f8 = 8; + | required int64 f9 = 9; + | required sfixed32 f10 = 10; + | required sfixed64 f11 = 11; + | required sint32 f12 = 12; + | required sint64 f13 = 13; + | required string f14 = 14; + | required uint32 f15 = 15; + | required uint64 f16 = 16; + | map f17 = 17; + | map f18 = 18; + | required arbitrary f19 = 19; + | required nested.nested f20 = 20; + |} + """ + proto: str = trim_margin(proto) + + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="Types", + fields=[ + FieldElement( + location=location.at(2, 3), label=Field.Label.REQUIRED, element_type="any", name="f1", tag=1 + ), + FieldElement( + location=location.at(3, 3), label=Field.Label.REQUIRED, element_type="bool", name="f2", tag=2 + ), + FieldElement( + location=location.at(4, 3), label=Field.Label.REQUIRED, element_type="bytes", name="f3", tag=3 + ), + FieldElement( + location=location.at(5, 3), label=Field.Label.REQUIRED, element_type="double", name="f4", tag=4 + ), + FieldElement( + location=location.at(6, 3), label=Field.Label.REQUIRED, element_type="float", name="f5", tag=5 + ), + FieldElement( + location=location.at(7, 3), label=Field.Label.REQUIRED, element_type="fixed32", name="f6", tag=6 + ), + FieldElement( + location=location.at(8, 3), label=Field.Label.REQUIRED, element_type="fixed64", name="f7", tag=7 + ), + FieldElement( + location=location.at(9, 3), label=Field.Label.REQUIRED, element_type="int32", name="f8", tag=8 + ), + FieldElement( + location=location.at(10, 3), label=Field.Label.REQUIRED, element_type="int64", name="f9", tag=9 + ), + FieldElement( + location=location.at(11, 3), 
label=Field.Label.REQUIRED, element_type="sfixed32", name="f10", tag=10 + ), + FieldElement( + location=location.at(12, 3), label=Field.Label.REQUIRED, element_type="sfixed64", name="f11", tag=11 + ), + FieldElement( + location=location.at(13, 3), label=Field.Label.REQUIRED, element_type="sint32", name="f12", tag=12 + ), + FieldElement( + location=location.at(14, 3), label=Field.Label.REQUIRED, element_type="sint64", name="f13", tag=13 + ), + FieldElement( + location=location.at(15, 3), label=Field.Label.REQUIRED, element_type="string", name="f14", tag=14 + ), + FieldElement( + location=location.at(16, 3), label=Field.Label.REQUIRED, element_type="uint32", name="f15", tag=15 + ), + FieldElement( + location=location.at(17, 3), label=Field.Label.REQUIRED, element_type="uint64", name="f16", tag=16 + ), + FieldElement(location=location.at(18, 3), element_type="map", name="f17", tag=17), + FieldElement( + location=location.at(19, 3), element_type="map", name="f18", tag=18 + ), + FieldElement( + location=location.at(20, 3), + label=Field.Label.REQUIRED, + element_type="arbitrary", + name="f19", + tag=19 + ), + FieldElement( + location=location.at(21, 3), + label=Field.Label.REQUIRED, + element_type="nested.nested", + name="f20", + tag=20 + ) + ] + ) + ] + ) + my = ProtoParser.parse(location, proto) + assert my == expected + + +def test_map_with_label_throws(): + with pytest.raises(IllegalStateException, match="Syntax error in file.proto:1:15: 'map' type cannot have label"): + ProtoParser.parse(location, "message Hey { required map a = 1; }") + pytest.fail("") + + with pytest.raises(IllegalStateException, match="Syntax error in file.proto:1:15: 'map' type cannot have label"): + ProtoParser.parse(location, "message Hey { optional map a = 1; }") + pytest.fail("") + + with pytest.raises(IllegalStateException, match="Syntax error in file.proto:1:15: 'map' type cannot have label"): + ProtoParser.parse(location, "message Hey { repeated map a = 1; }") + pytest.fail("") + + +def 
test_default_field_option_is_special(): + """ It looks like an option, but 'default' is special. It's not defined as an option. + """ + proto = """ + |message Message { + | required string a = 1 [default = "b", faulted = "c"]; + |} + |""" + + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="Message", + fields=[ + FieldElement( + location=location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="a", + default_value="b", + options=[OptionElement("faulted", OptionElement.Kind.STRING, "c")], + tag=1 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_json_name_option_is_special(): + """ It looks like an option, but 'json_name' is special. It's not defined as an option. + """ + proto = """ + |message Message { + | required string a = 1 [json_name = "b", faulted = "c"]; + |} + |""" + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="Message", + fields=[ + FieldElement( + location=location.at(2, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="a", + json_name="b", + tag=1, + options=[OptionElement("faulted", OptionElement.Kind.STRING, "c")] + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_single_line_comment(): + proto = """ + |// Test all the things! + |message Test {} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + element_type = parsed.types[0] + assert element_type.documentation == "Test all the things!" + + +def test_multiple_single_line_comments(): + proto = """ + |// Test all + |// the things! + |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test all + |the things! 
+ """ + expected = trim_margin(expected) + + parsed = ProtoParser.parse(location, proto) + element_type = parsed.types[0] + assert element_type.documentation == expected + + +def test_single_line_javadoc_comment(): + proto = """ + |/** Test */ + |message Test {} + |""" + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + element_type = parsed.types[0] + assert element_type.documentation == "Test" + + +def test_multiline_javadoc_comment(): + proto = """ + |/** + | * Test + | * + | * Foo + | */ + |message Test {} + |""" + proto = trim_margin(proto) + expected = """ + |Test + | + |Foo + """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(location, proto) + element_type = parsed.types[0] + assert element_type.documentation == expected + + +def test_multiple_single_line_comments_with_leading_whitespace(): + proto = """ + |// Test + |// All + |// The + |// Things! + |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test + | All + | The + | Things! + """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(location, proto) + element_type = parsed.types[0] + assert element_type.documentation == expected + + +def test_multiline_javadoc_comment_with_leading_whitespace(): + proto = """ + |/** + | * Test + | * All + | * The + | * Things! + | */ + |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test + | All + | The + | Things! + """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(location, proto) + element_type = parsed.types[0] + assert element_type.documentation == expected + + +def test_multiline_javadoc_comment_without_leading_asterisks(): + # We do not honor leading whitespace when the comment lacks leading asterisks. + proto = """ + |/** + | Test + | All + | The + | Things! + | */ + |message Test {} + """ + proto = trim_margin(proto) + expected = """ + |Test + |All + |The + |Things! 
+ """ + expected = trim_margin(expected) + parsed = ProtoParser.parse(location, proto) + element_type = parsed.types[0] + assert element_type.documentation == expected + + +def test_message_field_trailing_comment(): + # Trailing message field comment. + proto = """ + |message Test { + | optional string name = 1; // Test all the things! + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + message: MessageElement = parsed.types[0] + field = message.fields[0] + assert field.documentation == "Test all the things!" + + +def test_message_field_leading_and_trailing_comment_are_combined(): + proto = """ + |message Test { + | // Test all... + | optional string name = 1; // ...the things! + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + message: MessageElement = parsed.types[0] + field = message.fields[0] + assert field.documentation == "Test all...\n...the things!" + + +def test_trailing_comment_not_assigned_to_following_field(): + proto = """ + |message Test { + | optional string first_name = 1; // Testing! + | optional string last_name = 2; + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + message: MessageElement = parsed.types[0] + field1 = message.fields[0] + assert field1.documentation == "Testing!" + field2 = message.fields[1] + assert field2.documentation == "" + + +def test_enum_value_trailing_comment(): + proto = """ + |enum Test { + | FOO = 1; // Test all the things! + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + assert value.documentation == "Test all the things!" + + +def test_trailing_singleline_comment(): + proto = """ + |enum Test { + | FOO = 1; /* Test all the things! 
*/ + | BAR = 2;/*Test all the things!*/ + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + enum_element: EnumElement = parsed.types[0] + c_foo = enum_element.constants[0] + assert c_foo.documentation == "Test all the things!" + c_bar = enum_element.constants[1] + assert c_bar.documentation == "Test all the things!" + + +def test_trailing_multiline_comment(): + proto = """ + |enum Test { + | FOO = 1; /* Test all the + |things! */ + |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + assert value.documentation == "Test all the\nthings!" + + +def test_trailing_multiline_comment_must_be_last_on_line_throws(): + proto = """ + |enum Test { + | FOO = 1; /* Test all the things! */ BAR = 2; + |} + """ + proto = trim_margin(proto) + with pytest.raises( + IllegalStateException, match="Syntax error in file.proto:2:40: no syntax may follow trailing comment" + ): + ProtoParser.parse(location, proto) + pytest.fail("") + + +def test_invalid_trailing_comment(): + proto = """ + |enum Test { + | FOO = 1; / + |} + """ + proto = trim_margin(proto) + # try : + # ProtoParser.parse(location, proto) + # except IllegalStateException as e : + # if e.message != "Syntax error in file.proto:2:12: expected '//' or '/*'" : + # pytest.fail("") + + with pytest.raises(IllegalStateException) as re: + # TODO: this test in Kotlin source contains "2:13:" Need compile square.wire and check how it can be? + + ProtoParser.parse(location, proto) + pytest.fail("") + assert re.value.message == "Syntax error in file.proto:2:12: expected '//' or '/*'" + + +def test_enum_value_leading_and_trailing_comments_are_combined(): + proto = """ + |enum Test { + | // Test all... + | FOO = 1; // ...the things! 
+ |} + """ + proto = trim_margin(proto) + parsed = ProtoParser.parse(location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + assert value.documentation == "Test all...\n...the things!" + + +def test_trailing_comment_not_combined_when_empty(): + """ (Kotlin) Can't use raw strings here; otherwise, the formatter removes the trailing whitespace on line 3. """ + proto = "enum Test {\n" \ + " // Test all...\n" \ + " FOO = 1; // \n" \ + "}" + parsed = ProtoParser.parse(location, proto) + enum_element: EnumElement = parsed.types[0] + value = enum_element.constants[0] + assert value.documentation == "Test all..." + + +def test_syntax_not_required(): + proto = "message Foo {}" + parsed = ProtoParser.parse(location, proto) + assert parsed.syntax is None + + +def test_syntax_specified(): + proto = """ + |syntax = "proto3"; + |message Foo {} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, syntax=Syntax.PROTO_3, types=[MessageElement(location=location.at(2, 1), name="Foo")] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_invalid_syntax_value_throws(): + proto = """ + |syntax = "proto4"; + |message Foo {} + """ + proto = trim_margin(proto) + with pytest.raises(IllegalStateException, match="Syntax error in file.proto:1:1: unexpected syntax: proto4"): + ProtoParser.parse(location, proto) + pytest.fail("") + + +def test_syntax_not_first_declaration_throws(): + proto = """ + |message Foo {} + |syntax = "proto3"; + """ + proto = trim_margin(proto) + with pytest.raises( + IllegalStateException, + match="Syntax error in file.proto:2:1: 'syntax' element must be the first declaration " + "in a file" + ): + ProtoParser.parse(location, proto) + pytest.fail("") + + +def test_syntax_may_follow_comments_and_empty_lines(): + proto = """ + |/* comment 1 */ + |// comment 2 + | + |syntax = "proto3"; + |message Foo {} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + 
location=location, syntax=Syntax.PROTO_3, types=[MessageElement(location=location.at(5, 1), name="Foo")] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto3_message_fields_do_not_require_labels(): + proto = """ + |syntax = "proto3"; + |message Message { + | string a = 1; + | int32 b = 2; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + syntax=Syntax.PROTO_3, + types=[ + MessageElement( + location=location.at(2, 1), + name="Message", + fields=[ + FieldElement(location=location.at(3, 3), element_type="string", name="a", tag=1), + FieldElement(location=location.at(4, 3), element_type="int32", name="b", tag=2) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto3_extension_fields_do_not_require_labels(): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | string a = 1; + | int32 b = 2; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=location.at(2, 1), name="Message")], + extend_declarations=[ + ExtendElement( + location=location.at(4, 1), + name="Message", + documentation="", + fields=[ + FieldElement(location=location.at(5, 3), element_type="string", name="a", tag=1), + FieldElement(location=location.at(6, 3), element_type="int32", name="b", tag=2) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto3_message_fields_allow_optional(): + proto = """ + |syntax = "proto3"; + |message Message { + | optional string a = 1; + |} + """ + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=location, + syntax=Syntax.PROTO_3, + types=[ + MessageElement( + location=location.at(2, 1), + name="Message", + fields=[ + FieldElement( + location=location.at(3, 3), element_type="string", name="a", tag=1, label=Field.Label.OPTIONAL + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, 
proto) == expected + + +def test_proto3_message_fields_forbid_required(): + proto = """ + |syntax = "proto3"; + |message Message { + | required string a = 1; + |} """ - proto = """ - |message Message { - | required string a = 1 [default = "b", faulted = "c"]; - |} - |""" - - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="Message", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="a", - default_value="b", - options=[OptionElement("faulted", OptionElement.Kind.STRING, "c")], - tag=1 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_json_name_option_is_special(self): - """ It looks like an option, but 'json_name' is special. It's not defined as an option. + proto = trim_margin(proto) + with pytest.raises( + IllegalStateException, + match="Syntax error in file.proto:3:3: 'required' label forbidden in proto3 field " + "declarations" + ): + ProtoParser.parse(location, proto) + pytest.fail("") + + +def test_proto3_extension_fields_allow_optional(): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | optional string a = 1; + |} """ - proto = """ - |message Message { - | required string a = 1 [json_name = "b", faulted = "c"]; - |} - |""" - proto = trim_margin(proto) - - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="Message", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="a", - json_name="b", - tag=1, - options=[OptionElement("faulted", OptionElement.Kind.STRING, "c")] - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_single_line_comment(self): - proto = """ - |// Test all the things! 
- |message Test {} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - element_type = parsed.types[0] - self.assertEqual(element_type.documentation, "Test all the things!") - - def test_multiple_single_line_comments(self): - proto = """ - |// Test all - |// the things! - |message Test {} - """ - proto = trim_margin(proto) - expected = """ - |Test all - |the things! - """ - expected = trim_margin(expected) - - parsed = ProtoParser.parse(self.location, proto) - element_type = parsed.types[0] - self.assertEqual(element_type.documentation, expected) - - def test_single_line_javadoc_comment(self): - proto = """ - |/** Test */ - |message Test {} - |""" - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - element_type = parsed.types[0] - self.assertEqual(element_type.documentation, "Test") - - def test_multiline_javadoc_comment(self): - proto = """ - |/** - | * Test - | * - | * Foo - | */ - |message Test {} - |""" - proto = trim_margin(proto) - expected = """ - |Test - | - |Foo - """ - expected = trim_margin(expected) - parsed = ProtoParser.parse(self.location, proto) - element_type = parsed.types[0] - self.assertEqual(element_type.documentation, expected) - - def test_multiple_single_line_comments_with_leading_whitespace(self): - proto = """ - |// Test - |// All - |// The - |// Things! - |message Test {} - """ - proto = trim_margin(proto) - expected = """ - |Test - | All - | The - | Things! - """ - expected = trim_margin(expected) - parsed = ProtoParser.parse(self.location, proto) - element_type = parsed.types[0] - self.assertEqual(element_type.documentation, expected) - - def test_multiline_javadoc_comment_with_leading_whitespace(self): - proto = """ - |/** - | * Test - | * All - | * The - | * Things! - | */ - |message Test {} - """ - proto = trim_margin(proto) - expected = """ - |Test - | All - | The - | Things! 
- """ - expected = trim_margin(expected) - parsed = ProtoParser.parse(self.location, proto) - element_type = parsed.types[0] - self.assertEqual(element_type.documentation, expected) - - def test_multiline_javadoc_comment_without_leading_asterisks(self): - # We do not honor leading whitespace when the comment lacks leading asterisks. - proto = """ - |/** - | Test - | All - | The - | Things! - | */ - |message Test {} - """ - proto = trim_margin(proto) - expected = """ - |Test - |All - |The - |Things! - """ - expected = trim_margin(expected) - parsed = ProtoParser.parse(self.location, proto) - element_type = parsed.types[0] - self.assertEqual(element_type.documentation, expected) - - def test_message_field_trailing_comment(self): - # Trailing message field comment. - proto = """ - |message Test { - | optional string name = 1; // Test all the things! - |} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - message: MessageElement = parsed.types[0] - field = message.fields[0] - self.assertEqual(field.documentation, "Test all the things!") - - def test_message_field_leading_and_trailing_comment_are_combined(self): - proto = """ - |message Test { - | // Test all... - | optional string name = 1; // ...the things! - |} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - message: MessageElement = parsed.types[0] - field = message.fields[0] - self.assertEqual(field.documentation, "Test all...\n...the things!") - - def test_trailing_comment_not_assigned_to_following_field(self): - proto = """ - |message Test { - | optional string first_name = 1; // Testing! 
- | optional string last_name = 2; - |} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - message: MessageElement = parsed.types[0] - field1 = message.fields[0] - self.assertEqual(field1.documentation, "Testing!") - field2 = message.fields[1] - self.assertEqual(field2.documentation, "") - - def test_enum_value_trailing_comment(self): - proto = """ - |enum Test { - | FOO = 1; // Test all the things! - |} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - enum_element: EnumElement = parsed.types[0] - value = enum_element.constants[0] - self.assertEqual(value.documentation, "Test all the things!") - - def test_trailing_singleline_comment(self): - proto = """ - |enum Test { - | FOO = 1; /* Test all the things! */ - | BAR = 2;/*Test all the things!*/ - |} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - enum_element: EnumElement = parsed.types[0] - c_foo = enum_element.constants[0] - self.assertEqual(c_foo.documentation, "Test all the things!") - c_bar = enum_element.constants[1] - self.assertEqual(c_bar.documentation, "Test all the things!") - - def test_trailing_multiline_comment(self): - proto = """ - |enum Test { - | FOO = 1; /* Test all the - |things! */ - |} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - enum_element: EnumElement = parsed.types[0] - value = enum_element.constants[0] - self.assertEqual(value.documentation, "Test all the\nthings!") - - def test_trailing_multiline_comment_must_be_last_on_line_throws(self): - proto = """ - |enum Test { - | FOO = 1; /* Test all the things! 
*/ BAR = 2; - |} - """ - proto = trim_margin(proto) - with self.assertRaisesRegex( - IllegalStateException, "Syntax error in file.proto:2:40: no syntax may follow trailing comment" - ): - ProtoParser.parse(self.location, proto) - self.fail() - - def test_invalid_trailing_comment(self): - proto = """ - |enum Test { - | FOO = 1; / - |} - """ - proto = trim_margin(proto) - # try : - # ProtoParser.parse(self.location, proto) - # except IllegalStateException as e : - # if e.message != "Syntax error in file.proto:2:12: expected '//' or '/*'" : - # self.fail() - - with self.assertRaises(IllegalStateException) as re: - # TODO: this test in Kotlin source contains "2:13:" Need compile square.wire and check how it can be? - - ProtoParser.parse(self.location, proto) - self.fail() - self.assertEqual(re.exception.message, "Syntax error in file.proto:2:12: expected '//' or '/*'") - - def test_enum_value_leading_and_trailing_comments_are_combined(self): - proto = """ - |enum Test { - | // Test all... - | FOO = 1; // ...the things! - |} - """ - proto = trim_margin(proto) - parsed = ProtoParser.parse(self.location, proto) - enum_element: EnumElement = parsed.types[0] - value = enum_element.constants[0] - self.assertEqual(value.documentation, "Test all...\n...the things!") - - def test_trailing_comment_not_combined_when_empty(self): - """ (Kotlin) Can't use raw strings here; otherwise, the formatter removes the trailing whitespace on line 3. 
""" - proto = "enum Test {\n" \ - " // Test all...\n" \ - " FOO = 1; // \n" \ - "}" - parsed = ProtoParser.parse(self.location, proto) - enum_element: EnumElement = parsed.types[0] - value = enum_element.constants[0] - self.assertEqual(value.documentation, "Test all...") - - def test_syntax_not_required(self): - proto = "message Foo {}" - parsed = ProtoParser.parse(self.location, proto) - self.assertIsNone(parsed.syntax) - - def test_syntax_specified(self): - proto = """ - |syntax = "proto3"; - |message Foo {} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[MessageElement(location=self.location.at(2, 1), name="Foo")] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_invalid_syntax_value_throws(self): - proto = """ - |syntax = "proto4"; - |message Foo {} - """ - proto = trim_margin(proto) - with self.assertRaisesRegex(IllegalStateException, "Syntax error in file.proto:1:1: unexpected syntax: proto4"): - ProtoParser.parse(self.location, proto) - self.fail() - - def test_syntax_not_first_declaration_throws(self): - proto = """ - |message Foo {} - |syntax = "proto3"; - """ - proto = trim_margin(proto) - with self.assertRaisesRegex( - IllegalStateException, "Syntax error in file.proto:2:1: 'syntax' element must be the first declaration " - "in a file" - ): - ProtoParser.parse(self.location, proto) - self.fail() - - def test_syntax_may_follow_comments_and_empty_lines(self): - proto = """ - |/* comment 1 */ - |// comment 2 - | - |syntax = "proto3"; - |message Foo {} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[MessageElement(location=self.location.at(5, 1), name="Foo")] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_proto3_message_fields_do_not_require_labels(self): - proto = """ - |syntax = "proto3"; - |message Message { - | string a = 1; 
- | int32 b = 2; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[ - MessageElement( - location=self.location.at(2, 1), - name="Message", - fields=[ - FieldElement(location=self.location.at(3, 3), element_type="string", name="a", tag=1), - FieldElement(location=self.location.at(4, 3), element_type="int32", name="b", tag=2) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_proto3_extension_fields_do_not_require_labels(self): - proto = """ - |syntax = "proto3"; - |message Message { - |} - |extend Message { - | string a = 1; - | int32 b = 2; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[MessageElement(location=self.location.at(2, 1), name="Message")], - extend_declarations=[ - ExtendElement( - location=self.location.at(4, 1), - name="Message", - documentation="", - fields=[ - FieldElement(location=self.location.at(5, 3), element_type="string", name="a", tag=1), - FieldElement(location=self.location.at(6, 3), element_type="int32", name="b", tag=2) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_proto3_message_fields_allow_optional(self): - proto = """ - |syntax = "proto3"; - |message Message { - | optional string a = 1; - |} - """ - proto = trim_margin(proto) - - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[ - MessageElement( - location=self.location.at(2, 1), - name="Message", - fields=[ - FieldElement( - location=self.location.at(3, 3), - element_type="string", - name="a", - tag=1, - label=Field.Label.OPTIONAL - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_proto3_message_fields_forbid_required(self): - proto = """ - |syntax = "proto3"; - |message Message { - | required string a = 1; - |} - """ - proto = 
trim_margin(proto) - with self.assertRaisesRegex( - IllegalStateException, "Syntax error in file.proto:3:3: 'required' label forbidden in proto3 field " - "declarations" - ): - ProtoParser.parse(self.location, proto) - self.fail() - - def test_proto3_extension_fields_allow_optional(self): - proto = """ - |syntax = "proto3"; - |message Message { - |} - |extend Message { - | optional string a = 1; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[MessageElement(location=self.location.at(2, 1), name="Message")], - extend_declarations=[ - ExtendElement( - location=self.location.at(4, 1), - name="Message", - documentation="", - fields=[ - FieldElement( - location=self.location.at(5, 3), - element_type="string", - name="a", - tag=1, - label=Field.Label.OPTIONAL - ) - ], - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_proto3_extension_fields_forbids_required(self): - proto = """ - |syntax = "proto3"; - |message Message { - |} - |extend Message { - | required string a = 1; - |} - """ - proto = trim_margin(proto) - with self.assertRaisesRegex( - IllegalStateException, "Syntax error in file.proto:5:3: 'required' label forbidden in proto3 field " - "declarations" - ): - ProtoParser.parse(self.location, proto) - self.fail() - - def test_proto3_message_fields_permit_repeated(self): - proto = """ - |syntax = "proto3"; - |message Message { - | repeated string a = 1; - |} - """ - proto = trim_margin(proto) - - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[ - MessageElement( - location=self.location.at(2, 1), - name="Message", - fields=[ - FieldElement( - location=self.location.at(3, 3), - label=Field.Label.REPEATED, - element_type="string", - name="a", - tag=1 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_proto3_extension_fields_permit_repeated(self): - 
proto = """ - |syntax = "proto3"; - |message Message { - |} - |extend Message { - | repeated string a = 1; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - syntax=Syntax.PROTO_3, - types=[MessageElement(location=self.location.at(2, 1), name="Message")], - extend_declarations=[ - ExtendElement( - location=self.location.at(4, 1), - name="Message", - documentation="", - fields=[ - FieldElement( - location=self.location.at(5, 3), - label=Field.Label.REPEATED, - element_type="string", - name="a", - tag=1 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_parse_message_and_fields(self): - proto = """ - |message SearchRequest { - | required string query = 1; - | optional int32 page_number = 2; - | optional int32 result_per_page = 3; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="SearchRequest", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="query", - tag=1 - ), - FieldElement( - location=self.location.at(3, 3), - label=Field.Label.OPTIONAL, - element_type="int32", - name="page_number", - tag=2 - ), - FieldElement( - location=self.location.at(4, 3), - label=Field.Label.OPTIONAL, - element_type="int32", - name="result_per_page", - tag=3 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_group(self): - proto = """ - |message SearchResponse { - | repeated group Result = 1 { - | required string url = 2; - | optional string title = 3; - | repeated string snippets = 4; - | } - |} - """ - proto = trim_margin(proto) - message = MessageElement( - location=self.location.at(1, 1), - name="SearchResponse", - groups=[ - GroupElement( - location=self.location.at(2, 3), - label=Field.Label.REPEATED, - name="Result", - tag=1, - 
documentation="", - fields=[ - FieldElement( - location=self.location.at(3, 5), - label=Field.Label.REQUIRED, - element_type="string", - name="url", - tag=2 - ), - FieldElement( - location=self.location.at(4, 5), - label=Field.Label.OPTIONAL, - element_type="string", - name="title", - tag=3 - ), - FieldElement( - location=self.location.at(5, 5), - label=Field.Label.REPEATED, - element_type="string", - name="snippets", - tag=4 - ) - ] - ) - ] - ) - expected = ProtoFileElement(location=self.location, types=[message]) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_parse_message_and_one_of(self): - proto = """ - |message SearchRequest { - | required string query = 1; - | oneof page_info { - | int32 page_number = 2; - | int32 result_per_page = 3; - | } - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="SearchRequest", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="query", - tag=1 - ) - ], - one_ofs=[ - OneOfElement( - name="page_info", - documentation="", - fields=[ - FieldElement( - location=self.location.at(4, 5), element_type="int32", name="page_number", tag=2 - ), - FieldElement( - location=self.location.at(5, 5), element_type="int32", name="result_per_page", tag=3 - ) - ], - groups=[], - options=[] - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_parse_message_and_one_of_with_group(self): - proto = """ - |message SearchRequest { - | required string query = 1; - | oneof page_info { - | int32 page_number = 2; - | group Stuff = 3 { - | optional int32 result_per_page = 4; - | optional int32 page_count = 5; - | } - | } - |} + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=location.at(2, 1), 
name="Message")], + extend_declarations=[ + ExtendElement( + location=location.at(4, 1), + name="Message", + documentation="", + fields=[ + FieldElement( + location=location.at(5, 3), element_type="string", name="a", tag=1, label=Field.Label.OPTIONAL + ) + ], + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto3_extension_fields_forbids_required(): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | required string a = 1; + |} """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="SearchRequest", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="query", - tag=1 - ) - ], - one_ofs=[ - OneOfElement( - name="page_info", - documentation="", - fields=[ - FieldElement( - location=self.location.at(4, 5), element_type="int32", name="page_number", tag=2 - ) - ], - groups=[ - GroupElement( - label=None, - location=self.location.at(5, 5), - name="Stuff", - tag=3, - documentation="", - fields=[ - FieldElement( - location=self.location.at(6, 7), - label=Field.Label.OPTIONAL, - element_type="int32", - name="result_per_page", - tag=4 - ), - FieldElement( - location=self.location.at(7, 7), - label=Field.Label.OPTIONAL, - element_type="int32", - name="page_count", - tag=5 - ) - ] - ) - ], - options=[] - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_parse_enum(self): - proto = """ - |/** - | * What's on my waffles. - | * Also works on pancakes. - | */ - |enum Topping { - | FRUIT = 1; - | /** Yummy, yummy cream. 
*/ - | CREAM = 2; - | - | // Quebec Maple syrup - | SYRUP = 3; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[ - EnumElement( - location=self.location.at(5, 1), - name="Topping", - documentation="What's on my waffles.\nAlso works on pancakes.", - constants=[ - EnumConstantElement( - location=self.location.at(6, 3), name="FRUIT", tag=1, documentation="", options=[] - ), - EnumConstantElement( - location=self.location.at(8, 3), - name="CREAM", - tag=2, - documentation="Yummy, yummy cream.", - options=[] - ), - EnumConstantElement( - location=self.location.at(11, 3), - name="SYRUP", - tag=3, - documentation="Quebec Maple syrup", - options=[] - ) - ], - options=[] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_parse_enum_with_options(self): - proto = """ - |/** - | * What's on my waffles. - | * Also works on pancakes. - | */ - |enum Topping { - | option(max_choices) = 2; - | - | FRUIT = 1[(healthy) = true]; - | /** Yummy, yummy cream. 
*/ - | CREAM = 2; - | - | // Quebec Maple syrup - | SYRUP = 3; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[ - EnumElement( - location=self.location.at(5, 1), - name="Topping", - documentation="What's on my waffles.\nAlso works on pancakes.", - options=[OptionElement("max_choices", OptionElement.Kind.NUMBER, "2", True)], - constants=[ - EnumConstantElement( - location=self.location.at(8, 3), - name="FRUIT", - tag=1, - documentation="", - options=[OptionElement("healthy", OptionElement.Kind.BOOLEAN, "true", True)] - ), - EnumConstantElement( - location=self.location.at(10, 3), - name="CREAM", - tag=2, - documentation="Yummy, yummy cream.", - options=[] - ), - EnumConstantElement( - location=self.location.at(13, 3), - name="SYRUP", - tag=3, - documentation="Quebec Maple syrup", - options=[] - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_package_declaration(self): - proto = """ - |package google.protobuf; - |option java_package = "com.google.protobuf"; - | - |// The protocol compiler can output a FileDescriptorSet containing the .proto - |// files it parses. - |message FileDescriptorSet { - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - package_name="google.protobuf", - types=[ - MessageElement( - location=self.location.at(6, 1), - name="FileDescriptorSet", - documentation="The protocol compiler can output a FileDescriptorSet containing the .proto\nfiles " - "it parses." 
- ) - ], - options=[OptionElement("java_package", OptionElement.Kind.STRING, "com.google.protobuf")] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_nesting_in_message(self): - proto = """ - |message FieldOptions { - | optional CType ctype = 1[old_default = STRING, deprecated = true]; - | enum CType { - | STRING = 0[(opt_a) = 1, (opt_b) = 2]; - | }; - | // Clients can define custom options in extensions of this message. See above. - | extensions 500; - | extensions 1000 to max; - |} - """ - proto = trim_margin(proto) - enum_element = EnumElement( - location=self.location.at(3, 3), - name="CType", - documentation="", - constants=[ - EnumConstantElement( - location=self.location.at(4, 5), - name="STRING", - tag=0, - documentation="", - options=[ - OptionElement("opt_a", OptionElement.Kind.NUMBER, "1", True), - OptionElement("opt_b", OptionElement.Kind.NUMBER, "2", True) - ] - ) - ], - options=[] - ) - field = FieldElement( - location=self.location.at(2, 3), - label=Field.Label.OPTIONAL, - element_type="CType", - name="ctype", - tag=1, - options=[ - OptionElement("old_default", OptionElement.Kind.ENUM, "STRING"), - OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") - ] - ) - - self.assertEqual(len(field.options), 2) - self.assertTrue(OptionElement("old_default", OptionElement.Kind.ENUM, "STRING") in field.options) - self.assertTrue(OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") in field.options) - - message_element = MessageElement( - location=self.location.at(1, 1), - name="FieldOptions", - fields=[field], - nested_types=[enum_element], - extensions=[ - ExtensionsElement( - location=self.location.at(7, 3), - documentation="Clients can define custom options in extensions of this message. 
See above.", - values=[500] - ), - ExtensionsElement(self.location.at(8, 3), "", [KotlinRange(1000, MAX_TAG_VALUE)]) - ] - ) - expected = ProtoFileElement(location=self.location, types=[message_element]) - actual = ProtoParser.parse(self.location, proto) - self.assertEqual(actual, expected) - - def test_multi_ranges_extensions(self): - proto = """ - |message MeGustaExtensions { - | extensions 1, 5 to 200, 500, 1000 to max; - |} - """ - proto = trim_margin(proto) - message_element = MessageElement( - location=self.location.at(1, 1), - name="MeGustaExtensions", - documentation="", - fields=[], - nested_types=[], - extensions=[ - ExtensionsElement( - location=self.location.at(2, 3), - documentation="", - values=[1] + [KotlinRange(5, 200)] + [500] + [KotlinRange(1000, MAX_TAG_VALUE)] - ) - ] - ) - expected = ProtoFileElement(location=self.location, types=[message_element]) - actual = ProtoParser.parse(self.location, proto) - self.assertEqual(actual, expected) - - def test_option_parentheses(self): - proto = """ - |message Chickens { - | optional bool koka_ko_koka_ko = 1[old_default = true]; - | optional bool coodle_doodle_do = 2[(delay) = 100, old_default = false]; - | optional bool coo_coo_ca_cha = 3[old_default = true, (delay) = 200]; - | optional bool cha_chee_cha = 4; - |} - """ - proto = trim_margin(proto) - - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="Chickens", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.OPTIONAL, - element_type="bool", - name="koka_ko_koka_ko", - tag=1, - options=[OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true")] - ), - FieldElement( - location=self.location.at(3, 3), - label=Field.Label.OPTIONAL, - element_type="bool", - name="coodle_doodle_do", - tag=2, - options=[ - OptionElement("delay", OptionElement.Kind.NUMBER, "100", True), - OptionElement("old_default", OptionElement.Kind.BOOLEAN, "false") - ] - ), - 
FieldElement( - location=self.location.at(4, 3), - label=Field.Label.OPTIONAL, - element_type="bool", - name="coo_coo_ca_cha", - tag=3, - options=[ - OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true"), - OptionElement("delay", OptionElement.Kind.NUMBER, "200", True) - ] - ), - FieldElement( - location=self.location.at(5, 3), - label=Field.Label.OPTIONAL, - element_type="bool", - name="cha_chee_cha", - tag=4 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_imports(self): - proto = "import \"src/test/resources/unittest_import.proto\";\n" - expected = ProtoFileElement(location=self.location, imports=["src/test/resources/unittest_import.proto"]) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_public_imports(self): - proto = "import public \"src/test/resources/unittest_import.proto\";\n" - expected = ProtoFileElement(location=self.location, public_imports=["src/test/resources/unittest_import.proto"]) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_extend(self): - proto = """ - |// Extends Foo - |extend Foo { - | optional int32 bar = 126; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - extend_declarations=[ - ExtendElement( - location=self.location.at(2, 1), - name="Foo", - documentation="Extends Foo", - fields=[ - FieldElement( - location=self.location.at(3, 3), - label=Field.Label.OPTIONAL, - element_type="int32", - name="bar", - tag=126 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_extend_in_message(self): - proto = """ - |message Bar { - | extend Foo { - | optional Bar bar = 126; - | } - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[MessageElement(location=self.location.at(1, 1), name="Bar")], - extend_declarations=[ - ExtendElement( - location=self.location.at(2, 3), 
- name="Foo", - documentation="", - fields=[ - FieldElement( - location=self.location.at(3, 5), - label=Field.Label.OPTIONAL, - element_type="Bar", - name="bar", - tag=126 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_extend_in_message_with_package(self): - proto = """ - |package kit.kat; - | - |message Bar { - | extend Foo { - | optional Bar bar = 126; - | } - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - package_name="kit.kat", - types=[MessageElement(location=self.location.at(3, 1), name="Bar")], - extend_declarations=[ - ExtendElement( - location=self.location.at(4, 3), - name="Foo", - documentation="", - fields=[ - FieldElement( - location=self.location.at(5, 5), - label=Field.Label.OPTIONAL, - element_type="Bar", - name="bar", - tag=126 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_fqcn_extend_in_message(self): - proto = """ - |message Bar { - | extend example.Foo { - | optional Bar bar = 126; - | } - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[MessageElement(location=self.location.at(1, 1), name="Bar")], - extend_declarations=[ - ExtendElement( - location=self.location.at(2, 3), - name="example.Foo", - documentation="", - fields=[ - FieldElement( - location=self.location.at(3, 5), - label=Field.Label.OPTIONAL, - element_type="Bar", - name="bar", - tag=126 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_fqcn_extend_in_message_with_package(self): - proto = """ - |package kit.kat; - | - |message Bar { - | extend example.Foo { - | optional Bar bar = 126; - | } - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - package_name="kit.kat", - types=[MessageElement(location=self.location.at(3, 1), name="Bar")], - extend_declarations=[ - 
ExtendElement( - location=self.location.at(4, 3), - name="example.Foo", - documentation="", - fields=[ - FieldElement( - location=self.location.at(5, 5), - label=Field.Label.OPTIONAL, - element_type="Bar", - name="bar", - tag=126 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_default_field_with_paren(self): - proto = """ - |message Foo { - | optional string claim_token = 2[(squareup.redacted) = true]; - |} - """ - proto = trim_margin(proto) - field = FieldElement( - location=self.location.at(2, 3), - label=Field.Label.OPTIONAL, - element_type="string", - name="claim_token", - tag=2, - options=[OptionElement("squareup.redacted", OptionElement.Kind.BOOLEAN, "true", True)] - ) - self.assertTrue(len(field.options) == 1) - self.assertTrue(OptionElement("squareup.redacted", OptionElement.Kind.BOOLEAN, "true", True) in field.options) - - message_element = MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) - expected = ProtoFileElement(location=self.location, types=[message_element]) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - # Parse \a, \b, \f, \n, \r, \t, \v, \[0-7]{1-3}, and \[xX]{0-9a-fA-F]{1,2} - def test_default_field_with_string_escapes(self): - proto = r""" - |message Foo { - | optional string name = 1 [ - | x = "\a\b\f\n\r\t\v\1f\01\001\11\011\111\xe\Xe\xE\xE\x41\x41" - | ]; - |} - """ - proto = trim_margin(proto) - field = FieldElement( - location=self.location.at(2, 3), - label=Field.Label.OPTIONAL, - element_type="string", - name="name", - tag=1, - options=[ - OptionElement( - "x", OptionElement.Kind.STRING, - "\u0007\b\u000C\n\r\t\u000b\u0001f\u0001\u0001\u0009\u0009I\u000e\u000e\u000e\u000eAA" - ) - ] - ) - self.assertTrue(len(field.options) == 1) - self.assertTrue( + proto = trim_margin(proto) + with pytest.raises( + IllegalStateException, + match="Syntax error in file.proto:5:3: 'required' label forbidden in proto3 field " + "declarations" + ): + 
ProtoParser.parse(location, proto) + pytest.fail("") + + +def test_proto3_message_fields_permit_repeated(): + proto = """ + |syntax = "proto3"; + |message Message { + | repeated string a = 1; + |} + """ + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=location, + syntax=Syntax.PROTO_3, + types=[ + MessageElement( + location=location.at(2, 1), + name="Message", + fields=[ + FieldElement( + location=location.at(3, 3), label=Field.Label.REPEATED, element_type="string", name="a", tag=1 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto3_extension_fields_permit_repeated(): + proto = """ + |syntax = "proto3"; + |message Message { + |} + |extend Message { + | repeated string a = 1; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + syntax=Syntax.PROTO_3, + types=[MessageElement(location=location.at(2, 1), name="Message")], + extend_declarations=[ + ExtendElement( + location=location.at(4, 1), + name="Message", + documentation="", + fields=[ + FieldElement( + location=location.at(5, 3), label=Field.Label.REPEATED, element_type="string", name="a", tag=1 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_parse_message_and_fields(): + proto = """ + |message SearchRequest { + | required string query = 1; + | optional int32 page_number = 2; + | optional int32 result_per_page = 3; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="SearchRequest", + fields=[ + FieldElement( + location=location.at(2, 3), label=Field.Label.REQUIRED, element_type="string", name="query", tag=1 + ), + FieldElement( + location=location.at(3, 3), + label=Field.Label.OPTIONAL, + element_type="int32", + name="page_number", + tag=2 + ), + FieldElement( + location=location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="int32", + 
name="result_per_page", + tag=3 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_group(): + proto = """ + |message SearchResponse { + | repeated group Result = 1 { + | required string url = 2; + | optional string title = 3; + | repeated string snippets = 4; + | } + |} + """ + proto = trim_margin(proto) + message = MessageElement( + location=location.at(1, 1), + name="SearchResponse", + groups=[ + GroupElement( + location=location.at(2, 3), + label=Field.Label.REPEATED, + name="Result", + tag=1, + documentation="", + fields=[ + FieldElement( + location=location.at(3, 5), label=Field.Label.REQUIRED, element_type="string", name="url", tag=2 + ), + FieldElement( + location=location.at(4, 5), label=Field.Label.OPTIONAL, element_type="string", name="title", tag=3 + ), + FieldElement( + location=location.at(5, 5), + label=Field.Label.REPEATED, + element_type="string", + name="snippets", + tag=4 + ) + ] + ) + ] + ) + expected = ProtoFileElement(location=location, types=[message]) + assert ProtoParser.parse(location, proto) == expected + + +def test_parse_message_and_one_of(): + proto = """ + |message SearchRequest { + | required string query = 1; + | oneof page_info { + | int32 page_number = 2; + | int32 result_per_page = 3; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="SearchRequest", + fields=[ + FieldElement( + location=location.at(2, 3), label=Field.Label.REQUIRED, element_type="string", name="query", tag=1 + ) + ], + one_ofs=[ + OneOfElement( + name="page_info", + documentation="", + fields=[ + FieldElement(location=location.at(4, 5), element_type="int32", name="page_number", tag=2), + FieldElement(location=location.at(5, 5), element_type="int32", name="result_per_page", tag=3) + ], + groups=[], + options=[] + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def 
test_parse_message_and_one_of_with_group(): + proto = """ + |message SearchRequest { + | required string query = 1; + | oneof page_info { + | int32 page_number = 2; + | group Stuff = 3 { + | optional int32 result_per_page = 4; + | optional int32 page_count = 5; + | } + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="SearchRequest", + fields=[ + FieldElement( + location=location.at(2, 3), label=Field.Label.REQUIRED, element_type="string", name="query", tag=1 + ) + ], + one_ofs=[ + OneOfElement( + name="page_info", + documentation="", + fields=[FieldElement(location=location.at(4, 5), element_type="int32", name="page_number", tag=2)], + groups=[ + GroupElement( + label=None, + location=location.at(5, 5), + name="Stuff", + tag=3, + documentation="", + fields=[ + FieldElement( + location=location.at(6, 7), + label=Field.Label.OPTIONAL, + element_type="int32", + name="result_per_page", + tag=4 + ), + FieldElement( + location=location.at(7, 7), + label=Field.Label.OPTIONAL, + element_type="int32", + name="page_count", + tag=5 + ) + ] + ) + ], + options=[] + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_parse_enum(): + proto = """ + |/** + | * What's on my waffles. + | * Also works on pancakes. + | */ + |enum Topping { + | FRUIT = 1; + | /** Yummy, yummy cream. 
*/ + | CREAM = 2; + | + | // Quebec Maple syrup + | SYRUP = 3; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + EnumElement( + location=location.at(5, 1), + name="Topping", + documentation="What's on my waffles.\nAlso works on pancakes.", + constants=[ + EnumConstantElement(location=location.at(6, 3), name="FRUIT", tag=1, documentation="", options=[]), + EnumConstantElement( + location=location.at(8, 3), name="CREAM", tag=2, documentation="Yummy, yummy cream.", options=[] + ), + EnumConstantElement( + location=location.at(11, 3), name="SYRUP", tag=3, documentation="Quebec Maple syrup", options=[] + ) + ], + options=[] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_parse_enum_with_options(): + proto = """ + |/** + | * What's on my waffles. + | * Also works on pancakes. + | */ + |enum Topping { + | option(max_choices) = 2; + | + | FRUIT = 1[(healthy) = true]; + | /** Yummy, yummy cream. */ + | CREAM = 2; + | + | // Quebec Maple syrup + | SYRUP = 3; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + EnumElement( + location=location.at(5, 1), + name="Topping", + documentation="What's on my waffles.\nAlso works on pancakes.", + options=[OptionElement("max_choices", OptionElement.Kind.NUMBER, "2", True)], + constants=[ + EnumConstantElement( + location=location.at(8, 3), + name="FRUIT", + tag=1, + documentation="", + options=[OptionElement("healthy", OptionElement.Kind.BOOLEAN, "true", True)] + ), + EnumConstantElement( + location=location.at(10, 3), name="CREAM", tag=2, documentation="Yummy, yummy cream.", options=[] + ), + EnumConstantElement( + location=location.at(13, 3), name="SYRUP", tag=3, documentation="Quebec Maple syrup", options=[] + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_package_declaration(): + proto = """ + |package google.protobuf; + |option java_package = 
"com.google.protobuf"; + | + |// The protocol compiler can output a FileDescriptorSet containing the .proto + |// files it parses. + |message FileDescriptorSet { + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + package_name="google.protobuf", + types=[ + MessageElement( + location=location.at(6, 1), + name="FileDescriptorSet", + documentation="The protocol compiler can output a FileDescriptorSet containing the .proto\nfiles " + "it parses." + ) + ], + options=[OptionElement("java_package", OptionElement.Kind.STRING, "com.google.protobuf")] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_nesting_in_message(): + proto = """ + |message FieldOptions { + | optional CType ctype = 1[old_default = STRING, deprecated = true]; + | enum CType { + | STRING = 0[(opt_a) = 1, (opt_b) = 2]; + | }; + | // Clients can define custom options in extensions of this message. See above. + | extensions 500; + | extensions 1000 to max; + |} + """ + proto = trim_margin(proto) + enum_element = EnumElement( + location=location.at(3, 3), + name="CType", + documentation="", + constants=[ + EnumConstantElement( + location=location.at(4, 5), + name="STRING", + tag=0, + documentation="", + options=[ + OptionElement("opt_a", OptionElement.Kind.NUMBER, "1", True), + OptionElement("opt_b", OptionElement.Kind.NUMBER, "2", True) + ] + ) + ], + options=[] + ) + field = FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="CType", + name="ctype", + tag=1, + options=[ + OptionElement("old_default", OptionElement.Kind.ENUM, "STRING"), + OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") + ] + ) + + assert len(field.options) == 2 + assert OptionElement("old_default", OptionElement.Kind.ENUM, "STRING") in field.options + assert OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") in field.options + + message_element = MessageElement( + location=location.at(1, 1), + 
name="FieldOptions", + fields=[field], + nested_types=[enum_element], + extensions=[ + ExtensionsElement( + location=location.at(7, 3), + documentation="Clients can define custom options in extensions of this message. See above.", + values=[500] + ), + ExtensionsElement(location.at(8, 3), "", [KotlinRange(1000, MAX_TAG_VALUE)]) + ] + ) + expected = ProtoFileElement(location=location, types=[message_element]) + actual = ProtoParser.parse(location, proto) + assert actual == expected + + +def test_multi_ranges_extensions(): + proto = """ + |message MeGustaExtensions { + | extensions 1, 5 to 200, 500, 1000 to max; + |} + """ + proto = trim_margin(proto) + message_element = MessageElement( + location=location.at(1, 1), + name="MeGustaExtensions", + documentation="", + fields=[], + nested_types=[], + extensions=[ + ExtensionsElement( + location=location.at(2, 3), + documentation="", + values=[1] + [KotlinRange(5, 200)] + [500] + [KotlinRange(1000, MAX_TAG_VALUE)] + ) + ] + ) + expected = ProtoFileElement(location=location, types=[message_element]) + actual = ProtoParser.parse(location, proto) + assert actual == expected + + +def test_option_parentheses(): + proto = """ + |message Chickens { + | optional bool koka_ko_koka_ko = 1[old_default = true]; + | optional bool coodle_doodle_do = 2[(delay) = 100, old_default = false]; + | optional bool coo_coo_ca_cha = 3[old_default = true, (delay) = 200]; + | optional bool cha_chee_cha = 4; + |} + """ + proto = trim_margin(proto) + + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="Chickens", + fields=[ + FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="koka_ko_koka_ko", + tag=1, + options=[OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true")] + ), + FieldElement( + location=location.at(3, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="coodle_doodle_do", + tag=2, + options=[ + 
OptionElement("delay", OptionElement.Kind.NUMBER, "100", True), + OptionElement("old_default", OptionElement.Kind.BOOLEAN, "false") + ] + ), + FieldElement( + location=location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="coo_coo_ca_cha", + tag=3, + options=[ + OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true"), + OptionElement("delay", OptionElement.Kind.NUMBER, "200", True) + ] + ), + FieldElement( + location=location.at(5, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="cha_chee_cha", + tag=4 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_imports(): + proto = "import \"src/test/resources/unittest_import.proto\";\n" + expected = ProtoFileElement(location=location, imports=["src/test/resources/unittest_import.proto"]) + assert ProtoParser.parse(location, proto) == expected + + +def test_public_imports(): + proto = "import public \"src/test/resources/unittest_import.proto\";\n" + expected = ProtoFileElement(location=location, public_imports=["src/test/resources/unittest_import.proto"]) + assert ProtoParser.parse(location, proto) == expected + + +def test_extend(): + proto = """ + |// Extends Foo + |extend Foo { + | optional int32 bar = 126; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + extend_declarations=[ + ExtendElement( + location=location.at(2, 1), + name="Foo", + documentation="Extends Foo", + fields=[ + FieldElement( + location=location.at(3, 3), label=Field.Label.OPTIONAL, element_type="int32", name="bar", tag=126 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_extend_in_message(): + proto = """ + |message Bar { + | extend Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[MessageElement(location=location.at(1, 1), name="Bar")], + extend_declarations=[ + ExtendElement( + 
location=location.at(2, 3), + name="Foo", + documentation="", + fields=[ + FieldElement( + location=location.at(3, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_extend_in_message_with_package(): + proto = """ + |package kit.kat; + | + |message Bar { + | extend Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + package_name="kit.kat", + types=[MessageElement(location=location.at(3, 1), name="Bar")], + extend_declarations=[ + ExtendElement( + location=location.at(4, 3), + name="Foo", + documentation="", + fields=[ + FieldElement( + location=location.at(5, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_fqcn_extend_in_message(): + proto = """ + |message Bar { + | extend example.Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[MessageElement(location=location.at(1, 1), name="Bar")], + extend_declarations=[ + ExtendElement( + location=location.at(2, 3), + name="example.Foo", + documentation="", + fields=[ + FieldElement( + location=location.at(3, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_fqcn_extend_in_message_with_package(): + proto = """ + |package kit.kat; + | + |message Bar { + | extend example.Foo { + | optional Bar bar = 126; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + package_name="kit.kat", + types=[MessageElement(location=location.at(3, 1), name="Bar")], + extend_declarations=[ + ExtendElement( + location=location.at(4, 3), + name="example.Foo", + documentation="", + fields=[ + FieldElement( 
+ location=location.at(5, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_default_field_with_paren(): + proto = """ + |message Foo { + | optional string claim_token = 2[(squareup.redacted) = true]; + |} + """ + proto = trim_margin(proto) + field = FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="claim_token", + tag=2, + options=[OptionElement("squareup.redacted", OptionElement.Kind.BOOLEAN, "true", True)] + ) + assert len(field.options) == 1 + assert OptionElement("squareup.redacted", OptionElement.Kind.BOOLEAN, "true", True) in field.options + + message_element = MessageElement(location=location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=location, types=[message_element]) + assert ProtoParser.parse(location, proto) == expected + + +# Parse \a, \b, \f, \n, \r, \t, \v, \[0-7]{1-3}, and \[xX]{0-9a-fA-F]{1,2} +def test_default_field_with_string_escapes(): + proto = r""" + |message Foo { + | optional string name = 1 [ + | x = "\a\b\f\n\r\t\v\1f\01\001\11\011\111\xe\Xe\xE\xE\x41\x41" + | ]; + |} + """ + proto = trim_margin(proto) + field = FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="name", + tag=1, + options=[ OptionElement( "x", OptionElement.Kind.STRING, "\u0007\b\u000C\n\r\t\u000b\u0001f\u0001\u0001\u0009\u0009I\u000e\u000e\u000e\u000eAA" - ) in field.options - ) - - message_element = MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) - expected = ProtoFileElement(location=self.location, types=[message_element]) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_string_with_single_quotes(self): - proto = r""" - |message Foo { - | optional string name = 1[default = 'single\"quotes']; - |} - """ - proto = trim_margin(proto) - - field = FieldElement( 
- location=self.location.at(2, 3), - label=Field.Label.OPTIONAL, - element_type="string", - name="name", - tag=1, - default_value="single\"quotes" - ) - message_element = MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) - expected = ProtoFileElement(location=self.location, types=[message_element]) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_adjacent_strings_concatenated(self): - proto = """ - |message Foo { - | optional string name = 1 [ - | default = "concat " - | 'these ' - | "please" - | ]; - |} - """ - proto = trim_margin(proto) - - field = FieldElement( - location=self.location.at(2, 3), - label=Field.Label.OPTIONAL, - element_type="string", - name="name", - tag=1, - default_value="concat these please" - ) - message_element = MessageElement(location=self.location.at(1, 1), name="Foo", fields=[field]) - expected = ProtoFileElement(location=self.location, types=[message_element]) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_invalid_hex_string_escape(self): - proto = r""" - |message Foo { - | optional string name = 1 [default = "\xW"]; - |} - """ - proto = trim_margin(proto) - with self.assertRaises(IllegalStateException) as re: - ProtoParser.parse(self.location, proto) - self.fail() - self.assertIn("expected a digit after \\x or \\X", re.exception.message) - - def test_service(self): - proto = """ - |service SearchService { - | option (default_timeout) = 30; - | - | rpc Search (SearchRequest) returns (SearchResponse); - | rpc Purchase (PurchaseRequest) returns (PurchaseResponse) { - | option (squareup.sake.timeout) = 15; - | option (squareup.a.b) = { - | value: [ - | FOO, - | BAR - | ] - | }; - | } - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - services=[ - ServiceElement( - location=self.location.at(1, 1), - name="SearchService", - documentation="", - options=[OptionElement("default_timeout", 
OptionElement.Kind.NUMBER, "30", True)], - rpcs=[ - RpcElement( - location=self.location.at(4, 3), - name="Search", - documentation="", - request_type="SearchRequest", - response_type="SearchResponse", - options=[], - response_streaming=False, - request_streaming=False - ), - RpcElement( - location=self.location.at(5, 3), - name="Purchase", - documentation="", - request_type="PurchaseRequest", - response_type="PurchaseResponse", - options=[ - OptionElement("squareup.sake.timeout", OptionElement.Kind.NUMBER, "15", True), - OptionElement("squareup.a.b", OptionElement.Kind.MAP, {"value": ["FOO", "BAR"]}, True) - ], - request_streaming=False, - response_streaming=False - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_streaming_service(self): - proto = """ - |service RouteGuide { - | rpc GetFeature (Point) returns (Feature) {} - | rpc ListFeatures (Rectangle) returns (stream Feature) {} - | rpc RecordRoute (stream Point) returns (RouteSummary) {} - | rpc RouteChat (stream RouteNote) returns (stream RouteNote) {} - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - services=[ - ServiceElement( - location=self.location.at(1, 1), - name="RouteGuide", - documentation="", - rpcs=[ - RpcElement( - location=self.location.at(2, 3), - name="GetFeature", - documentation="", - request_type="Point", - response_type="Feature", - options=[], - response_streaming=False, - request_streaming=False - ), - RpcElement( - location=self.location.at(3, 3), - name="ListFeatures", - documentation="", - request_type="Rectangle", - response_type="Feature", - response_streaming=True, - # TODO: Report Square.Wire there was mistake True instead of False! 
- request_streaming=False, - options=[] - ), - RpcElement( - location=self.location.at(4, 3), - name="RecordRoute", - documentation="", - request_type="Point", - response_type="RouteSummary", - request_streaming=True, - response_streaming=False, - options=[] - ), - RpcElement( - location=self.location.at(5, 3), - name="RouteChat", - documentation="", - request_type="RouteNote", - response_type="RouteNote", - request_streaming=True, - response_streaming=True, - options=[] - ) - ], - options=[] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_hex_tag(self): - proto = """ - |message HexTag { - | required string hex = 0x10; - | required string uppercase_x_hex = 0X11; - |} - """ - proto = trim_margin(proto) - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="HexTag", - fields=[ - FieldElement( - location=self.location.at(2, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="hex", - tag=16 - ), - FieldElement( - location=self.location.at(3, 3), - label=Field.Label.REQUIRED, - element_type="string", - name="uppercase_x_hex", - tag=17 - ) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_structured_option(self): - proto = """ - |message ExoticOptions { - | option (squareup.one) = {name: "Name", class_name:"ClassName"}; - | option (squareup.two.a) = {[squareup.options.type]: EXOTIC}; - | option (squareup.two.b) = {names: ["Foo", "Bar"]}; - |} - """ - # TODO: we do not support it yet - # - # | option (squareup.three) = {x: {y: 1 y: 2 } }; // NOTE: Omitted optional comma - # | option (squareup.four) = {x: {y: {z: 1 }, y: {z: 2 }}}; - # - # - # - proto = trim_margin(proto) - - option_one_map = {"name": "Name", "class_name": "ClassName"} - - option_two_a_map = {"[squareup.options.type]": "EXOTIC"} - - option_two_b_map = {"names": ["Foo", "Bar"]} - - # TODO: we do not support it yet - # need 
create custom dictionary class to support multiple values for one key - # - # option_three_map = {"x": {"y": 1, "y": 2}} - # option_four_map = {"x": ["y": {"z": 1}, "y": {"z": 2}]} - - expected = ProtoFileElement( - location=self.location, - types=[ - MessageElement( - location=self.location.at(1, 1), - name="ExoticOptions", - options=[ - OptionElement("squareup.one", OptionElement.Kind.MAP, option_one_map, True), - OptionElement("squareup.two.a", OptionElement.Kind.MAP, option_two_a_map, True), - OptionElement("squareup.two.b", OptionElement.Kind.MAP, option_two_b_map, True), - # OptionElement("squareup.three", OptionElement.Kind.MAP, option_three_map, True), - # OptionElement("squareup.four", OptionElement.Kind.MAP, option_four_map, True) - ] - ) - ] - ) - self.assertEqual(ProtoParser.parse(self.location, proto), expected) - - def test_options_with_nested_maps_and_trailing_commas(self): - proto = """ - |message StructuredOption { - | optional field.type has_options = 3 [ - | (option_map) = { - | nested_map: {key:"value", key2:["value2a","value2b"]}, - | }, - | (option_string) = ["string1","string2"] - | ]; - |} - """ - proto = trim_margin(proto) - field = FieldElement( - location=self.location.at(2, 5), - label=Field.Label.OPTIONAL, - element_type="field.type", - name="has_options", - tag=3, - options=[ - OptionElement( - "option_map", OptionElement.Kind.MAP, {"nested_map": { - "key": "value", - "key2": ["value2a", "value2b"] - }}, True - ), - OptionElement("option_string", OptionElement.Kind.LIST, ["string1", "string2"], True) - ] - ) - self.assertTrue(len(field.options) == 2) - self.assertTrue( + ) + ] + ) + assert len(field.options) == 1 + assert OptionElement( + "x", OptionElement.Kind.STRING, + "\u0007\b\u000C\n\r\t\u000b\u0001f\u0001\u0001\u0009\u0009I\u000e\u000e\u000e\u000eAA" + ) in field.options + + message_element = MessageElement(location=location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=location, 
types=[message_element]) + assert ProtoParser.parse(location, proto) == expected + + +def test_string_with_single_quotes(): + proto = r""" + |message Foo { + | optional string name = 1[default = 'single\"quotes']; + |} + """ + proto = trim_margin(proto) + + field = FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="name", + tag=1, + default_value="single\"quotes" + ) + message_element = MessageElement(location=location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=location, types=[message_element]) + assert ProtoParser.parse(location, proto) == expected + + +def test_adjacent_strings_concatenated(): + proto = """ + |message Foo { + | optional string name = 1 [ + | default = "concat " + | 'these ' + | "please" + | ]; + |} + """ + proto = trim_margin(proto) + + field = FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="name", + tag=1, + default_value="concat these please" + ) + message_element = MessageElement(location=location.at(1, 1), name="Foo", fields=[field]) + expected = ProtoFileElement(location=location, types=[message_element]) + assert ProtoParser.parse(location, proto) == expected + + +def test_invalid_hex_string_escape(): + proto = r""" + |message Foo { + | optional string name = 1 [default = "\xW"]; + |} + """ + proto = trim_margin(proto) + with pytest.raises(IllegalStateException) as re: + ProtoParser.parse(location, proto) + pytest.fail("") + assert "expected a digit after \\x or \\X" in re.value.message + + +def test_service(): + proto = """ + |service SearchService { + | option (default_timeout) = 30; + | + | rpc Search (SearchRequest) returns (SearchResponse); + | rpc Purchase (PurchaseRequest) returns (PurchaseResponse) { + | option (squareup.sake.timeout) = 15; + | option (squareup.a.b) = { + | value: [ + | FOO, + | BAR + | ] + | }; + | } + |} + """ + proto = trim_margin(proto) + expected = 
ProtoFileElement( + location=location, + services=[ + ServiceElement( + location=location.at(1, 1), + name="SearchService", + documentation="", + options=[OptionElement("default_timeout", OptionElement.Kind.NUMBER, "30", True)], + rpcs=[ + RpcElement( + location=location.at(4, 3), + name="Search", + documentation="", + request_type="SearchRequest", + response_type="SearchResponse", + options=[], + response_streaming=False, + request_streaming=False + ), + RpcElement( + location=location.at(5, 3), + name="Purchase", + documentation="", + request_type="PurchaseRequest", + response_type="PurchaseResponse", + options=[ + OptionElement("squareup.sake.timeout", OptionElement.Kind.NUMBER, "15", True), + OptionElement("squareup.a.b", OptionElement.Kind.MAP, {"value": ["FOO", "BAR"]}, True) + ], + request_streaming=False, + response_streaming=False + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_streaming_service(): + proto = """ + |service RouteGuide { + | rpc GetFeature (Point) returns (Feature) {} + | rpc ListFeatures (Rectangle) returns (stream Feature) {} + | rpc RecordRoute (stream Point) returns (RouteSummary) {} + | rpc RouteChat (stream RouteNote) returns (stream RouteNote) {} + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + services=[ + ServiceElement( + location=location.at(1, 1), + name="RouteGuide", + documentation="", + rpcs=[ + RpcElement( + location=location.at(2, 3), + name="GetFeature", + documentation="", + request_type="Point", + response_type="Feature", + options=[], + response_streaming=False, + request_streaming=False + ), + RpcElement( + location=location.at(3, 3), + name="ListFeatures", + documentation="", + request_type="Rectangle", + response_type="Feature", + response_streaming=True, + # TODO: Report Square.Wire there was mistake True instead of False! 
+ request_streaming=False, + options=[] + ), + RpcElement( + location=location.at(4, 3), + name="RecordRoute", + documentation="", + request_type="Point", + response_type="RouteSummary", + request_streaming=True, + response_streaming=False, + options=[] + ), + RpcElement( + location=location.at(5, 3), + name="RouteChat", + documentation="", + request_type="RouteNote", + response_type="RouteNote", + request_streaming=True, + response_streaming=True, + options=[] + ) + ], + options=[] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_hex_tag(): + proto = """ + |message HexTag { + | required string hex = 0x10; + | required string uppercase_x_hex = 0X11; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="HexTag", + fields=[ + FieldElement( + location=location.at(2, 3), label=Field.Label.REQUIRED, element_type="string", name="hex", tag=16 + ), + FieldElement( + location=location.at(3, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="uppercase_x_hex", + tag=17 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_structured_option(): + proto = """ + |message ExoticOptions { + | option (squareup.one) = {name: "Name", class_name:"ClassName"}; + | option (squareup.two.a) = {[squareup.options.type]: EXOTIC}; + | option (squareup.two.b) = {names: ["Foo", "Bar"]}; + |} + """ + # TODO: we do not support it yet + # + # | option (squareup.three) = {x: {y: 1 y: 2 } }; // NOTE: Omitted optional comma + # | option (squareup.four) = {x: {y: {z: 1 }, y: {z: 2 }}}; + # + # + # + proto = trim_margin(proto) + + option_one_map = {"name": "Name", "class_name": "ClassName"} + + option_two_a_map = {"[squareup.options.type]": "EXOTIC"} + + option_two_b_map = {"names": ["Foo", "Bar"]} + + # TODO: we do not support it yet + # need create custom dictionary class to support multiple values for one key + # + 
# option_three_map = {"x": {"y": 1, "y": 2}} + # option_four_map = {"x": ["y": {"z": 1}, "y": {"z": 2}]} + + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="ExoticOptions", + options=[ + OptionElement("squareup.one", OptionElement.Kind.MAP, option_one_map, True), + OptionElement("squareup.two.a", OptionElement.Kind.MAP, option_two_a_map, True), + OptionElement("squareup.two.b", OptionElement.Kind.MAP, option_two_b_map, True), + # OptionElement("squareup.three", OptionElement.Kind.MAP, option_three_map, True), + # OptionElement("squareup.four", OptionElement.Kind.MAP, option_four_map, True) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_options_with_nested_maps_and_trailing_commas(): + proto = """ + |message StructuredOption { + | optional field.type has_options = 3 [ + | (option_map) = { + | nested_map: {key:"value", key2:["value2a","value2b"]}, + | }, + | (option_string) = ["string1","string2"] + | ]; + |} + """ + proto = trim_margin(proto) + field = FieldElement( + location=location.at(2, 5), + label=Field.Label.OPTIONAL, + element_type="field.type", + name="has_options", + tag=3, + options=[ OptionElement( "option_map", OptionElement.Kind.MAP, {"nested_map": { "key": "value", "key2": ["value2a", "value2b"] }}, True - ) in field.options - ) - self.assertTrue( - OptionElement("option_string", OptionElement.Kind.LIST, ["string1", "string2"], True) in field.options - ) - - expected = MessageElement(location=self.location.at(1, 1), name="StructuredOption", fields=[field]) - proto_file = ProtoFileElement(location=self.location, types=[expected]) - self.assertEqual(ProtoParser.parse(self.location, proto), proto_file) + ), + OptionElement("option_string", OptionElement.Kind.LIST, ["string1", "string2"], True) + ] + ) + assert len(field.options) == 2 + assert OptionElement( + "option_map", OptionElement.Kind.MAP, {"nested_map": { + "key": "value", + "key2": 
["value2a", "value2b"] + }}, True + ) in field.options + assert OptionElement("option_string", OptionElement.Kind.LIST, ["string1", "string2"], True) in field.options + + expected = MessageElement(location=location.at(1, 1), name="StructuredOption", fields=[field]) + proto_file = ProtoFileElement(location=location, types=[expected]) + assert ProtoParser.parse(location, proto) == proto_file From a9dda6f1b6b88cbef9cdaa6ef7a8106d64e9023a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 3 Jun 2021 23:08:22 +0300 Subject: [PATCH 019/168] finished porting of test_proto_parser module, ported test_proto_file_element module --- karapace/protobuf/enum_constant_element.py | 14 +- karapace/protobuf/enum_element.py | 2 +- karapace/protobuf/extend_element.py | 11 +- karapace/protobuf/field_element.py | 4 +- karapace/protobuf/group_element.py | 13 +- karapace/protobuf/kotlin_wrapper.py | 12 +- karapace/protobuf/message_element.py | 23 +- karapace/protobuf/one_of_element.py | 18 +- karapace/protobuf/option_element.py | 3 + karapace/protobuf/proto_file_element.py | 43 +- karapace/protobuf/proto_parser.py | 2 +- karapace/protobuf/reserved_document.py | 7 +- karapace/protobuf/rpc_element.py | 17 +- karapace/protobuf/service_element.py | 14 +- karapace/protobuf/syntax.py | 6 + tests/unit/test_proto_file_element.py | 574 +++++++++++++ tests/unit/test_proto_parser.py | 917 +++++++++++++++++++++ 17 files changed, 1598 insertions(+), 82 deletions(-) create mode 100644 tests/unit/test_proto_file_element.py diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index e669fcc8b..116bd42c1 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -8,7 +8,7 @@ class EnumConstantElement: location: Location name: str tag: int - documentation: str + documentation: str = "" options: list = [] def __init__( @@ -16,17 +16,17 @@ def __init__( location: Location, name: str, tag: int, - 
documentation: str, - options: list, + documentation: str = "", + options: list = None, ): self.location = location self.name = name self.tag = tag - self.options = options - if not documentation: - self.documentation = "" - else: + if options: + self.options = options + + if documentation: self.documentation = documentation def to_schema(self) -> str: diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index b7b0bf93c..41d7ca28e 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -9,7 +9,7 @@ class EnumElement(TypeElement): constants: list = [] - def __init__(self, location: Location, name: str, documentation: str, options: list, constants: list): + def __init__(self, location: Location, name: str, documentation: str = "", options: list = None, constants: list = None): self.location = location self.name = name self.documentation = documentation diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index bd8e29115..d6164ad47 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -8,14 +8,15 @@ class ExtendElement: location: Location name: str - documentation: str - fields: list + documentation: str = "" + fields: list = [] - def __init__(self, location: Location, name: str, documentation: str, fields: list): + def __init__(self, location: Location, name: str, documentation: str = "", fields: list = None): self.location = location self.name = name self.documentation = documentation - self.fields = fields + if fields: + self.fields = fields def to_schema(self): result: list = list() @@ -27,4 +28,4 @@ def to_schema(self): append_indented(result, field.to_schema()) result.append("}\n") - return result + return "".join(result) diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 7ec3c4c7b..7ab213fde 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ 
-39,9 +39,7 @@ def __init__( self.json_name = json_name self.tag = tag self.documentation = documentation - if not options: - self.options = [] - else: + if options: self.options = options def to_schema(self): diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index f765a1039..5e9e01b94 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -12,17 +12,24 @@ class GroupElement: name: str tag: int documentation: str = "" - fields: list = list() + fields: list = [] def __init__( - self, label: Union[None, Field.Label], location: Location, name: str, tag: int, documentation: str, fields: list + self, + label: Union[None, Field.Label], + location: Location, + name: str, + tag: int, + documentation: str = "", + fields: list = None ): self.label = label self.location = location self.name = name self.tag = tag self.documentation = documentation - self.fields = fields + if self.fields: + self.fields = fields def to_schema(self) -> str: result: list = [] diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index 978d01def..fab12f3fc 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -10,6 +10,12 @@ def trim_margin(s: str) -> str: lines = s.split("\n") new_lines = list() + if not lines[0].strip(): + del lines[0] + + if not lines[-1].strip(): + del lines[-1] + for line in lines: idx = line.find("|") if idx < 0: @@ -17,12 +23,6 @@ def trim_margin(s: str) -> str: else: new_lines.append(line[idx + 1:].rstrip()) - if not new_lines[0].strip(): - del new_lines[0] - - if not new_lines[-1].strip(): - del new_lines[-1] - return "\n".join(new_lines) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index c42308eb6..d188b007c 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -12,6 +12,8 @@ class MessageElement(TypeElement): one_ofs: list = [] extensions: list 
= [] groups: list = [] + options: list = [] + nested_types: list = [] def __init__( self, @@ -29,13 +31,20 @@ def __init__( self.location = location self.name = name self.documentation = documentation - self.nested_types = nested_types - self.options = options - self.reserveds = reserveds - self.fields = fields - self.one_ofs = one_ofs - self.extensions = extensions - self.groups = groups + if nested_types: + self.nested_types = nested_types + if options: + self.options = options + if reserveds: + self.reserveds = reserveds + if fields: + self.fields = fields + if one_ofs: + self.one_ofs = one_ofs + if extensions: + self.extensions = extensions + if groups: + self.groups = groups def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index d36f001e3..faf886abf 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -7,16 +7,20 @@ class OneOfElement: name: str documentation: str = "" - fields: list = list() - groups: list = list() - options: list = list() + fields: list = [] + groups: list = [] + options: list = [] - def __init__(self, name: str, documentation: str, fields: list, groups: list, options: list): + def __init__(self, name: str, documentation: str = "", fields=None, groups=None, options=None): self.name = name self.documentation = documentation - self.fields = fields - self.groups = groups - self.options = options + + if fields: + self.fields = fields + if options: + self.options = options + if groups: + self.groups = groups def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 43d28ed7c..732ee0420 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -121,3 +121,6 @@ def __repr__(self): def __eq__(self, other): return str(self) == str(other) + + +PACKED_OPTION_ELEMENT = OptionElement("packed", 
OptionElement.Kind.BOOLEAN, value="true", is_parenthesized=False) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 2e0f9f9c5..cb6cd2d79 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -9,12 +9,12 @@ class ProtoFileElement: location: Location package_name: str syntax: Syntax - imports: list - public_imports: list - types: list - services: list - extend_declarations: list - options: list + imports: list = [] + public_imports: list = [] + types: list = [] + services: list = [] + extend_declarations: list = [] + options: list = [] def __init__( self, @@ -28,28 +28,21 @@ def __init__( extend_declarations=None, options=None ): - - if not options: - options = [] - if not extend_declarations: - extend_declarations = [] - if not services: - services = [] - if not types: - types = [] - if not public_imports: - public_imports = [] - if not imports: - imports = [] self.location = location self.package_name = package_name self.syntax = syntax - self.imports = imports - self.public_imports = public_imports - self.types = types - self.services = services - self.extend_declarations = extend_declarations - self.options = options + if options: + self.options = options + if extend_declarations: + self.extend_declarations = extend_declarations + if services: + self.services = services + if types: + self.types = types + if public_imports: + self.public_imports = public_imports + if imports: + self.imports = imports def to_schema(self): strings: list = [ diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 801bf86a7..8dcca1551 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -58,7 +58,7 @@ def permits_message(self) -> bool: return self in [Context.FILE, Context.MESSAGE] def permits_service(self) -> bool: - return self in [Context.FILE, Context.MESSAGE] + return self in [Context.FILE] def permits_enum(self) 
-> bool: return self in [Context.FILE, Context.MESSAGE] diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 0550d73c3..11f186a91 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -10,12 +10,13 @@ class ReservedElement: location: Location documentation: str """ A [String] name or [Int] or [IntRange] tag. """ - values: list + values: list = [] - def __init__(self, location: Location, documentation: str, values: list): + def __init__(self, location: Location, documentation: str = "", values: list = None): self.location = location self.documentation = documentation - self.values = values + if values: + self.values = values def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py index a61b160c1..d1a7c7bfe 100644 --- a/karapace/protobuf/rpc_element.py +++ b/karapace/protobuf/rpc_element.py @@ -13,18 +13,18 @@ class RpcElement: response_type: str request_streaming: bool response_streaming: bool - options: list + options: list = [] def __init__( self, location: Location, name: str, - documentation: str, - request_type: str, - response_type: str, - request_streaming: bool, - response_streaming: bool, - options: list, + documentation: str = "", + request_type: str = "", + response_type: str = "", + request_streaming: bool = False, + response_streaming: bool = False, + options: list = None ): self.location = location self.name = name @@ -33,7 +33,8 @@ def __init__( self.response_type = response_type self.request_streaming = request_streaming self.response_streaming = response_streaming - self.options = options + if options: + self.options = options def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index 69cd232fa..af19dce19 100644 --- a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ 
-9,15 +9,17 @@ class ServiceElement: location: Location name: str documentation: str - rpcs: list - options: list + rpcs: list = [] + options: list = [] - def __init__(self, location: Location, name: str, documentation: str, rpcs: list, options: list): + def __init__(self, location: Location, name: str, documentation: str = "", rpcs: list = None, options: list = None): self.location = location self.name = name self.documentation = documentation - self.rpcs = rpcs - self.options = options + if rpcs: + self.rpcs = rpcs + if options: + self.options = options def to_schema(self): result: list = list() @@ -34,4 +36,4 @@ def to_schema(self): append_indented(result, rpc.to_schema()) result.append("}\n") - return result + return "".join(result) diff --git a/karapace/protobuf/syntax.py b/karapace/protobuf/syntax.py index b31b60a6e..85f54e52d 100644 --- a/karapace/protobuf/syntax.py +++ b/karapace/protobuf/syntax.py @@ -12,3 +12,9 @@ class Syntax(Enum): @classmethod def _missing_(cls, string): raise IllegalArgumentException(f"unexpected syntax: {string}") + + def __str__(self): + return self.value + + def __repr__(self): + return self.value diff --git a/tests/unit/test_proto_file_element.py b/tests/unit/test_proto_file_element.py new file mode 100644 index 000000000..445d648d4 --- /dev/null +++ b/tests/unit/test_proto_file_element.py @@ -0,0 +1,574 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElementTest.kt +import copy + +from karapace.protobuf.extend_element import ExtendElement +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.option_element import OptionElement, PACKED_OPTION_ELEMENT +from karapace.protobuf.proto_file_element import ProtoFileElement 
+from karapace.protobuf.proto_parser import ProtoParser +from karapace.protobuf.service_element import ServiceElement +from karapace.protobuf.syntax import Syntax + +location: Location = Location.get("some/folder", "file.proto") + + +def test_empty_to_schema(): + file = ProtoFileElement(location=location) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_empty_with_package_to_schema(): + file = ProtoFileElement( + location=location, + package_name="example.simple" + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |package example.simple; + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_simple_to_schema(): + element = MessageElement( + location=location, + name="Message" + ) + file = ProtoFileElement( + location=location, + types=[element] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |message Message {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_simple_with_imports_to_schema(): + element = MessageElement( + location=location, + name="Message" + ) + file = ProtoFileElement( + location=location, + imports=["example.other"], + types=[element] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. 
+ |// Source: file.proto + | + |import "example.other"; + | + |message Message {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_add_multiple_dependencies(): + element = MessageElement( + location=location, + name="Message" + ) + file = ProtoFileElement( + location=location, + imports=["example.other", "example.another"], + types=[element] + ) + assert len(file.imports) == 2 + + +def test_simple_with_public_imports_to_schema(): + element = MessageElement( + location=location, + name="Message" + ) + file = ProtoFileElement( + location=location, + public_imports=["example.other"], + types=[element] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |import public "example.other"; + | + |message Message {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_add_multiple_public_dependencies(): + element = MessageElement( + location=location, + name="Message" + ) + file = ProtoFileElement(location=location, + public_imports=["example.other", "example.another"], + types=[element] + ) + + assert len(file.public_imports) == 2 + + +def test_simple_with_both_imports_to_schema(): + element = MessageElement( + location=location, + name="Message" + ) + file = ProtoFileElement(location=location, + imports=["example.thing"], + public_imports=["example.other"], + types=[element] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. 
+ |// Source: file.proto + | + |import "example.thing"; + |import public "example.other"; + | + |message Message {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_simple_with_services_to_schema(): + element = MessageElement( + location=location, + name="Message" + ) + service = ServiceElement( + location=location, + name="Service" + ) + file = ProtoFileElement( + location=location, + types=[element], + services=[service] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |message Message {} + | + |service Service {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_add_multiple_services(): + service1 = ServiceElement( + location=location, + name="Service1" + ) + service2 = ServiceElement( + location=location, + name="Service2" + ) + file = ProtoFileElement( + location=location, + services=[service1, service2] + ) + assert len(file.services) == 2 + + +def test_simple_with_options_to_schema(): + element = MessageElement( + location=location, + name="Message" + ) + option = OptionElement("kit", OptionElement.Kind.STRING, "kat") + file = ProtoFileElement( + location=location, + options=[option], + types=[element] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. 
+ |// Source: file.proto + | + |option kit = "kat"; + | + |message Message {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_add_multiple_options(): + element = MessageElement( + location=location, + name="Message" + ) + kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") + foo_bar = OptionElement("foo", OptionElement.Kind.STRING, "bar") + file = ProtoFileElement( + location=location, + options=[kit_kat, foo_bar], + types=[element] + ) + assert len(file.options) == 2 + + +def test_simple_with_extends_to_schema(): + file = ProtoFileElement( + location=location, + extend_declarations=[ExtendElement(location=location.at(5, 1), name="Extend")], + types=[MessageElement(location=location, name="Message")] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |message Message {} + | + |extend Extend {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_add_multiple_extends(): + extend1 = ExtendElement(location=location, name="Extend1") + extend2 = ExtendElement(location=location, name="Extend2") + file = ProtoFileElement( + location=location, + extend_declarations=[extend1, extend2] + ) + assert len(file.extend_declarations) == 2 + + +def test_multiple_everything_to_schema(): + element1 = MessageElement(location=location.at(12, 1), name="Message1") + element2 = MessageElement(location=location.at(14, 1), name="Message2") + extend1 = ExtendElement(location=location.at(16, 1), name="Extend1") + extend2 = ExtendElement(location=location.at(18, 1), name="Extend2") + option1 = OptionElement("kit", OptionElement.Kind.STRING, "kat") + option2 = OptionElement("foo", OptionElement.Kind.STRING, "bar") + service1 = ServiceElement( + location=location.at(20, 1), + name="Service1" + ) + service2 = ServiceElement( + location=location.at(22, 1), + name="Service2" + ) + file = ProtoFileElement( + location=location, + 
package_name="example.simple", + imports=["example.thing"], + public_imports=["example.other"], + types=[element1, element2], + services=[service1, service2], + extend_declarations=[extend1, extend2], + options=[option1, option2] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |package example.simple; + | + |import "example.thing"; + |import public "example.other"; + | + |option kit = "kat"; + |option foo = "bar"; + | + |message Message1 {} + | + |message Message2 {} + | + |extend Extend1 {} + | + |extend Extend2 {} + | + |service Service1 {} + | + |service Service2 {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + # Re-parse the expected string into a ProtoFile and ensure they're equal. + parsed = ProtoParser.parse(location, expected) + assert parsed == file + + +def test_syntax_to_schema(): + element = MessageElement(location=location, name="Message") + file = ProtoFileElement( + location=location, + syntax=Syntax.PROTO_2, + types=[element] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |syntax = "proto2"; + | + |message Message {} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + +def test_default_is_set_in_proto2(): + field = FieldElement( + location=location.at(12, 3), + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + default_value="defaultValue" + ) + message = MessageElement( + location=location.at(11, 1), + name="Message", + fields=[field] + ) + file = ProtoFileElement( + syntax=Syntax.PROTO_2, + location=location, + package_name="example.simple", + imports=["example.thing"], + public_imports=["example.other"], + types=[message] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. 
+ |// Source: file.proto + | + |syntax = "proto2"; + | + |package example.simple; + | + |import "example.thing"; + |import public "example.other"; + | + |message Message { + | required string name = 1 [default = "defaultValue"]; + |} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + # Re-parse the expected string into a ProtoFile and ensure they're equal. + parsed = ProtoParser.parse(location, expected) + assert parsed == file + + +def test_convert_packed_option_from_wire_schema_in_proto2(): + field_numeric = FieldElement( + location=location.at(9, 3), + label=Field.Label.REPEATED, + element_type="int32", + name="numeric_without_packed_option", + tag=1 + ) + field_numeric_packed_true = FieldElement( + location=location.at(11, 3), + label=Field.Label.REPEATED, + element_type="int32", + name="numeric_packed_true", + tag=2, + options=[PACKED_OPTION_ELEMENT] + ) + el = copy.copy(PACKED_OPTION_ELEMENT) + el.value = "false" + field_numeric_packed_false = FieldElement( + location=location.at(13, 3), + label=Field.Label.REPEATED, + element_type="int32", + name="numeric_packed_false", + tag=3, + options=[el] + ) + field_string = FieldElement( + location=location.at(15, 3), + label=Field.Label.REPEATED, + element_type="string", + name="string_without_packed_option", + tag=4 + ) + field_string_packed_true = FieldElement( + location=location.at(17, 3), + label=Field.Label.REPEATED, + element_type="string", + name="string_packed_true", + tag=5, + options=[PACKED_OPTION_ELEMENT] + ) + el = copy.copy(PACKED_OPTION_ELEMENT) + el.value = "false" + field_string_packed_false = FieldElement( + location=location.at(19, 3), + label=Field.Label.REPEATED, + element_type="string", + name="string_packed_false", + tag=6, + options=[el] + ) + + message = MessageElement( + location=location.at(8, 1), + name="Message", + fields=[field_numeric, field_numeric_packed_true, field_numeric_packed_false, field_string, + field_string_packed_true, 
field_string_packed_false]) + file = ProtoFileElement( + syntax=Syntax.PROTO_2, + location=location, + package_name="example.simple", + imports=[], + public_imports=[], + types=[message] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |syntax = "proto2"; + | + |package example.simple; + | + |message Message { + | repeated int32 numeric_without_packed_option = 1; + | + | repeated int32 numeric_packed_true = 2 [packed = true]; + | + | repeated int32 numeric_packed_false = 3 [packed = false]; + | + | repeated string string_without_packed_option = 4; + | + | repeated string string_packed_true = 5 [packed = true]; + | + | repeated string string_packed_false = 6 [packed = false]; + |} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + # Re-parse the expected string into a ProtoFile and ensure they're equal. + parsed = ProtoParser.parse(location, expected) + assert parsed == file + + +def test_convert_packed_option_from_wire_schema_in_proto3(): + field_numeric = FieldElement( + location=location.at(9, 3), + label=Field.Label.REPEATED, + element_type="int32", + name="numeric_without_packed_option", + tag=1 + ) + field_numeric_packed_true = FieldElement( + location=location.at(11, 3), + label=Field.Label.REPEATED, + element_type="int32", + name="numeric_packed_true", + tag=2, + options=[PACKED_OPTION_ELEMENT] + ) + el = copy.copy(PACKED_OPTION_ELEMENT) + el.value = "false" + field_numeric_packed_false = FieldElement( + location=location.at(13, 3), + label=Field.Label.REPEATED, + element_type="int32", + name="numeric_packed_false", + tag=3, + options=[el] + ) + field_string = FieldElement( + location=location.at(15, 3), + label=Field.Label.REPEATED, + element_type="string", + name="string_without_packed_option", + tag=4 + ) + field_string_packed_true = FieldElement( + location=location.at(17, 3), + label=Field.Label.REPEATED, + element_type="string", + name="string_packed_true", + tag=5, 
+ options=[PACKED_OPTION_ELEMENT] + ) + el = copy.copy(PACKED_OPTION_ELEMENT) + el.value = "false" + field_string_packed_false = FieldElement( + location=location.at(19, 3), + label=Field.Label.REPEATED, + element_type="string", + name="string_packed_false", + tag=6, + options=[el] + ) + + message = MessageElement( + location=location.at(8, 1), + name="Message", + fields=[field_numeric, field_numeric_packed_true, field_numeric_packed_false, field_string, + field_string_packed_true, field_string_packed_false] + ) + file = ProtoFileElement( + syntax=Syntax.PROTO_3, + location=location, + package_name="example.simple", + imports=[], + public_imports=[], + types=[message] + ) + expected = """ + |// Proto schema formatted by Wire, do not edit. + |// Source: file.proto + | + |syntax = "proto3"; + | + |package example.simple; + | + |message Message { + | repeated int32 numeric_without_packed_option = 1; + | + | repeated int32 numeric_packed_true = 2 [packed = true]; + | + | repeated int32 numeric_packed_false = 3 [packed = false]; + | + | repeated string string_without_packed_option = 4; + | + | repeated string string_packed_true = 5 [packed = true]; + | + | repeated string string_packed_false = 6 [packed = false]; + |} + |""" + expected = trim_margin(expected) + assert file.to_schema() == expected + + # Re-parse the expected string into a ProtoFile and ensure they're equal. 
+ parsed = ProtoParser.parse(location, expected) + assert parsed == file diff --git a/tests/unit/test_proto_parser.py b/tests/unit/test_proto_parser.py index 04636b931..2dbbbdbbd 100644 --- a/tests/unit/test_proto_parser.py +++ b/tests/unit/test_proto_parser.py @@ -16,6 +16,7 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_file_element import ProtoFileElement from karapace.protobuf.proto_parser import ProtoParser +from karapace.protobuf.reserved_document import ReservedElement from karapace.protobuf.rpc_element import RpcElement from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax @@ -1726,3 +1727,919 @@ def test_options_with_nested_maps_and_trailing_commas(): expected = MessageElement(location=location.at(1, 1), name="StructuredOption", fields=[field]) proto_file = ProtoFileElement(location=location, types=[expected]) assert ProtoParser.parse(location, proto) == proto_file + + +def test_option_numerical_bounds(): + proto = r""" + |message Test { + | optional int32 default_int32 = 401 [x = 2147483647]; + | optional uint32 default_uint32 = 402 [x = 4294967295]; + | optional sint32 default_sint32 = 403 [x = -2147483648]; + | optional fixed32 default_fixed32 = 404 [x = 4294967295]; + | optional sfixed32 default_sfixed32 = 405 [x = -2147483648]; + | optional int64 default_int64 = 406 [x = 9223372036854775807]; + | optional uint64 default_uint64 = 407 [x = 18446744073709551615]; + | optional sint64 default_sint64 = 408 [x = -9223372036854775808]; + | optional fixed64 default_fixed64 = 409 [x = 18446744073709551615]; + | optional sfixed64 default_sfixed64 = 410 [x = -9223372036854775808]; + | optional bool default_bool = 411 [x = true]; + | optional float default_float = 412 [x = 123.456e7]; + | optional double default_double = 413 [x = 123.456e78]; + | optional string default_string = 414 """ + \ + r"""[x = "çok\a\b\f\n\r\t\v\1\01\001\17\017\176\x1\x01\x11\X1\X01\X11güzel" ]; 
+ | optional bytes default_bytes = 415 """ + \ + r"""[x = "çok\a\b\f\n\r\t\v\1\01\001\17\017\176\x1\x01\x11\X1\X01\X11güzel" ]; + | optional NestedEnum default_nested_enum = 416 [x = A ]; + |}""" + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="Test", + fields=[ + FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="int32", + name="default_int32", + tag=401, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "2147483647")] + ), + FieldElement( + location=location.at(3, 3), + label=Field.Label.OPTIONAL, + element_type="uint32", + name="default_uint32", + tag=402, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "4294967295")] + ), + FieldElement( + location=location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="sint32", + name="default_sint32", + tag=403, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "-2147483648")] + ), + FieldElement( + location=location.at(5, 3), + label=Field.Label.OPTIONAL, + element_type="fixed32", + name="default_fixed32", + tag=404, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "4294967295")] + ), + FieldElement( + location=location.at(6, 3), + label=Field.Label.OPTIONAL, + element_type="sfixed32", + name="default_sfixed32", + tag=405, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "-2147483648")] + ), + FieldElement( + location=location.at(7, 3), + label=Field.Label.OPTIONAL, + element_type="int64", + name="default_int64", + tag=406, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "9223372036854775807")] + ), + FieldElement( + location=location.at(8, 3), + label=Field.Label.OPTIONAL, + element_type="uint64", + name="default_uint64", + tag=407, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "18446744073709551615")] + ), + FieldElement( + location=location.at(9, 3), + label=Field.Label.OPTIONAL, + element_type="sint64", + 
name="default_sint64", + tag=408, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "-9223372036854775808")] + ), + FieldElement( + location=location.at(10, 3), + label=Field.Label.OPTIONAL, + element_type="fixed64", + name="default_fixed64", + tag=409, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "18446744073709551615")] + ), + FieldElement( + location=location.at(11, 3), + label=Field.Label.OPTIONAL, + element_type="sfixed64", + name="default_sfixed64", + tag=410, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "-9223372036854775808")] + ), + FieldElement( + location=location.at(12, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="default_bool", + tag=411, + options=[OptionElement("x", OptionElement.Kind.BOOLEAN, "true")] + ), + FieldElement( + location=location.at(13, 3), + label=Field.Label.OPTIONAL, + element_type="float", + name="default_float", + tag=412, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "123.456e7")] + ), + FieldElement( + location=location.at(14, 3), + label=Field.Label.OPTIONAL, + element_type="double", + name="default_double", + tag=413, + options=[OptionElement("x", OptionElement.Kind.NUMBER, "123.456e78")] + ), + FieldElement( + location=location.at(15, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="default_string", + tag=414, + options=[ + OptionElement( + "x", OptionElement.Kind.STRING, + "çok\u0007\b\u000C\n\r\t\u000b\u0001\u0001\u0001\u000f\u000f~\u0001\u0001\u0011" + "\u0001\u0001\u0011güzel" + ) + ] + ), + FieldElement( + location=location.at(17, 3), + label=Field.Label.OPTIONAL, + element_type="bytes", + name="default_bytes", + tag=415, + options=[ + OptionElement( + "x", OptionElement.Kind.STRING, + "çok\u0007\b\u000C\n\r\t\u000b\u0001\u0001\u0001\u000f\u000f~\u0001\u0001\u0011" + "\u0001\u0001\u0011güzel" + ) + ] + ), + FieldElement( + location=location.at(19, 3), + label=Field.Label.OPTIONAL, + element_type="NestedEnum", + name="default_nested_enum", + 
tag=416, + options=[OptionElement("x", OptionElement.Kind.ENUM, "A")] + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_extension_with_nested_message(): + proto = """ + |message Foo { + | optional int32 bar = 1[ + | (validation.range).min = 1, + | (validation.range).max = 100, + | old_default = 20 + | ]; + |} + """ + proto = trim_margin(proto) + field = FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="int32", + name="bar", + tag=1, + options=[ + OptionElement( + "validation.range", OptionElement.Kind.OPTION, OptionElement("min", OptionElement.Kind.NUMBER, "1"), True + ), + OptionElement( + "validation.range", OptionElement.Kind.OPTION, OptionElement("max", OptionElement.Kind.NUMBER, "100"), True + ), + OptionElement("old_default", OptionElement.Kind.NUMBER, "20") + ] + ) + assert len(field.options) == 3 + assert OptionElement( + "validation.range", OptionElement.Kind.OPTION, OptionElement("min", OptionElement.Kind.NUMBER, "1"), True + ) in field.options + + assert OptionElement( + "validation.range", OptionElement.Kind.OPTION, OptionElement("max", OptionElement.Kind.NUMBER, "100"), True + ) in field.options + + assert OptionElement("old_default", OptionElement.Kind.NUMBER, "20") in field.options + + expected = MessageElement(location=location.at(1, 1), name="Foo", fields=[field]) + proto_file = ProtoFileElement(location=location, types=[expected]) + assert ProtoParser.parse(location, proto) == proto_file + + +def test_reserved(): + proto = """ + |message Foo { + | reserved 10, 12 to 14, 'foo'; + |} + """ + proto = trim_margin(proto) + message = MessageElement( + location=location.at(1, 1), + name="Foo", + reserveds=[ReservedElement(location=location.at(2, 3), values=[10, KotlinRange(12, 14), "foo"], documentation="")] + ) + expected = ProtoFileElement(location=location, types=[message]) + assert ProtoParser.parse(location, proto) == expected + + +def test_reserved_with_comments(): + 
proto = """ + |message Foo { + | optional string a = 1; // This is A. + | reserved 2; // This is reserved. + | optional string c = 3; // This is C. + |} + """ + proto = trim_margin(proto) + message = MessageElement( + location=location.at(1, 1), + name="Foo", + fields=[ + FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="a", + tag=1, + documentation="This is A." + ), + FieldElement( + location=location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="c", + tag=3, + documentation="This is C." + ) + ], + reserveds=[ReservedElement(location=location.at(3, 3), values=[2], documentation="This is reserved.")] + ) + expected = ProtoFileElement(location=location, types=[message]) + assert ProtoParser.parse(location, proto) == expected + + +def test_no_whitespace(): + proto = "message C {optional A.B ab = 1;}" + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="C", + fields=[ + FieldElement( + location=location.at(1, 12), label=Field.Label.OPTIONAL, element_type="A.B", name="ab", tag=1 + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_deep_option_assignments(): + proto = """ + |message Foo { + | optional string a = 1 [(wire.my_field_option).baz.value = "a"]; + |} + |""" + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="Foo", + fields=[ + FieldElement( + location=location.at(2, 3), + label=Field.Label.OPTIONAL, + element_type="string", + name="a", + tag=1, + options=[ + OptionElement( + name="wire.my_field_option", + kind=OptionElement.Kind.OPTION, + is_parenthesized=True, + value=OptionElement( + name="baz", + kind=OptionElement.Kind.OPTION, + is_parenthesized=False, + value=OptionElement( + name="value", kind=OptionElement.Kind.STRING, is_parenthesized=False, value="a" + ) + ) + ) + ] + ) + ] + 
) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto_keyword_as_enum_constants(): + # Note: this is consistent with protoc. + proto = """ + |enum Foo { + | syntax = 0; + | import = 1; + | package = 2; + | // option = 3; + | // reserved = 4; + | message = 5; + | enum = 6; + | service = 7; + | extend = 8; + | rpc = 9; + | oneof = 10; + | extensions = 11; + |} + |""" + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + EnumElement( + location=location.at(1, 1), + name="Foo", + constants=[ + EnumConstantElement(location.at(2, 3), "syntax", 0), + EnumConstantElement(location.at(3, 3), "import", 1), + EnumConstantElement(location.at(4, 3), "package", 2), + EnumConstantElement(location.at(7, 3), "message", 5, documentation="option = 3;\nreserved = 4;"), + EnumConstantElement(location.at(8, 3), "enum", 6), + EnumConstantElement(location.at(9, 3), "service", 7), + EnumConstantElement(location.at(10, 3), "extend", 8), + EnumConstantElement(location.at(11, 3), "rpc", 9), + EnumConstantElement(location.at(12, 3), "oneof", 10), + EnumConstantElement(location.at(13, 3), "extensions", 11), + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto_keyword_as_message_name_and_field_proto2(): + # Note: this is consistent with protoc. 
+ proto = """ + |message syntax { + | optional syntax syntax = 1; + |} + |message import { + | optional import import = 1; + |} + |message package { + | optional package package = 1; + |} + |message option { + | optional option option = 1; + |} + |message reserved { + | optional reserved reserved = 1; + |} + |message message { + | optional message message = 1; + |} + |message enum { + | optional enum enum = 1; + |} + |message service { + | optional service service = 1; + |} + |message extend { + | optional extend extend = 1; + |} + |message rpc { + | optional rpc rpc = 1; + |} + |message oneof { + | optional oneof oneof = 1; + |} + |message extensions { + | optional extensions extensions = 1; + |} + |""" + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="syntax", + fields=[ + FieldElement(location.at(2, 3), label=Field.Label.OPTIONAL, element_type="syntax", name="syntax", tag=1) + ] + ), + MessageElement( + location=location.at(4, 1), + name="import", + fields=[ + FieldElement(location.at(5, 3), label=Field.Label.OPTIONAL, element_type="import", name="import", tag=1) + ] + ), + MessageElement( + location=location.at(7, 1), + name="package", + fields=[ + FieldElement( + location.at(8, 3), label=Field.Label.OPTIONAL, element_type="package", name="package", tag=1 + ) + ] + ), + MessageElement( + location=location.at(10, 1), + name="option", + fields=[ + FieldElement( + location.at(11, 3), label=Field.Label.OPTIONAL, element_type="option", name="option", tag=1 + ) + ] + ), + MessageElement( + location=location.at(13, 1), + name="reserved", + fields=[ + FieldElement( + location.at(14, 3), label=Field.Label.OPTIONAL, element_type="reserved", name="reserved", tag=1 + ) + ] + ), + MessageElement( + location=location.at(16, 1), + name="message", + fields=[ + FieldElement( + location.at(17, 3), label=Field.Label.OPTIONAL, element_type="message", name="message", tag=1 + ) + ] + ), 
+ MessageElement( + location=location.at(19, 1), + name="enum", + fields=[ + FieldElement(location.at(20, 3), label=Field.Label.OPTIONAL, element_type="enum", name="enum", tag=1) + ] + ), + MessageElement( + location=location.at(22, 1), + name="service", + fields=[ + FieldElement( + location.at(23, 3), label=Field.Label.OPTIONAL, element_type="service", name="service", tag=1 + ) + ] + ), + MessageElement( + location=location.at(25, 1), + name="extend", + fields=[ + FieldElement( + location.at(26, 3), label=Field.Label.OPTIONAL, element_type="extend", name="extend", tag=1 + ) + ] + ), + MessageElement( + location=location.at(28, 1), + name="rpc", + fields=[FieldElement(location.at(29, 3), label=Field.Label.OPTIONAL, element_type="rpc", name="rpc", tag=1)] + ), + MessageElement( + location=location.at(31, 1), + name="oneof", + fields=[ + FieldElement(location.at(32, 3), label=Field.Label.OPTIONAL, element_type="oneof", name="oneof", tag=1) + ] + ), + MessageElement( + location=location.at(34, 1), + name="extensions", + fields=[ + FieldElement( + location.at(35, 3), label=Field.Label.OPTIONAL, element_type="extensions", name="extensions", tag=1 + ) + ] + ), + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto_keyword_as_message_name_and_field_proto3(): + # Note: this is consistent with protoc. 
+ proto = """ + |syntax = "proto3"; + |message syntax { + | syntax syntax = 1; + |} + |message import { + | import import = 1; + |} + |message package { + | package package = 1; + |} + |message option { + | option option = 1; + |} + |message reserved { + | // reserved reserved = 1; + |} + |message message { + | // message message = 1; + |} + |message enum { + | // enum enum = 1; + |} + |message service { + | service service = 1; + |} + |message extend { + | // extend extend = 1; + |} + |message rpc { + | rpc rpc = 1; + |} + |message oneof { + | // oneof oneof = 1; + |} + |message extensions { + | // extensions extensions = 1; + |} + |""" + + proto = trim_margin(proto) + expected = ProtoFileElement( + syntax=Syntax.PROTO_3, + location=location, + types=[ + MessageElement( + location=location.at(2, 1), + name="syntax", + fields=[FieldElement(location.at(3, 3), element_type="syntax", name="syntax", tag=1)] + ), + MessageElement( + location=location.at(5, 1), + name="import", + fields=[FieldElement(location.at(6, 3), element_type="import", name="import", tag=1)] + ), + MessageElement( + location=location.at(8, 1), + name="package", + fields=[FieldElement(location.at(9, 3), element_type="package", name="package", tag=1)] + ), + MessageElement( + location=location.at(11, 1), + name="option", + options=[OptionElement(name="option", kind=OptionElement.Kind.NUMBER, value="1", is_parenthesized=False)], + ), + MessageElement( + location=location.at(14, 1), + name="reserved", + ), + MessageElement( + location=location.at(17, 1), + name="message", + ), + MessageElement( + location=location.at(20, 1), + name="enum", + ), + MessageElement( + location=location.at(23, 1), + name="service", + fields=[FieldElement(location.at(24, 3), element_type="service", name="service", tag=1)] + ), + MessageElement( + location=location.at(26, 1), + name="extend", + ), + MessageElement( + location=location.at(29, 1), + name="rpc", + fields=[FieldElement(location.at(30, 3), element_type="rpc", 
name="rpc", tag=1)] + ), + MessageElement( + location=location.at(32, 1), + name="oneof", + ), + MessageElement( + location=location.at(35, 1), + name="extensions", + ), + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_proto_keyword_as_service_name_and_rpc(): + # Note: this is consistent with protoc. + proto = """ + |service syntax { + | rpc syntax (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service import { + | rpc import (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service package { + | rpc package (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service option { + | rpc option (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service reserved { + | rpc reserved (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service message { + | rpc message (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service enum { + | rpc enum (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service service { + | rpc service (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service extend { + | rpc extend (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service rpc { + | rpc rpc (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service oneof { + | rpc oneof (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |service extensions { + | rpc extensions (google.protobuf.StringValue) returns (google.protobuf.StringValue); + |} + |""" + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + services=[ + ServiceElement( + location=location.at(1, 1), + name="syntax", + rpcs=[ + RpcElement( + location.at(2, 3), + name="syntax", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + ) + ] + ), + 
ServiceElement( + location=location.at(4, 1), + name="import", + rpcs=[ + RpcElement( + location.at(5, 3), + name="import", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(7, 1), + name="package", + rpcs=[ + RpcElement( + location.at(8, 3), + name="package", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(10, 1), + name="option", + rpcs=[ + RpcElement( + location.at(11, 3), + name="option", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(13, 1), + name="reserved", + rpcs=[ + RpcElement( + location.at(14, 3), + name="reserved", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(16, 1), + name="message", + rpcs=[ + RpcElement( + location.at(17, 3), + name="message", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(19, 1), + name="enum", + rpcs=[ + RpcElement( + location.at(20, 3), + name="enum", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(22, 1), + name="service", + rpcs=[ + RpcElement( + location.at(23, 3), + name="service", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(25, 1), + name="extend", + rpcs=[ + RpcElement( + location.at(26, 3), + name="extend", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + 
documentation="" + ) + ] + ), + ServiceElement( + location=location.at(28, 1), + name="rpc", + rpcs=[ + RpcElement( + location.at(29, 3), + name="rpc", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue", + documentation="" + ) + ] + ), + ServiceElement( + location=location.at(31, 1), + name="oneof", + rpcs=[ + RpcElement( + location.at(32, 3), + name="oneof", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue" + ) + ] + ), + ServiceElement( + location=location.at(34, 1), + name="extensions", + rpcs=[ + RpcElement( + location.at(35, 3), + name="extensions", + request_type="google.protobuf.StringValue", + response_type="google.protobuf.StringValue" + ) + ] + ), + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_forbid_multiple_syntax_definitions(): + proto = """ + | syntax = "proto2"; + | syntax = "proto2"; + """ + proto = trim_margin(proto) + with pytest.raises(IllegalStateException, match="Syntax error in file.proto:2:3: too many syntax definitions"): + # TODO: this test in Kotlin source contains "2:13:" Need compile square.wire and check how it can be? 
+ ProtoParser.parse(location, proto) + pytest.fail("") + + +def test_one_of_options(): + proto = """ + |message SearchRequest { + | required string query = 1; + | oneof page_info { + | option (my_option) = true; + | int32 page_number = 2; + | int32 result_per_page = 3; + | } + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + types=[ + MessageElement( + location=location.at(1, 1), + name="SearchRequest", + fields=[ + FieldElement( + location=location.at(2, 3), label=Field.Label.REQUIRED, element_type="string", name="query", tag=1 + ) + ], + one_ofs=[ + OneOfElement( + name="page_info", + fields=[ + FieldElement(location=location.at(5, 5), element_type="int32", name="page_number", tag=2), + FieldElement(location=location.at(6, 5), element_type="int32", name="result_per_page", tag=3) + ], + options=[ + OptionElement("my_option", OptionElement.Kind.BOOLEAN, value="true", is_parenthesized=True) + ] + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected + + +def test_semi_colon_as_options_delimiters(): + proto = """ + |service MyService { + | option (custom_rule) = { + | my_string: "abc"; my_int: 3; + | my_list: ["a", "b", "c"]; + | }; + |} + """ + proto = trim_margin(proto) + expected = ProtoFileElement( + location=location, + services=[ + ServiceElement( + location=location.at(1, 1), + name="MyService", + options=[ + OptionElement( + "custom_rule", + OptionElement.Kind.MAP, { + "my_string": "abc", + "my_int": "3", + "my_list": ["a", "b", "c"] + }, + is_parenthesized=True + ) + ] + ) + ] + ) + assert ProtoParser.parse(location, proto) == expected From 9ff458024c7e982042585704d5ac37e0305ed377 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 4 Jun 2021 02:17:16 +0300 Subject: [PATCH 020/168] Merge with master of aiven/karapace to our repository (#8) * tests/test_schema.py: splitting test_schema() Split test_schema() to multiple single-purpose tests No essential functional changes in the tests * 
Added information how to run integration tests against Confluence stack Instructions in README.rst Docker Compose file to start the Confluence stack * Kafka REST fixed version to 6.1.1 to match Schema Registry * README.rst: clarified compatibility Changed the claim that Karapace is compatible to that aims to be compatible with 6.1.1 and added a list of known incompabilities. * Configuration Keys as table * fixed content table * Fixed small spelling bugs * test_schema.py removed assert_schema_versions from test_schema_repost, unrelated * test_schema.py added -> None to all test method signatures. * test_schema.py: added annotations to all functions * test_schema.py duplicate code removal * test_schema.py moved a comment to a an assert message * test_schema.py removed unneeded f-string wrappings * utils.py AVRO name compatible (http://avro.apache.org/docs/current/spec.html#names). Must not have '-'. * test_schema.py test_schema_version_numbering uses 'name' in the Avro to make the schema unique * test_schema.py: str format() -> f-strings * test_schema.py no more JSONs as strings, instead dicts that are dumped as JSON strings * utils.py add create_schema_name_factory, create safer names For example in Avro field names '-' is not allowed. Using underscore instead. * test_schema.py: split test_schema_versions into two tests New ones: test_schema_versions_multiple_subjects_same_schema and test_schema_versions_deleting The tests use unique schema names * test_schema.py: test_schema_remains_constant fixes Wasn't using a unique schema id. Added doc * test_schema.py removed test_enum_schema_compatibility Essentially a duplicate of test_enum_schema * test_schema.py: fix test_schema_repost Compares JSONs now, not strings. * test_schema.py test_compatibility_endpoint fix Now uses a dynamic unique schema name. Was clashing before. Added documentation on what the test does. 
* test_schema.py test_record_schema_compatibility_backward split into two The new ones: test_record_schema_compatibility_backward and test_record_schema_compatibility_forward * test_schema_version_number_existing_schema takes version ids from response Now compatible with SR * test_schema.py: test_schema_subject_version_schema fix Changed to use a proper Avro schema * test_schema.py: test_schema_same_subject fix No longer expects the exact same string schema to be returned. The str parsed as JSON needs to match. * Handle gracefully if no node is master eligible Karapace configuration allows configuring node to not be eligible for master. Handle gracefully ie. read-only mode if all nodes are configured non-eligible for master. * schema_registry: breaking change in an error message The error message in POST to /subject/ when schema is not specified in the request changed. Fixes test_schema_subject_post_invalid to run in Karapace and against Schema Registry * schema_registry: breaking change in subjects/{}/versions/{} Fixed the error message in subjects/{}/versions/{} to match Schema Registry Now test_schema_subject_invalid_id works against SR * test_schema.py test_version_number_validation fix Error message check matches the error from SR (was breaking the test) Dynamically fetches the version number Added description for the test * Add some typing, rename eligible master flag for clarification * schema_registry: breaking change in POST subjects/{subject}/versions In the case the endpoint is submitted without body, changed the HTTP status code, error_code and message match the ones in Schema Registry. Made the necessary changes so that Karapace also returns correct values. test_schema.py: test_schema_missing_body fixed accordingly. * schema_registry: breaking changes in some HTTP error messages Now HTTP error messages match with the ones coming from Schema Registry. Adjusted test_http_headers in test_schema.py to correctly check the messages. 
* schema_registry: breaking change in /schemas/ids/<>/versions /schemas/ids//versions now returns empty list in case nothing is found. This is the behaviour of SR. Karapace used to fail in this case before this change. The tests test_schema_lifecycle and test_schema_versions_deleting now works against Schema Registry (in addition to Karapace) * test_schema.py: test_schema_versions_deleting: No unique field Unique field name not needed, schema name is enough. Using a fixed one. * readme: clarified and separated readme moved documentation about development to the CONTRIBUTING.md file, and tried to make the README.rst a bit more concise. * Remove explicit master eligibility flag and utilize optional master_url * CONTRIBUTING.md small fixes Only minor changes, no essential content change: Changed some rst formattings to md Some typos fixed such as karapace -> Karapace A few small tweaks * doc: fixed grammar * KarapaceAll: startup fix When started from KarapaceAll, the __init__ of KarapaceSchemaRegistry is not called. schema_lock is initialized in __init__. Thus it's not called when using KarapaceAll. Fix is to move schema_lock init to _init() which gets called also when using KarapaceAll. 
* docs: locahost -> localhost Co-authored-by: Juha Mynttinen Co-authored-by: Francesco Co-authored-by: Tommi Vainikainen Co-authored-by: Augusto Hack --- CONTRIBUTING.md | 101 +- README.rst | 450 +++---- karapace/master_coordinator.py | 36 +- karapace/rapu.py | 8 +- karapace/schema_reader.py | 9 +- karapace/schema_registry_apis.py | 46 +- .../integration/confluent-docker-compose.yml | 51 + tests/integration/test_master_coordinator.py | 27 +- tests/integration/test_schema.py | 1035 +++++++++++------ tests/utils.py | 8 +- 10 files changed, 1029 insertions(+), 742 deletions(-) create mode 100644 tests/integration/confluent-docker-compose.yml diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9f1be72b6..176e7f107 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,17 +1,102 @@ # Welcome! -Contributions are very welcome on Karapace. Please follow the guidelines: +Contributions are very welcome on Karapace. When contributing please keep this in mind: -- It's recommended to open an issue to discuss a feature before putting in a lot of effort. +- Open an issue to discuss new bigger features. +- Write code consistent with the project style and make sure the tests are passing. +- Stay in touch with us if we have follow up questions or requests for further changes. -- We use [GitHub Flow](https://guides.github.com/introduction/flow/), check that your main branch is up to date, and create a new branch for changes. +# Development -- Commit messages should describe the changes, not the filenames. Win our admiration by following the [excellent advice from Chris Beams](https://chris.beams.io/posts/git-commit/) when composing commit messages. +## Local Environment -- Choose a meaningful title for your pull request. +There is very little you need to get started coding for Karapace: -- The pull request description should focus on what changed and why. 
+- Use [one of the supported python versions](https://github.com/aiven/karapace/blob/master/setup.py) + documented in the `setup.py` classifiers. +- Create [a virtual environment](https://docs.python.org/3/tutorial/venv.html) and install the dev dependencies in it: -- Check that the tests pass (and add test coverage for your changes if appropriate). +```sh +python -m venv <path_to_venv> +source <path_to_venv>/bin/activate +pip install -r ./requirements-dev.txt +pip install -e . +``` -- Stay in touch with us if we have follow up questions or requests for further changes. +## Tests + +Tests are written with the [pytest](https://docs.pytest.org/) framework, and all PRs are tested for +each supported Python version using [GitHub Flow](https://guides.github.com/introduction/flow/). + +There are two flavors of tests, unit tests and integration tests: + +- Unit: These are faster and very useful for quick iterations. They are usually testing pure + functions. +- Integration: Are slower but more complete. These tests run Karapace, ZooKeeper, and Kafka servers, + pytest's fixtures are used to start/stop these for you. + +Both flavors run in parallel using [pytest-xdist](https://github.com/pytest-dev/pytest-xdist). New +tests should be engineered with this in mind: + +- Don't reuse schema/subject/topic names +- Expect other clients to be interacting with the servers at the same time. + +To run the tests use `make`. It will download Kafka to be used in the tests for you: + +```sh +make unittest +make integrationtest +``` + +### PyCharm + +If you want to run the tests from within the IDE, first download Kafka using `make fetch-kafka`, and +use the project root as the working directory. + +### Compatibility tests + +The integration tests can be configured to use an external REST (`--rest-url`), Registry +(`--registry-url`) and Kafka (`--kafka-bootstrap-servers`).
These can be used to make sure the +tests conform to the Kafka REST or Schema Registry APIs, and then that Karapace conforms to the +tests: + +```sh +docker-compose -f ./tests/integration/confluent-docker-compose.yml up -d +pytest --kafka-bootstrap-servers localhost:9092 --registry-url http://localhost:8081 --rest-url http://localhost:8082/ tests/integration +``` + +## Static checking and Linting + +The code is statically checked and formatted using [a few +tools](https://github.com/aiven/karapace/blob/master/requirements-dev.txt). To run these +automatically on each commit please enable the [pre-commit](https://pre-commit.com) hooks. +Alternatively you can run them manually with `make pre-commit`. + +## Manual testing + +To use your development code, you just need to set up a Kafka server and run Karapace from your +virtual environment: + +``` +docker-compose -f ./container/docker-compose.yml up -d kafka +karapace karapace.config.json +``` + +### Configuration + +To see descriptions of configuration keys see our +[README](https://github.com/aiven/karapace#configuration-keys). + +Each configuration key can be overridden with an environment variable prefixed with `KARAPACE_`, +exception being configuration keys that actually start with the `karapace` string. For example, to +override the `bootstrap_uri` config value, one would use the environment variable +`KARAPACE_BOOTSTRAP_URI`. + +# Opening a PR + +- Commit messages should describe the changes, not the filenames. Win our admiration by following + the [excellent advice from Chris Beams](https://chris.beams.io/posts/git-commit/) when composing + commit messages. +- Choose a meaningful title for your pull request. +- The pull request description should focus on what changed and why. +- Check that the tests pass (and add test coverage for your changes if appropriate).
diff --git a/README.rst b/README.rst index b083700d4..d541a07bb 100644 --- a/README.rst +++ b/README.rst @@ -3,195 +3,64 @@ Karapace ``karapace`` Your Kafka essentials in one tool +.. image:: https://github.com/aiven/karapace/actions/workflows/tests.yml/badge.svg + +Overview +======== + +Karapace supports the storing of schemas in a central repository, which clients can access to +serialize and deserialize messages. The schemas also maintain their own version histories and can be +checked for compatibility between their different respective versions. + +Karapace rest provides a RESTful interface to your Kafka cluster, allowing you to perform tasks such +as producing and consuming messages and perform administrative cluster work, all the while using the +language of the WEB. + +Karapace is compatible with Schema Registry 6.1.1 on API level. Features ======== -* Schema Registry and Rest Proxy that are 1:1 Compatible with the pre-existing proprietary - Confluent Schema Registry and Kafka Rest Proxy * Drop in replacement both on pre-existing Schema Registry / Kafka Rest Proxy client and server-sides * Moderate memory consumption * Asynchronous architecture based on aiohttp +* Supports Avro and JSON Schema. Protobuf development is tracked with `Issue 67`_. - -Overview -======== - -Karapace supports the storing of schemas in a central repository, which -clients can access to serialize and deserialize messages. The schemas also -maintain their own version histories and can be checked for compatibility -between their different respective versions. - -Karapace rest provides a RESTful interface to your Kafka cluster, allowing you to perform -tasks such as producing and consuming messages and perform administrative cluster work, -all the while using the language of the WEB. +.. _Issue 67: https://github.com/aiven/karapace/issues/67 Setup ===== -Karapace is a Python project, and requires Kafka for its backend storage. There is also a `Docker setup for development`_. 
- -Requirements ------------- - -Karapace requires Python 3.6 or later and some additional components in -order to operate: - -* aiohttp_ for serving schemas over HTTP in an asynchronous fashion -* avro-python3_ for Avro serialization -* kafka-python_ to read, write and coordinate Karapace's persistence in Kafka -* raven-python_ (optional) to report exceptions to sentry -* aiokafka_ for some components of the rest proxy - -.. _`aiohttp`: https://github.com/aio-libs/aiohttp -.. _`aiokafka`: https://github.com/aio-libs/aiokafka -.. _`avro-python3`: https://github.com/apache/avro -.. _`kafka-python`: https://github.com/dpkp/kafka-python -.. _`raven-python`: https://github.com/getsentry/raven-python - -Developing and testing Karapace also requires the following utilities: -requests_, flake8_, pylint_ and pytest_. - -.. _`flake8`: https://flake8.readthedocs.io/ -.. _`requests`: http://www.python-requests.org/en/latest/ -.. _`pylint`: https://www.pylint.org/ -.. _`pytest`: http://pytest.org/ - -Karapace has been developed and tested on modern Linux x86-64 systems, but -should work on other platforms that provide the required modules. - - -Building --------- - -To build an installation package for your distribution, go to the root -directory of a Karapace Git checkout and run:: - - python3 setup.py bdist_egg - -This will produce an egg file into a dist directory within the same folder. 
- -Installation +Using Docker ------------ -Python/Other:: - - easy_install dist/karapace-0.1.0-py3.6.egg - -On Linux systems it is recommended to simply run ``karapace`` under -``systemd``:: +To get you up and running with the latest release of Karapace, a docker setup is available:: - systemctl enable karapace.service + docker-compose -f ./container/docker-compose.yml up -d -and eventually after the setup section, you can just run:: +Then you should be able to reach two sets of endpoints: - systemctl start karapace.service +* Karapace schema registry on http://localhost:8081 +* Karapace REST on http://localhost:8082 Configuration -------------- - -After this you need to create a suitable JSON configuration file for your -installation. Keys to take special care are the ones needed to configure -Kafka and advertised_hostname. - -Each configuration key can be overridden with an environment variable prefixed with -``KARAPACE_``, exception being configuration keys that actually start with the ``karapace`` string. -For example, to override the ``bootstrap_uri`` config value, one would use the environment variable -``KARAPACE_BOOTSTRAP_URI`` - - -To see descriptions of configuration keys see section ``config``. Here's an -example configuration file to give you an idea what you need to change:: - - { - "advertised_hostname": "localhost", - "bootstrap_uri": "127.0.0.1:9092", - "client_id": "sr-1", - "compatibility": "FULL", - "group_id": "schema-registry", - "host": "127.0.0.1", - "log_level": "DEBUG", - "port": 8081, - "master_eligibility": true, - "replication_factor": 1, - "security_protocol": "PLAINTEXT", - "ssl_cafile": null, - "ssl_certfile": null, - "ssl_keyfile": null, - "topic_name": "_schemas" - } - -Local Development ------------------ - -Currently Karapace runs on the Python major versions 3.7, 3.8 and 3.9. You can use any of these for development. -Naturally, independently of Python version you use, the code needs to run on all the supported versions. 
-The CI pipeline in GitHub actions will run the tests on all these Python versions to ensure this. - -To run Karapace locally, or develop it, first install the dependencies. -If you only need the runtime, i.e. you're not running tests or committing to Git, -it's enough to install the runtime dependencies:: - - # Runtime dependencies - python3 -m pip install -r requirements.txt - -If you are developing and e.g. running tests, install the development dependencies. -This will install also the runtime dependencies:: - - # Development dependencies, contains runtime dependencies - python3 -m pip install -r requirements-dev.txt - -To run the local/current version of the code, set up the configuration file in ``karapace.config.json`` to include connection details for Kafka and any other config you want to change, then run:: - - python3 -m karapace.karapace_all karapace.config.json - -There are two flavors of tests, unit tests and integration tests. The unit tests are standalone, -i.e. can be run without anything outside of the test running. The integration tests in turn need -a running ZooKeeper and Kafka, but take internally care of starting and stopping them. +^^^^^^^^^^^^^ -The tests can be run from the command line using :code:`make`:: +Each configuration key can be overridden with an environment variable prefixed with ``KARAPACE_``, +exception being configuration keys that actually start with the ``karapace`` string. For example, to +override the ``bootstrap_uri`` config value, one would use the environment variable +``KARAPACE_BOOTSTRAP_URI``. Here_ you can find an example configuration file to give you an idea +what you need to change. - # Running unit tests - make unittest +.. _`Here`: https://github.com/aiven/karapace/blob/master/karapace.config.json - # Running integration tests - make integrationtest +Source install +-------------- -To run the tests in an IDE, you need once download and untar Kafka -by :code:`make fetch-kafka`. 
Additionally ensure that the working directory -when running tests, is set to Git root, e.g. in PyCharm you can -create a configuration template with the correct working directory. +Alternatively you can do a source install using:: -The integration tests are run in parallel e.g. in the CI-pipeline. -The tests need to be engineered taking this in mind. - -There are several coding style checks in `GitHub Actions `_. -Your code changes need to pass these tests. To run the checks locally, -you can run them manually:: - - # Runs all coding style checks - make pre-commit - -Alternatively,you can use `pre-commit `_ to automatically run the checks on commit time:: - - pre-commit install - -Docker setup for development ----------------------------- - -To get you up and running with a development copy of Karapace, a docker setup -is available. You can find everything you need for this in the ``container/`` -folder. - -Get the containers running:: - - docker-compose up - -Then you should be able to reach two sets of endpoints: - -* Karapace schema registry on http://localhost:8081 -* Karapace REST on http://localhost:8082 + python setup.py install Quickstart ========== @@ -303,6 +172,7 @@ Delete consumer:: $ curl -X DELETE -H "Accept: application/vnd.kafka.v2+json" \ http://localhost:8081/consumers/avro_consumers/instances/my_consumer + Backing up your Karapace ======================== @@ -318,7 +188,6 @@ consumer:: ./kafka-console-consumer.sh --bootstrap-server brokerhostname:9092 --topic _schemas --from-beginning --property print.key=true --timeout-ms 1000 1> schemas.log - Restoring Karapace from backup ============================== @@ -333,7 +202,6 @@ You can restore the data from the previous step by running:: ./kafka-console-producer.sh --broker-list brokerhostname:9092 --topic _schemas --property parse.key=true < schemas.log - Performance comparison to Confluent stack ========================================== Latency @@ -392,7 +260,6 @@ Ram consumption, different 
consumer count, over 300s 20 83 530 =========== =================== ================ - Commands ======== @@ -400,144 +267,121 @@ Once installed, the ``karapace`` program should be in your path. It is the main daemon process that should be run under a service manager such as ``systemd`` to serve clients. - Configuration keys ================== -``advertised_hostname`` (default ``socket.gethostname()``) - -The hostname being advertised to other instances of Karapace that are -attached to the same Kafka group. All nodes within the cluster need to have -their advertised_hostname's set so that they can all reach each other. - -``bootstrap_uri`` (default ``localhost:9092``) - -The URI to the Kafka service where to store the schemas and to run -coordination among the Karapace instances. - -``client_id`` (default ``sr-1``) - -The client_id name by which the Karapace will use when coordinating with -other Karapaces who is master. The one with the name that sorts as the -first alphabetically is chosen as master from among the services with -master_eligibility set to true. - -``consumer_enable_autocommit`` (default ``True``) - -Enable auto commit on rest proxy consumers - -``consumer_request_timeout_ms`` (default ``11000``) - -Rest proxy consumers timeout for reads that do not limit the max bytes or provide their own timeout - -``consumer_request_max_bytes`` (default ``67108864``) - -Rest proxy consumers maximum bytes to be fetched per request - -``fetch_min_bytes`` (default ``-1``) - -Rest proxy consumers minimum bytes to be fetched per request. -1 means no limit - -``group_id`` (default ``schema-registry``) - -The Kafka group name used for selecting a master service to coordinate the -storing of Schemas. - -``master_eligibility`` (``true``) - -Should the service instance be considered for promotion to be the master -service. 
Reason to turn this off would be to have an instances of Karapace -running somewhere else for HA purposes but which you wouldn't want to -automatically promote to master if the primary instances were to become -unavailable. - -``producer_compression_type`` (default ``None``) - -Type of compression to be used by rest proxy producers - -``producer_acks`` (default ``1``) - -Level of consistency desired by each producer message sent on the rest proxy -More on https://kafka.apache.org/10/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html - -``producer_linger_ms`` (default ``0``) - -Time to wait for grouping together requests -More on https://kafka.apache.org/10/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html - -``security_protocol`` (default ``PLAINTEXT``) - -Default Kafka security protocol needed to communicate with the Kafka -cluster. Other options is to use SSL for SSL client certificate -authentication. - -``sentry`` (default ``None``) - -Used to configure parameters for sentry integration (dsn, tags, ...). Setting the -environment variable ``SENTRY_DSN`` will also enable sentry integration. - -``ssl_cafile`` (default ``Path to CA certificate``) - -Used when security_protocol is set to SSL, the path to the SSL CA certificate. - -``ssl_certfile`` (default ``/path/to/certfile``) - -Used when security_protocol is set to SSL, the path to the SSL certfile. - -``ssl_keyfile`` (default ``/path/to/keyfile``) - -Used when security_protocol is set to SSL, the path to the SSL keyfile. - -``topic_name`` (default ``_schemas``) - -The name of the Kafka topic where to store the schemas. - -``replication_factor`` (default ``1``) - -The replication factor to be used with the schema topic. - -``host`` (default ``"127.0.0.1"``) - -Address to bind the Karapace HTTP server to. Set to an empty string to -listen to all available addresses. - -``registry_host`` (default ``"127.0.0.1"``) - -Kafka Registry host, used by Kafka Rest for avro related requests. 
-If running both in the same process, it should be left to its default value - -``port`` (default ``8081``) - -HTTP webserver port to bind the Karapace to. - -``registry_port`` (default ``8081``) - -Kafka Registry port, used by Kafka Rest for avro related requests. -If running both in the same process, it should be left to its default value - -``metadata_max_age_ms`` (default ``60000``) - -Preiod of time in milliseconds after Kafka metadata is force refreshed. - -``karapace_rest`` (default ``true``) - -If the rest part of the app should be included in the starting process -At least one of this and karapace_registry options need to be enabled in order -for the service to start - -``karapace_registry`` (default ``true``) - -If the registry part of the app should be included in the starting process -At least one of this and karapace_registry options need to be enabled in order -for the service to start - -``name_strategy`` (default ``subject_name``) - -Name strategy to use when storing schemas from the kafka rest proxy service - -``master_election_strategy`` (default ``lowest``) - -Decides on what basis the karapace cluster master is chosen (only relevant in a multi node setup) +Keys to take special care are the ones needed to configure Kafka and advertised_hostname. + +.. list-table:: + :header-rows: 1 + + * - Parameter + - Default Value + - Description + * - ``advertised_hostname`` + - ``socket.gethostname()`` + - The hostname being advertised to other instances of Karapace that are attached to the same Kafka group. All nodes within the cluster need to have their ``advertised_hostname``'s set so that they can all reach each other. + * - ``bootstrap_uri`` + - ``localhost:9092`` + - The URI to the Kafka service where to store the schemas and to run + coordination among the Karapace instances. + * - ``client_id`` + - ``sr-1`` + - The ``client_id`` name by which the Karapace will use when coordinating with + other Karapaces who is master. 
The one with the name that sorts as the + first alphabetically is chosen as master from among the services with + master_eligibility set to true. + * - ``consumer_enable_autocommit`` + - ``True`` + - Enable auto commit on rest proxy consumers + * - ``consumer_request_timeout_ms`` + - ``11000`` + - Rest proxy consumers timeout for reads that do not limit the max bytes or provide their own timeout + * - ``consumer_request_max_bytes`` + - ``67108864`` + - Rest proxy consumers maximum bytes to be fetched per request + * - ``fetch_min_bytes`` + - ``-1`` + - Rest proxy consumers minimum bytes to be fetched per request. ``-1`` means no limit + * - ``group_id`` + - ``schema-registry`` + - The Kafka group name used for selecting a master service to coordinate the storing of Schemas. + * - ``master_eligibility`` + - ``true`` + - Should the service instance be considered for promotion to be the master + service. Reason to turn this off would be to have an instance of Karapace + running somewhere else for HA purposes but which you wouldn't want to + automatically promote to master if the primary instances were to become + unavailable. + * - ``producer_compression_type`` + - ``None`` + - Type of compression to be used by rest proxy producers + * - ``producer_acks`` + - ``1`` + - Level of consistency desired by each producer message sent on the rest proxy. + More on `Kafka Producer <https://kafka.apache.org/10/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html>`_ + * - ``producer_linger_ms`` + - ``0`` + - Time to wait for grouping together requests. + More on `Kafka Producer <https://kafka.apache.org/10/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html>`_ + * - ``security_protocol`` + - ``PLAINTEXT`` + - Default Kafka security protocol needed to communicate with the Kafka + cluster. Another option is to use SSL for SSL client certificate + authentication. + * - ``sentry`` + - ``None`` + - Used to configure parameters for sentry integration (dsn, tags, ...). Setting the + environment variable ``SENTRY_DSN`` will also enable sentry integration.
+ * - ``ssl_cafile`` + - ``/path/to/cafile`` + - Used when ``security_protocol`` is set to SSL, the path to the SSL CA certificate. + * - ``ssl_certfile`` + - ``/path/to/certfile`` + - Used when ``security_protocol`` is set to SSL, the path to the SSL certfile. + * - ``ssl_keyfile`` + - ``/path/to/keyfile`` + - Used when ``security_protocol`` is set to SSL, the path to the SSL keyfile. + * - ``topic_name`` + - ``_schemas`` + - The name of the Kafka topic where to store the schemas. + * - ``replication_factor`` + - ``1`` + - The replication factor to be used with the schema topic. + * - ``host`` + - ``127.0.0.1`` + - Address to bind the Karapace HTTP server to. Set to an empty string to + listen to all available addresses. + * - ``registry_host`` + - ``127.0.0.1`` + - Kafka Registry host, used by Kafka Rest for avro related requests. + If running both in the same process, it should be left to its default value + * - ``port`` + - ``8081`` + - HTTP webserver port to bind the Karapace to. + * - ``registry_port`` + - ``8081`` + - Kafka Registry port, used by Kafka Rest for avro related requests. + If running both in the same process, it should be left to its default value + * - ``metadata_max_age_ms`` + - ``60000`` + - Period of time in milliseconds after Kafka metadata is force refreshed. 
+ * - ``karapace_rest`` + - ``true`` + - If the rest part of the app should be included in the starting process + At least one of this and ``karapace_registry`` options need to be enabled in order + for the service to start + * - ``karapace_registry`` + - ``true`` + - If the registry part of the app should be included in the starting process + At least one of this and ``karapace_rest`` options need to be enabled in order + for the service to start + * - ``name_strategy`` + - ``subject_name`` + - Name strategy to use when storing schemas from the kafka rest proxy service + * - ``master_election_strategy`` + - ``lowest`` + - Decides on what basis the Karapace cluster master is chosen (only relevant in a multi node setup) License ======= @@ -548,7 +392,6 @@ available in the ``LICENSE`` file. Please note that the project explicitly does not require a CLA (Contributor License Agreement) from its contributors. - Contact ======= @@ -557,7 +400,6 @@ and pull requests at https://github.com/aiven/karapace . Any possible vulnerabilities or other serious issues should be reported directly to the maintainers . 
- Credits ======= diff --git a/karapace/master_coordinator.py b/karapace/master_coordinator.py index a2f993155..8d1b8545d 100644 --- a/karapace/master_coordinator.py +++ b/karapace/master_coordinator.py @@ -11,6 +11,7 @@ from karapace import constants from karapace.utils import KarapaceKafkaClient from threading import Lock, Thread +from typing import Optional, Tuple import json import logging @@ -30,7 +31,7 @@ class SchemaCoordinator(BaseCoordinator): hostname = None port = None scheme = None - master = None + are_we_master = None master_url = None master_eligibility = True log = logging.getLogger("SchemaCoordinator") @@ -49,16 +50,25 @@ def group_protocols(self): def _perform_assignment(self, leader_id, protocol, members): self.log.info("Creating assignment: %r, protocol: %r, members: %r", leader_id, protocol, members) - self.master = None + self.are_we_master = None error = NO_ERROR urls = {} + fallback_urls = {} for member_id, member_data in members: member_identity = json.loads(member_data.decode("utf8")) if member_identity["master_eligibility"] is True: urls[get_identity_url(member_identity["scheme"], member_identity["host"], member_identity["port"])] = (member_id, member_data) - self.master_url = sorted(urls, reverse=self.election_strategy.lower() == "highest")[0] - schema_master_id, member_data = urls[self.master_url] + else: + fallback_urls[get_identity_url(member_identity["scheme"], member_identity["host"], + member_identity["port"])] = (member_id, member_data) + if len(urls) > 0: + chosen_url = sorted(urls, reverse=self.election_strategy.lower() == "highest")[0] + schema_master_id, member_data = urls[chosen_url] + else: + # Protocol guarantees there is at least one member thus if urls is empty, fallback_urls cannot be + chosen_url = sorted(fallback_urls, reverse=self.election_strategy.lower() == "highest")[0] + schema_master_id, member_data = fallback_urls[chosen_url] member_identity = json.loads(member_data.decode("utf8")) identity = self.get_identity( 
host=member_identity["host"], @@ -66,7 +76,7 @@ def _perform_assignment(self, leader_id, protocol, members): scheme=member_identity["scheme"], json_encode=False, ) - self.log.info("Chose: %r with url: %r as the master", schema_master_id, self.master_url) + self.log.info("Chose: %r with url: %r as the master", schema_master_id, chosen_url) assignments = {} for member_id, member_data in members: @@ -90,12 +100,16 @@ def _on_join_complete(self, generation, member_id, protocol, member_assignment_b host=member_identity["host"], port=member_identity["port"], ) - if member_assignment["master"] == member_id: + # On Kafka protocol we can be assigned to be master, but if not master eligible, then we're not master for real + if member_assignment["master"] == member_id and member_identity["master_eligibility"]: self.master_url = master_url - self.master = True + self.are_we_master = True + elif not member_identity["master_eligibility"]: + self.master_url = None + self.are_we_master = False else: self.master_url = master_url - self.master = False + self.are_we_master = False return super(SchemaCoordinator, self)._on_join_complete(generation, member_id, protocol, member_assignment_bytes) def _on_join_follower(self): @@ -157,10 +171,10 @@ def init_schema_coordinator(self): self.sc.master_eligibility = self.config["master_eligibility"] self.lock.release() # self.sc now exists, we get to release the lock - def get_master_info(self): + def get_master_info(self) -> Tuple[bool, Optional[str]]: """Return whether we're the master, and the actual master url that can be used if we're not""" with self.lock: - return self.sc.master, self.sc.master_url + return self.sc.are_we_master, self.sc.master_url def close(self): self.log.info("Closing master_coordinator") @@ -179,7 +193,7 @@ def run(self): self.sc.ensure_active_group() self.sc.poll_heartbeat() - self.log.debug("We're master: %r: master_uri: %r", self.sc.master, self.sc.master_url) + self.log.debug("We're master: %r: master_uri: %r", 
self.sc.are_we_master, self.sc.master_url) time.sleep(min(_hb_interval, self.sc.time_to_next_heartbeat())) except: # pylint: disable=bare-except self.log.exception("Exception in master_coordinator") diff --git a/karapace/rapu.py b/karapace/rapu.py index e34b0a465..348dabe75 100644 --- a/karapace/rapu.py +++ b/karapace/rapu.py @@ -202,7 +202,7 @@ def check_rest_headers(self, request: HTTPRequest) -> dict: # pylint:disable=in if method in {"POST", "PUT"}: if not content_matcher: http_error( - message=HTTPStatus.UNSUPPORTED_MEDIA_TYPE.description, + message="HTTP 415 Unsupported Media Type", content_type=result["content_type"], code=HTTPStatus.UNSUPPORTED_MEDIA_TYPE, ) @@ -214,7 +214,7 @@ def check_rest_headers(self, request: HTTPRequest) -> dict: # pylint:disable=in return result self.log.error("Not acceptable: %r", request.get_header("accept")) http_error( - message=HTTPStatus.NOT_ACCEPTABLE.description, + message="HTTP 406 Not Acceptable", content_type=result["content_type"], code=HTTPStatus.NOT_ACCEPTABLE, ) @@ -226,7 +226,7 @@ def check_schema_headers(self, request: HTTPRequest): if method in {"POST", "PUT"} and cgi.parse_header(content_type)[0] not in SCHEMA_CONTENT_TYPES: http_error( - message=HTTPStatus.UNSUPPORTED_MEDIA_TYPE.description, + message="HTTP 415 Unsupported Media Type", content_type=response_default_content_type, code=HTTPStatus.UNSUPPORTED_MEDIA_TYPE, ) @@ -238,7 +238,7 @@ def check_schema_headers(self, request: HTTPRequest): if not content_type_match: self.log.debug("Unexpected Accept value: %r", accept_val) http_error( - message=HTTPStatus.NOT_ACCEPTABLE.description, + message="HTTP 406 Not Acceptable", content_type=response_default_content_type, code=HTTPStatus.NOT_ACCEPTABLE, ) diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index f49a4be88..c7645ea31 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -268,11 +268,12 @@ def handle_messages(self): self.ready = True add_offsets = False if 
self.master_coordinator is not None: - master, _ = self.master_coordinator.get_master_info() - # keep old behavior for True. When master is False, then we are a follower, so we should not accept direct - # writes anyway. When master is None, then this particular node is waiting for a stable value, so any + are_we_master, _ = self.master_coordinator.get_master_info() + # keep old behavior for True. When are_we_master is False, then we are a follower, so we should not accept direct + # writes anyway. When are_we_master is None, then this particular node is waiting for a stable value, so any # messages off the topic are writes performed by another node - if master is True: + # Also if master_eligibility is disabled by configuration, disable writes too + if are_we_master is True: add_offsets = True for _, msgs in raw_msgs.items(): diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index c621615c7..6456119b4 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -10,7 +10,7 @@ from karapace.rapu import HTTPRequest from karapace.schema_reader import InvalidSchema, KafkaSchemaReader, SchemaType, TypedSchema from karapace.utils import json_encode -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import argparse import asyncio @@ -21,6 +21,7 @@ @unique class SchemaErrorCodes(Enum): + EMPTY_SCHEMA = 42201 HTTP_NOT_FOUND = HTTPStatus.NOT_FOUND.value HTTP_CONFLICT = HTTPStatus.CONFLICT.value HTTP_UNPROCESSABLE_ENTITY = HTTPStatus.UNPROCESSABLE_ENTITY.value @@ -48,7 +49,6 @@ def __init__(self, config_file_path: str, config: dict) -> None: super().__init__(config_file_path=config_file_path, config=config) self._add_routes() self._init(config=config) - self.schema_lock = asyncio.Lock() def _init(self, config: dict) -> None: # pylint: disable=unused-argument self.ksr = None @@ -56,6 +56,7 @@ def _init(self, config: dict) -> None: # pylint: disable=unused-argument self.producer = 
self._create_producer() self._create_master_coordinator() self._create_schema_reader() + self.schema_lock = asyncio.Lock() def _add_routes(self): self.route( @@ -172,7 +173,7 @@ def _validate_version(self, content_type, version): # pylint: disable=inconsist body={ "error_code": SchemaErrorCodes.INVALID_VERSION_ID.value, "message": ( - "The specified version is not a valid version id. " + f"The specified version '{version}' is not a valid version id. " "Allowed values are between [1, 2^31-1] and the string \"latest\"" ), }, @@ -361,17 +362,6 @@ async def schemas_get_versions(self, content_type, *, schema_id): for version, schema in schemas.items(): if int(schema["id"]) == schema_id_int and not schema["deleted"]: subject_versions.append({"subject": subject, "version": int(version)}) - - if not subject_versions: - self.r( - body={ - "error_code": SchemaErrorCodes.HTTP_NOT_FOUND.value, - "message": "HTTP 404 Not Found", - }, - content_type=content_type, - status=HTTPStatus.NOT_FOUND, - ) - subject_versions = sorted(subject_versions, key=lambda s: (s["subject"], s["version"])) self.r(subject_versions, content_type) @@ -401,7 +391,7 @@ async def config_set(self, content_type, *, request): are_we_master, master_url = await self.get_master() if are_we_master: self.send_config_message(compatibility_level=compatibility_level, subject=None) - elif are_we_master is None: + elif not master_url: self.no_master_error(content_type) else: url = f"{master_url}/config" @@ -452,7 +442,7 @@ async def config_subject_set(self, content_type, *, request, subject): are_we_master, master_url = await self.get_master() if are_we_master: self.send_config_message(compatibility_level=compatibility_level, subject=subject) - elif are_we_master is None: + elif not master_url: self.no_master_error(content_type) else: url = f"{master_url}/config/{subject}" @@ -499,7 +489,7 @@ async def subject_delete(self, content_type, *, subject, request: HTTPRequest): if are_we_master: async with self.schema_lock: 
await self._subject_delete_local(content_type, subject, permanent) - elif are_we_master is None: + elif not master_url: self.no_master_error(content_type) else: url = f"{master_url}/subjects/{subject}?permanent={permanent}" @@ -603,7 +593,7 @@ async def subject_version_delete(self, content_type, *, subject, version, reques if are_we_master: async with self.schema_lock: await self._subject_version_delete_local(content_type, subject, version, permanent) - elif are_we_master is None: + elif not master_url: self.no_master_error(content_type) else: url = f"{master_url}/subjects/{subject}/versions/{version}?permanent={permanent}" @@ -633,16 +623,16 @@ async def subject_versions_list(self, content_type, *, subject): subject_data = self._subject_get(subject, content_type) self.r(list(subject_data["schemas"]), content_type, status=HTTPStatus.OK) - async def get_master(self): + async def get_master(self) -> Tuple[bool, Optional[str]]: async with self.master_lock: while True: - master, master_url = self.mc.get_master_info() - if master is None: - self.log.info("No master set: %r, url: %r", master, master_url) + are_we_master, master_url = self.mc.get_master_info() + if are_we_master is None: + self.log.info("No master set: %r, url: %r", are_we_master, master_url) elif self.ksr.ready is False: self.log.info("Schema reader isn't ready yet: %r", self.ksr.ready) else: - return master, master_url + return are_we_master, master_url await asyncio.sleep(1.0) def _validate_schema_request_body(self, content_type, body) -> None: @@ -682,11 +672,11 @@ def _validate_schema_key(self, content_type, body) -> None: if "schema" not in body: self.r( body={ - "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value, - "message": "Internal Server Error", + "error_code": SchemaErrorCodes.EMPTY_SCHEMA.value, + "message": "Empty schema", }, content_type=content_type, - status=HTTPStatus.INTERNAL_SERVER_ERROR, + status=HTTPStatus.UNPROCESSABLE_ENTITY, ) async def subjects_schema_post(self, 
content_type, *, subject, request): @@ -698,7 +688,7 @@ async def subjects_schema_post(self, content_type, *, subject, request): self.r( body={ "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value, - "message": "Internal Server Error", + "message": f"Error while looking up schema under subject {subject}", }, content_type=content_type, status=HTTPStatus.INTERNAL_SERVER_ERROR, @@ -750,7 +740,7 @@ async def subject_post(self, content_type, *, subject, request): if are_we_master: async with self.schema_lock: await self.write_new_schema_local(subject, body, content_type) - elif are_we_master is None: + elif not master_url: self.no_master_error(content_type) else: url = f"{master_url}/subjects/{subject}/versions" diff --git a/tests/integration/confluent-docker-compose.yml b/tests/integration/confluent-docker-compose.yml new file mode 100644 index 000000000..c755bb03f --- /dev/null +++ b/tests/integration/confluent-docker-compose.yml @@ -0,0 +1,51 @@ +--- +version: '2' +services: + zookeeper: + image: confluentinc/cp-zookeeper:latest + hostname: zookeeper + ports: + - "2181:2181" + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + + kafka: + image: confluentinc/cp-kafka:latest + hostname: kafka + depends_on: + - zookeeper + ports: + - "9092:9092" + - "9101:9101" + environment: + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_CONFLUENT_SCHEMA_REGISTRY_URL: http://schema-registry:8081 + + schema-registry: + image: confluentinc/cp-schema-registry:6.1.1 + + hostname: schema-registry + depends_on: + - kafka + ports: + - "8081:8081" + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'PLAINTEXT://kafka:29092' + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081 + + rest: + image: 
confluentinc/cp-kafka-rest:6.1.1 + depends_on: + - kafka + ports: + - "8082:8082" + environment: + KAFKA_REST_HOST_NAME: confluent-rest + KAFKA_REST_BOOTSTRAP_SERVERS: 'kafka:29092' + KAFKA_REST_LISTENERS: "http://rest:8082" + KAFKA_REST_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081' diff --git a/tests/integration/test_master_coordinator.py b/tests/integration/test_master_coordinator.py index a70dcaa7e..6d95a972d 100644 --- a/tests/integration/test_master_coordinator.py +++ b/tests/integration/test_master_coordinator.py @@ -32,12 +32,12 @@ def is_master(mc: MasterCoordinator) -> bool: This takes care of a race condition were the flag `master` is set but `master_url` is not yet set. """ - return bool(mc.sc and mc.sc.master and mc.sc.master_url) + return bool(mc.sc and mc.sc.are_we_master and mc.sc.master_url) def has_master(mc: MasterCoordinator) -> bool: """ True if `mc` has a master. """ - return bool(mc.sc and not mc.sc.master and mc.sc.master_url) + return bool(mc.sc and not mc.sc.are_we_master and mc.sc.master_url) @pytest.mark.timeout(60) # Github workflows need a bit of extra time @@ -91,6 +91,29 @@ def test_master_selection(kafka_servers: KafkaServers, strategy: str) -> None: assert slave.sc.master_url == master_url +def test_no_eligible_master(kafka_servers: KafkaServers) -> None: + client_id = new_random_name("master_selection_") + group_id = new_random_name("group_id") + + config_aa = set_config_defaults({ + "advertised_hostname": "127.0.0.1", + "bootstrap_uri": kafka_servers.bootstrap_servers, + "client_id": client_id, + "group_id": group_id, + "port": get_random_port(port_range=TESTS_PORT_RANGE, blacklist=[]), + "master_eligibility": False, + }) + + with closing(init_admin(config_aa)) as mc: + # Wait for the election to happen, ie. 
flag is not None + while not mc.sc or mc.sc.are_we_master is None: + time.sleep(0.3) + + # Make sure the end configuration is as expected + assert mc.sc.are_we_master is False + assert mc.sc.master_url is None + + async def test_schema_request_forwarding(registry_async_pair): master_url, slave_url = registry_async_pair max_tries, counter = 5, 0 diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index e5639b757..1406fe94a 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -7,9 +7,10 @@ from http import HTTPStatus from kafka import KafkaProducer from karapace.rapu import is_success +from karapace.schema_registry_apis import KarapaceSchemaRegistry from karapace.utils import Client from tests.utils import ( - create_field_name_factory, create_subject_name_factory, new_random_name, repeat_until_successful_request + create_field_name_factory, create_schema_name_factory, create_subject_name_factory, repeat_until_successful_request ) from typing import List, Tuple @@ -22,86 +23,7 @@ @pytest.mark.parametrize("trail", ["", "/"]) -@pytest.mark.parametrize("compatibility", ["FORWARD", "BACKWARD", "FULL"]) -async def test_enum_schema_compatibility(registry_async_client, compatibility, trail): - subject = create_subject_name_factory(f"test_enum_schema_compatibility-{trail}")() - - res = await registry_async_client.put(f"config{trail}", json={"compatibility": compatibility}) - assert res.status == 200 - schema = { - "type": "record", - "name": "myenumtest", - "fields": [{ - "type": { - "type": "enum", - "name": "enumtest", - "symbols": ["first", "second"], - }, - "name": "faa", - }] - } - res = await registry_async_client.post( - f"subjects/{subject}/versions{trail}", - json={"schema": jsonlib.dumps(schema)}, - ) - assert res.status == 200 - assert "id" in res.json() - schema_id = res.json()["id"] - schema = { - "type": "record", - "name": "myenumtest", - "fields": [{ - "type": { - "type": "enum", - "name": 
"enumtest", - "symbols": ["first", "second", "third"], - }, - "name": "faa", - }] - } - res = await registry_async_client.post( - f"subjects/{subject}/versions{trail}", - json={"schema": jsonlib.dumps(schema)}, - ) - assert res.status == 200 - assert "id" in res.json() - schema_id2 = res.json()["id"] - assert schema_id != schema_id2 - - schema = { - "type": "record", - "name": "myenumtest", - "fields": [{ - "type": { - "type": "enum", - "name": "enumtest", - "symbols": ["second"], - }, - "name": "faa", - }] - } - res = await registry_async_client.post( - f"subjects/{subject}/versions{trail}", - json={"schema": jsonlib.dumps(schema)}, - ) - assert res.status == 200 - assert "id" in res.json() - schema_id3 = res.json()["id"] - assert schema_id3 != schema_id2 - - res = await registry_async_client.get(f"schemas/ids/{schema_id3}{trail}") - assert res.status_code == 200 - res = jsonlib.loads(res.json()["schema"]) - assert res["type"] == "record" - assert res["name"] == "myenumtest" - assert res["fields"][0]["name"] == "faa" - assert res["fields"][0]["type"]["type"] == "enum" - assert res["fields"][0]["type"]["name"] == "enumtest" - assert res["fields"][0]["type"]["symbols"] == ["second"] - - -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_union_to_union(registry_async_client, trail): +async def test_union_to_union(registry_async_client: Client, trail: str) -> None: subject_name_factory = create_subject_name_factory(f"test_union_to_union-{trail}") subject_1 = subject_name_factory() @@ -155,7 +77,7 @@ async def test_union_to_union(registry_async_client, trail): @pytest.mark.parametrize("trail", ["", "/"]) -async def test_missing_subject_compatibility(registry_async_client, trail): +async def test_missing_subject_compatibility(registry_async_client: Client, trail: str) -> None: subject = create_subject_name_factory(f"test_missing_subject_compatibility-{trail}")() res = await registry_async_client.post( @@ -173,7 +95,7 @@ async def 
test_missing_subject_compatibility(registry_async_client, trail): @pytest.mark.parametrize("trail", ["", "/"]) -async def test_record_union_schema_compatibility(registry_async_client, trail): +async def test_record_union_schema_compatibility(registry_async_client: Client, trail: str) -> None: subject = create_subject_name_factory(f"test_record_union_schema_compatibility-{trail}")() res = await registry_async_client.put(f"config/{subject}{trail}", json={"compatibility": "BACKWARD"}) @@ -253,7 +175,7 @@ async def test_record_union_schema_compatibility(registry_async_client, trail): @pytest.mark.parametrize("trail", ["", "/"]) -async def test_record_nested_schema_compatibility(registry_async_client, trail): +async def test_record_nested_schema_compatibility(registry_async_client: Client, trail: str) -> None: subject = create_subject_name_factory(f"test_record_nested_schema_compatibility-{trail}")() res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) @@ -291,22 +213,26 @@ async def test_record_nested_schema_compatibility(registry_async_client, trail): # change string to integer in the nested record, should fail schema["fields"][1]["type"]["fields"][0]["type"] = "int" res = await registry_async_client.post( - "subjects/{}/versions".format(subject), + f"subjects/{subject}/versions", json={"schema": jsonlib.dumps(schema)}, ) assert res.status == 409 @pytest.mark.parametrize("trail", ["", "/"]) -async def test_compatibility_endpoint(registry_async_client, trail): +async def test_compatibility_endpoint(registry_async_client: Client, trail: str) -> None: + """ + Creates a subject with a schema. + Calls compatibility/subjects/{subject}/versions/latest endpoint + and checks it returns is_compatible true for a compatible new schema + and false for an incompatible schema. 
+ """ subject = create_subject_name_factory(f"test_compatibility_endpoint-{trail}")() - - res = await registry_async_client.put(f"config{trail}", json={"compatibility": "BACKWARD"}) - assert res.status == 200 + schema_name = create_schema_name_factory(f"test_compatibility_endpoint_{trail}")() schema = { "type": "record", - "name": "Objct", + "name": schema_name, "fields": [ { "name": "age", @@ -321,9 +247,8 @@ async def test_compatibility_endpoint(registry_async_client, trail): ) assert res.status == 200 - res = await registry_async_client.get("schemas/ids/{}{}".format(res.json()["id"], trail)) - schema_gotten_back = jsonlib.loads(res.json()["schema"]) - assert schema_gotten_back == schema + res = await registry_async_client.put(f"config/{subject}{trail}", json={"compatibility": "BACKWARD"}) + assert res.status == 200 # replace int with long schema["fields"] = [{"type": "long", "name": "age"}] @@ -334,6 +259,7 @@ async def test_compatibility_endpoint(registry_async_client, trail): assert res.status == 200 assert res.json() == {"is_compatible": True} + # replace int with string schema["fields"] = [{"type": "string", "name": "age"}] res = await registry_async_client.post( f"compatibility/subjects/{subject}/versions/latest{trail}", @@ -344,7 +270,7 @@ async def test_compatibility_endpoint(registry_async_client, trail): @pytest.mark.parametrize("trail", ["", "/"]) -async def test_type_compatibility(registry_async_client, trail): +async def test_type_compatibility(registry_async_client: Client, trail: str) -> None: def _test_cases(): # Generate FORWARD, BACKWARD and FULL tests for primitive types _CONVERSIONS = { @@ -425,15 +351,14 @@ def _test_cases(): @pytest.mark.parametrize("trail", ["", "/"]) -async def test_record_schema_compatibility(registry_async_client, trail): - subject_name_factory = create_subject_name_factory(f"test_record_schema_compatibility-{trail}") - subject_1 = subject_name_factory() +async def 
test_record_schema_compatibility_forward(registry_async_client: Client, trail: str) -> None: + subject_name_factory = create_subject_name_factory(f"test_record_schema_compatibility_forward_{trail}") + subject = subject_name_factory() + schema_name = create_schema_name_factory(f"test_record_schema_compatibility_forward_{trail}")() - res = await registry_async_client.put("config", json={"compatibility": "FORWARD"}) - assert res.status == 200 - schema = { + schema_1 = { "type": "record", - "name": "Objct", + "name": schema_name, "fields": [ { "name": "first_name", @@ -441,18 +366,20 @@ async def test_record_schema_compatibility(registry_async_client, trail): }, ] } - res = await registry_async_client.post( - f"subjects/{subject_1}/versions{trail}", - json={"schema": jsonlib.dumps(schema)}, + f"subjects/{subject}/versions{trail}", + json={"schema": jsonlib.dumps(schema_1)}, ) assert res.status == 200 assert "id" in res.json() schema_id = res.json()["id"] - schema2 = { + res = await registry_async_client.put(f"/config/{subject}{trail}", json={"compatibility": "FORWARD"}) + assert res.status == 200 + + schema_2 = { "type": "record", - "name": "Objct", + "name": schema_name, "fields": [ { "name": "first_name", @@ -469,17 +396,17 @@ async def test_record_schema_compatibility(registry_async_client, trail): ] } res = await registry_async_client.post( - f"subjects/{subject_1}/versions{trail}", - json={"schema": jsonlib.dumps(schema2)}, + f"subjects/{subject}/versions{trail}", + json={"schema": jsonlib.dumps(schema_2)}, ) assert res.status == 200 assert "id" in res.json() schema_id2 = res.json()["id"] assert schema_id != schema_id2 - schema3a = { + schema_3a = { "type": "record", - "name": "Objct", + "name": schema_name, "fields": [ { "name": "last_name", @@ -497,17 +424,17 @@ async def test_record_schema_compatibility(registry_async_client, trail): ] } res = await registry_async_client.post( - f"subjects/{subject_1}/versions{trail}", - json={"schema": 
jsonlib.dumps(schema3a)}, + f"subjects/{subject}/versions{trail}", + json={"schema": jsonlib.dumps(schema_3a)}, ) # Fails because field removed assert res.status == 409 res_json = res.json() assert res_json["error_code"] == 409 - schema3b = { + schema_3b = { "type": "record", - "name": "Objct", + "name": schema_name, "fields": [ { "name": "first_name", @@ -524,17 +451,53 @@ async def test_record_schema_compatibility(registry_async_client, trail): ] } res = await registry_async_client.post( - f"subjects/{subject_1}/versions{trail}", - json={"schema": jsonlib.dumps(schema3b)}, + f"subjects/{subject}/versions{trail}", + json={"schema": jsonlib.dumps(schema_3b)}, ) # Fails because incompatible type change assert res.status == 409 res_json = res.json() assert res_json["error_code"] == 409 - schema4 = { + schema_4 = { "type": "record", - "name": "Objct", + "name": schema_name, + "fields": [ + { + "name": "first_name", + "type": "string" + }, + { + "name": "last_name", + "type": "string" + }, + { + "name": "third_name", + "type": "string", + "default": "foodefaultvalue" + }, + { + "name": "age", + "type": "int" + }, + ] + } + res = await registry_async_client.post( + f"subjects/{subject}/versions{trail}", + json={"schema": jsonlib.dumps(schema_4)}, + ) + assert res.status == 200 + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_record_schema_compatibility_backward(registry_async_client: Client, trail: str) -> None: + subject_name_factory = create_subject_name_factory(f"test_record_schema_compatibility_backward_{trail}") + subject_1 = subject_name_factory() + schema_name = create_schema_name_factory(f"test_record_schema_compatibility_backward_{trail}")() + + schema_1 = { + "type": "record", + "name": schema_name, "fields": [ { "name": "first_name", @@ -557,14 +520,17 @@ async def test_record_schema_compatibility(registry_async_client, trail): } res = await registry_async_client.post( f"subjects/{subject_1}/versions{trail}", - json={"schema": 
jsonlib.dumps(schema4)}, + json={"schema": jsonlib.dumps(schema_1)}, ) assert res.status == 200 - res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) - schema5 = { + res = await registry_async_client.put(f"config/{subject_1}{trail}", json={"compatibility": "BACKWARD"}) + assert res.status == 200 + + # adds fourth_name w/o default, invalid + schema_2 = { "type": "record", - "name": "Objct", + "name": schema_name, "fields": [ { "name": "first_name", @@ -591,39 +557,40 @@ async def test_record_schema_compatibility(registry_async_client, trail): } res = await registry_async_client.post( f"subjects/{subject_1}/versions{trail}", - json={"schema": jsonlib.dumps(schema5)}, + json={"schema": jsonlib.dumps(schema_2)}, ) assert res.status == 409 # Add a default value for the field - schema5["fields"][3] = {"name": "fourth_name", "type": "string", "default": "foof"} + schema_2["fields"][3] = {"name": "fourth_name", "type": "string", "default": "foof"} res = await registry_async_client.post( f"subjects/{subject_1}/versions{trail}", - json={"schema": jsonlib.dumps(schema5)}, + json={"schema": jsonlib.dumps(schema_2)}, ) assert res.status == 200 assert "id" in res.json() # Try to submit schema with a different definition - schema5["fields"][3] = {"name": "fourth_name", "type": "int", "default": 2} + schema_2["fields"][3] = {"name": "fourth_name", "type": "int", "default": 2} res = await registry_async_client.post( f"subjects/{subject_1}/versions{trail}", - json={"schema": jsonlib.dumps(schema5)}, + json={"schema": jsonlib.dumps(schema_2)}, ) assert res.status == 409 subject_2 = subject_name_factory() res = await registry_async_client.put(f"config/{subject_2}{trail}", json={"compatibility": "BACKWARD"}) - schema = {"type": "record", "name": "Object", "fields": [{"name": "first_name", "type": "string"}]} - res = await registry_async_client.post(f"subjects/{subject_2}/versions{trail}", json={"schema": jsonlib.dumps(schema)}) assert res.status == 200 - 
schema["fields"].append({"name": "last_name", "type": "string"}) - res = await registry_async_client.post(f"subjects/{subject_2}/versions{trail}", json={"schema": jsonlib.dumps(schema)}) + schema_1 = {"type": "record", "name": schema_name, "fields": [{"name": "first_name", "type": "string"}]} + res = await registry_async_client.post(f"subjects/{subject_2}/versions{trail}", json={"schema": jsonlib.dumps(schema_1)}) + assert res.status == 200 + schema_1["fields"].append({"name": "last_name", "type": "string"}) + res = await registry_async_client.post(f"subjects/{subject_2}/versions{trail}", json={"schema": jsonlib.dumps(schema_1)}) assert res.status == 409 @pytest.mark.parametrize("trail", ["", "/"]) -async def test_enum_schema_field_add_compatibility(registry_async_client, trail): +async def test_enum_schema_field_add_compatibility(registry_async_client: Client, trail: str) -> None: subject_name_factory = create_subject_name_factory(f"test_enum_schema_field_add_compatibility-{trail}") expected_results = [("BACKWARD", 200), ("FORWARD", 200), ("FULL", 200)] for compatibility, status_code in expected_results: @@ -641,7 +608,7 @@ async def test_enum_schema_field_add_compatibility(registry_async_client, trail) @pytest.mark.parametrize("trail", ["", "/"]) -async def test_array_schema_field_add_compatibility(registry_async_client, trail): +async def test_array_schema_field_add_compatibility(registry_async_client: Client, trail: str) -> None: subject_name_factory = create_subject_name_factory(f"test_array_schema_field_add_compatibility-{trail}") expected_results = [("BACKWARD", 200), ("FORWARD", 409), ("FULL", 409)] for compatibility, status_code in expected_results: @@ -659,7 +626,7 @@ async def test_array_schema_field_add_compatibility(registry_async_client, trail @pytest.mark.parametrize("trail", ["", "/"]) -async def test_array_nested_record_compatibility(registry_async_client, trail): +async def test_array_nested_record_compatibility(registry_async_client: Client, 
trail: str) -> None: subject_name_factory = create_subject_name_factory(f"test_array_nested_record_compatibility-{trail}") expected_results = [("BACKWARD", 409), ("FORWARD", 200), ("FULL", 409)] for compatibility, status_code in expected_results: @@ -687,7 +654,7 @@ async def test_array_nested_record_compatibility(registry_async_client, trail): @pytest.mark.parametrize("trail", ["", "/"]) -async def test_record_nested_array_compatibility(registry_async_client, trail): +async def test_record_nested_array_compatibility(registry_async_client: Client, trail: str) -> None: subject_name_factory = create_subject_name_factory(f"test_record_nested_array_compatibility-{trail}") expected_results = [("BACKWARD", 200), ("FORWARD", 409), ("FULL", 409)] for compatibility, status_code in expected_results: @@ -715,8 +682,8 @@ async def test_record_nested_array_compatibility(registry_async_client, trail): async def test_map_schema_field_add_compatibility( - registry_async_client -): # TODO: Rename to pålain check map schema and add additional steps + registry_async_client: Client +) -> None: # TODO: Rename to pålain check map schema and add additional steps subject_name_factory = create_subject_name_factory("test_map_schema_field_add_compatibility") expected_results = [("BACKWARD", 200), ("FORWARD", 409), ("FULL", 409)] for compatibility, status_code in expected_results: @@ -733,7 +700,7 @@ async def test_map_schema_field_add_compatibility( assert res.status == status_code -async def test_enum_schema(registry_async_client): +async def test_enum_schema(registry_async_client: Client) -> None: subject_name_factory = create_subject_name_factory("test_enum_schema") for compatibility in {"BACKWARD", "FORWARD", "FULL"}: subject = subject_name_factory() @@ -790,7 +757,7 @@ async def test_enum_schema(registry_async_client): @pytest.mark.parametrize("compatibility", ["BACKWARD", "FORWARD", "FULL"]) -async def test_fixed_schema(registry_async_client, compatibility): +async def 
test_fixed_schema(registry_async_client: Client, compatibility: str) -> None: subject_name_factory = create_subject_name_factory(f"test_fixed_schema-{compatibility}") status_code_allowed = 200 status_code_denied = 409 @@ -850,7 +817,7 @@ async def test_fixed_schema(registry_async_client, compatibility): assert res.status == status_code_denied -async def test_primitive_schema(registry_async_client): +async def test_primitive_schema(registry_async_client: Client) -> None: subject_name_factory = create_subject_name_factory("test_primitive_schema") expected_results = [("BACKWARD", 200), ("FORWARD", 200), ("FULL", 200)] for compatibility, status_code in expected_results: @@ -880,7 +847,7 @@ async def test_primitive_schema(registry_async_client): res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": jsonlib.dumps(schema)}) -async def test_union_comparing_to_other_types(registry_async_client): +async def test_union_comparing_to_other_types(registry_async_client: Client) -> None: subject_name_factory = create_subject_name_factory("test_primitive_schema") expected_results = [("BACKWARD", 409), ("FORWARD", 200), ("FULL", 409)] for compatibility, status_code in expected_results: @@ -927,7 +894,7 @@ async def test_union_comparing_to_other_types(registry_async_client): assert res.status == status_code -async def test_transitive_compatibility(registry_async_client): +async def test_transitive_compatibility(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_transitive_compatibility")() res = await registry_async_client.put(f"config/{subject}", json={"compatibility": "BACKWARD_TRANSITIVE"}) assert res.status == 200 @@ -1019,71 +986,144 @@ async def assert_schema_versions_failed(client: Client, trail: str, schema_id: i assert res.status_code == response_code +async def register_schema(registry_async_client: Client, trail, subject: str, schema_str: str) -> Tuple[int, int]: + # Register to get the id + res = await 
registry_async_client.post( + f"subjects/{subject}/versions{trail}", + json={"schema": schema_str}, + ) + assert res.status == 200 + schema_id = res.json()["id"] + + # Get version + res = await registry_async_client.post( + f"subjects/{subject}{trail}", + json={"schema": schema_str}, + ) + assert res.status == 200 + assert res.json()["id"] == schema_id + return schema_id, res.json()["version"] + + @pytest.mark.parametrize("trail", ["", "/"]) -async def test_schema_versions(registry_async_client, trail): +async def test_schema_versions_multiple_subjects_same_schema(registry_async_client: Client, trail: str) -> None: """ Tests case where there are multiple subjects with the same schema. The schema/versions endpoint returns all these subjects. """ - subject_name_factory = create_subject_name_factory(f"test_schema-{trail}") - unique_field_factory = create_field_name_factory(trail) - - schema_str1 = '{"type": "string", "unique" : "%s"}' % unique_field_factory() - schema_str2 = '{"type": "string", "unique" : "%s"}' % unique_field_factory() + subject_name_factory = create_subject_name_factory(f"test_schema_versions_multiple_subjects_same_schema-{trail}") + schema_name_factory = create_schema_name_factory(f"test_schema_versions_multiple_subjects_same_schema_{trail}") - async def register_schema(subject: str, schema_str: str) -> Tuple[int, int]: - # Register to get the id - res = await registry_async_client.post( - f"subjects/{subject}/versions{trail}", - json={"schema": schema_str}, - ) - assert res.status == 200 - schema_id = res.json()["id"] - - # Get version - res = await registry_async_client.post( - f"subjects/{subject}{trail}", - json={"schema": schema_str}, - ) - assert res.status == 200 - assert res.json()["id"] == schema_id - return schema_id, res.json()["version"] + schema_1 = { + "type": "record", + "name": schema_name_factory(), + "fields": [{ + "name": "f1", + "type": "string", + }, { + "name": "f2", + "type": "string", + }] + } + schema_str_1 = 
jsonlib.dumps(schema_1) + schema_2 = { + "type": "record", + "name": schema_name_factory(), + "fields": [{ + "name": "f1", + "type": "string", + }] + } + schema_str_2 = jsonlib.dumps(schema_2) subject_1 = subject_name_factory() - schema_id_1, version_1 = await register_schema(subject_1, schema_str1) + schema_id_1, version_1 = await register_schema(registry_async_client, trail, subject_1, schema_str_1) schema_1_versions = [(subject_1, version_1)] await assert_schema_versions(registry_async_client, trail, schema_id_1, schema_1_versions) subject_2 = subject_name_factory() - schema_id_2, version_2 = await register_schema(subject_2, schema_str1) + schema_id_2, version_2 = await register_schema(registry_async_client, trail, subject_2, schema_str_1) schema_1_versions = [(subject_1, version_1), (subject_2, version_2)] assert schema_id_1 == schema_id_2 await assert_schema_versions(registry_async_client, trail, schema_id_1, schema_1_versions) subject_3 = subject_name_factory() - schema_id_3, version_3 = await register_schema(subject_3, schema_str1) + schema_id_3, version_3 = await register_schema(registry_async_client, trail, subject_3, schema_str_1) schema_1_versions = [(subject_1, version_1), (subject_2, version_2), (subject_3, version_3)] assert schema_id_1 == schema_id_3 await assert_schema_versions(registry_async_client, trail, schema_id_1, schema_1_versions) - # subject_4 with different schema + # subject_4 with different schema to check there are no side effects subject_4 = subject_name_factory() - schema_id_4, version_4 = await register_schema(subject_4, schema_str2) + schema_id_4, version_4 = await register_schema(registry_async_client, trail, subject_4, schema_str_2) schema_2_versions = [(subject_4, version_4)] assert schema_id_1 != schema_id_4 await assert_schema_versions(registry_async_client, trail, schema_id_1, schema_1_versions) await assert_schema_versions(registry_async_client, trail, schema_id_4, schema_2_versions) - # subject_4 now with the same schema, 
will have different version - schema_id_5, version_5 = await register_schema(subject_4, schema_str1) - assert schema_id_1 == schema_id_5 - schema_1_versions = [(subject_1, version_1), (subject_2, version_2), (subject_3, version_3), (subject_4, version_5)] + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_versions_deleting(registry_async_client: Client, trail: str) -> None: + """ + Tests getting schema versions when removing a schema version and eventually the subject. + """ + subject = create_subject_name_factory(f"test_schema_versions_deleting_{trail}")() + schema_name = create_schema_name_factory(f"test_schema_versions_deleting_{trail}")() + + schema_1 = { + "type": "record", + "name": schema_name, + "fields": [{ + "name": "field_1", + "type": "string" + }, { + "name": "field_2", + "type": "string" + }] + } + schema_str_1 = jsonlib.dumps(schema_1) + schema_2 = { + "type": "record", + "name": schema_name, + "fields": [ + { + "name": "field_1", + "type": "string" + }, + ] + } + schema_str_2 = jsonlib.dumps(schema_2) + + schema_id_1, version_1 = await register_schema(registry_async_client, trail, subject, schema_str_1) + schema_1_versions = [(subject, version_1)] await assert_schema_versions(registry_async_client, trail, schema_id_1, schema_1_versions) - await assert_schema_versions(registry_async_client, trail, schema_id_4, schema_2_versions) + + res = await registry_async_client.put(f"config/{subject}{trail}", json={"compatibility": "BACKWARD"}) + assert res.status == 200 + + schema_id_2, version_2 = await register_schema(registry_async_client, trail, subject, schema_str_2) + schema_2_versions = [(subject, version_2)] + await assert_schema_versions(registry_async_client, trail, schema_id_2, schema_2_versions) + + # Deleting one version, the other still found + res = await registry_async_client.delete("subjects/{}/versions/{}".format(subject, version_1)) + assert res.status_code == 200 + assert res.json() == version_1 + + await 
assert_schema_versions(registry_async_client, trail, schema_id_1, []) + await assert_schema_versions(registry_async_client, trail, schema_id_2, schema_2_versions) + + # Deleting the subject, the schema version 2 cannot be found anymore + res = await registry_async_client.delete("subjects/{}".format(subject)) + assert res.status_code == 200 + assert res.json() == [version_2] + + await assert_schema_versions(registry_async_client, trail, schema_id_1, []) + await assert_schema_versions(registry_async_client, trail, schema_id_2, []) @pytest.mark.parametrize("trail", ["", "/"]) -async def test_schema_types(registry_async_client, trail): +async def test_schema_types(registry_async_client: Client, trail: str) -> None: """ Tests for /schemas/types endpoint. """ @@ -1096,9 +1136,15 @@ async def test_schema_types(registry_async_client, trail): @pytest.mark.parametrize("trail", ["", "/"]) -async def test_schema(registry_async_client, trail): - subject = new_random_name("subject") - schema_str = '{"type": "string"}' +async def test_schema_repost(registry_async_client: Client, trail: str) -> None: + """" + Repost same schema again to see that a new id is not generated but an old one is given back + """ + subject = create_subject_name_factory(f"test_schema_repost-{trail}")() + unique_field_factory = create_field_name_factory(trail) + + unique = unique_field_factory() + schema_str = jsonlib.dumps({"type": "string", "unique": unique}) res = await registry_async_client.post( f"subjects/{subject}/versions{trail}", json={"schema": schema_str}, @@ -1106,84 +1152,218 @@ async def test_schema(registry_async_client, trail): assert res.status == 200 assert "id" in res.json() schema_id = res.json()["id"] + res = await registry_async_client.get(f"schemas/ids/{schema_id}{trail}") assert res.status_code == 200 - assert res.json()["schema"] == schema_str + assert jsonlib.loads(res.json()["schema"]) == jsonlib.loads(schema_str) - # repost same schema again to see that a new id is not generated 
but an old one is given back res = await registry_async_client.post( f"subjects/{subject}/versions{trail}", - json={"schema": '{"type": "string"}'}, + json={"schema": schema_str}, ) assert res.status == 200 assert "id" in res.json() assert schema_id == res.json()["id"] - # Schema missing in the json body + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_missing_body(registry_async_client: Client, trail: str) -> None: + subject = create_subject_name_factory(f"test_schema_missing_body-{trail}")() + res = await registry_async_client.post( f"subjects/{subject}/versions{trail}", json={}, ) - assert res.status == 500 - assert res.json()["error_code"] == 500 - assert res.json()["message"] == "Internal Server Error" + assert res.status == 422 + assert res.json()["error_code"] == 42201 + assert res.json()["message"] == "Empty schema" - # nonexistent schema id + +async def test_schema_non_existing_id(registry_async_client: Client) -> None: + """ + Tests getting a non-existing schema id + """ result = await registry_async_client.get(os.path.join("schemas/ids/123456789")) assert result.json()["error_code"] == 40403 - # invalid schema_id + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_non_invalid_id(registry_async_client: Client, trail: str) -> None: + """ + Tests getting an invalid schema id + """ result = await registry_async_client.get(f"schemas/ids/invalid{trail}") assert result.status == 404 assert result.json()["error_code"] == 404 assert result.json()["message"] == "HTTP 404 Not Found" + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_subject_invalid_id(registry_async_client: Client, trail: str) -> None: + """ + Creates a subject with a schema and trying to find the invalid versions for the subject. 
+ """ + subject = create_subject_name_factory(f"test_schema_subject_invalid_id-{trail}")() + unique_field_factory = create_field_name_factory(trail) + res = await registry_async_client.post( - "subjects/{}/versions".format(subject), json={"schema": "{\"type\": \"string\", \"foo\": \"string\"}"} + f"subjects/{subject}/versions", + json={"schema": "{\"type\": \"string\", \"foo\": \"string\", \"%s\": \"string\"}" % unique_field_factory()} ) assert res.status_code == 200 - assert "id" in res.json() - assert schema_id != res.json()["id"] - # Fetch the schema back to see how it was mangled - result = await registry_async_client.get(os.path.join("schemas/ids/{}".format(res.json()["id"]))) - schema = jsonlib.loads(result.json()["schema"]) - assert schema["type"] == "string" - assert schema["foo"] == "string" + # Find an invalid version 0 + res = await registry_async_client.get(f"subjects/{subject}/versions/0") + assert res.status_code == 422 + assert res.json()["error_code"] == 42202 + assert res.json()["message"] == \ + 'The specified version \'0\' is not a valid version id. '\ + + 'Allowed values are between [1, 2^31-1] and the string "latest"' + + # Find an invalid version (too large) + res = await registry_async_client.get(f"subjects/{subject}/versions/15") + assert res.status_code == 404 + assert res.json()["error_code"] == 40402 + assert res.json()["message"] == "Version 15 not found." + + +async def test_schema_subject_post_invalid(registry_async_client: Client) -> None: + """ + Tests posting to /subjects/{subject} with different invalid values. 
+ """ + subject_name_factory = create_subject_name_factory("test_schema_subject_post_invalid") + + schema_str = jsonlib.dumps({"type": "string"}) + + # Create the subject + subject_1 = subject_name_factory() + res = await registry_async_client.post( + f"subjects/{subject_1}/versions", + json={"schema": schema_str}, + ) + assert res.status == 200 + + res = await registry_async_client.post( + f"subjects/{subject_1}", + json={"schema": jsonlib.dumps({"type": "invalid_type"})}, + ) + assert res.status == 500, "Invalid schema for existing subject should return 500" + assert res.json()["message"] == f"Error while looking up schema under subject {subject_1}" + + # Subject is not found + subject_2 = subject_name_factory() + res = await registry_async_client.post( + f"subjects/{subject_2}", + json={"schema": schema_str}, + ) + assert res.status == 404 + assert res.json()["error_code"] == 40401 + assert res.json()["message"] == f"Subject '{subject_2}' not found." + + # Schema not found for subject + res = await registry_async_client.post( + f"subjects/{subject_1}", + json={"schema": '{"type": "int"}'}, + ) + assert res.status == 404 + assert res.json()["error_code"] == 40403 + assert res.json()["message"] == "Schema not found" + + # Schema not included in the request body + res = await registry_async_client.post(f"subjects/{subject_1}", json={}) + assert res.status == 500 + assert res.json()["error_code"] == 500 + assert res.json()["message"] == f"Error while looking up schema under subject {subject_1}" + + # Schema not included in the request body for subject that does not exist + subject_3 = subject_name_factory() + res = await registry_async_client.post( + f"subjects/{subject_3}", + json={}, + ) + assert res.status == 404 + assert res.json()["error_code"] == 40401 + assert res.json()["message"] == f"Subject '{subject_3}' not found." 
+ + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_lifecycle(registry_async_client: Client, trail: str) -> None: + subject = create_subject_name_factory(f"test_schema_lifecycle-{trail}")() + unique_field_factory = create_field_name_factory(trail) + + unique_1 = unique_field_factory() + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schema": jsonlib.dumps({ + "type": "string", + "foo": "string", + unique_1: "string" + })} + ) + assert res.status_code == 200 + schema_id_1 = res.json()["id"] + + unique_2 = unique_field_factory() + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schema": jsonlib.dumps({ + "type": "string", + "foo": "string", + unique_2: "string" + })} + ) + schema_id_2 = res.json()["id"] + assert res.status_code == 200 + assert schema_id_1 != schema_id_2 + + await assert_schema_versions(registry_async_client, trail, schema_id_1, [(subject, 1)]) + await assert_schema_versions(registry_async_client, trail, schema_id_2, [(subject, 2)]) + + result = await registry_async_client.get(os.path.join(f"schemas/ids/{schema_id_1}")) + schema_json_1 = jsonlib.loads(result.json()["schema"]) + assert schema_json_1["type"] == "string" + assert schema_json_1["foo"] == "string" + assert schema_json_1[unique_1] == "string" + + result = await registry_async_client.get(os.path.join(f"schemas/ids/{schema_id_2}")) + schema_json_2 = jsonlib.loads(result.json()["schema"]) + assert schema_json_2["type"] == "string" + assert schema_json_2["foo"] == "string" + assert schema_json_2[unique_2] == "string" res = await registry_async_client.get("subjects") assert res.status_code == 200 assert subject in res.json() - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions") assert res.status_code == 200 assert res.json() == [1, 2] - res = await 
registry_async_client.get("subjects/{}/versions/1".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions/1") assert res.status_code == 200 assert res.json()["subject"] == subject - assert res.json()["schema"] == schema_str - - # Find an invalid version 0 - res = await registry_async_client.get("subjects/{}/versions/0".format(subject)) - assert res.status_code == 422 - assert res.json()["error_code"] == 42202 - assert res.json()["message"] == \ - 'The specified version is not a valid version id. Allowed values are between [1, 2^31-1] and the string "latest"' - - # Find an invalid version (too large) - res = await registry_async_client.get("subjects/{}/versions/15".format(subject)) - assert res.status_code == 404 - assert res.json()["error_code"] == 40402 - assert res.json()["message"] == "Version 15 not found." + assert jsonlib.loads(res.json()["schema"]) == schema_json_1 # Delete an actual version - res = await registry_async_client.delete("subjects/{}/versions/1".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}/versions/1") assert res.status_code == 200 assert res.json() == 1 + # Get the schema by id, still there, wasn't hard-deleted + res = await registry_async_client.get(f"schemas/ids/{schema_id_1}{trail}") + assert res.status_code == 200 + assert jsonlib.loads(res.json()["schema"]) == schema_json_1 + + # Get the schema by id + res = await registry_async_client.get(f"schemas/ids/{schema_id_2}{trail}") + assert res.status_code == 200 + + # Get the versions, old version not found anymore (even if schema itself is) + await assert_schema_versions(registry_async_client, trail, schema_id_1, []) + await assert_schema_versions(registry_async_client, trail, schema_id_2, [(subject, 2)]) + # Delete a whole subject - res = await registry_async_client.delete("subjects/{}".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}") assert res.status_code == 200 assert res.json() == [2] @@ 
-1194,57 +1374,71 @@ async def test_schema(registry_async_client, trail): # After deleting the last version of a subject, it shouldn't be in the list res = await registry_async_client.post( - "subjects/{}/versions".format(subject), - json={"schema": '{"type": "string"}'}, + f"subjects/{subject}/versions", + json={"schema": '{"type": "string", "unique": "%s"}' % unique_field_factory()}, ) assert res.status == 200 res = await registry_async_client.get("subjects") assert subject in res.json() - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions") assert res.json() == [3] - res = await registry_async_client.delete("subjects/{}/versions/3".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}/versions/3") assert res.status_code == 200 res = await registry_async_client.get("subjects") assert subject not in res.json() - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions") assert res.status_code == 404 assert res.json()["error_code"] == 40401 assert res.json()["message"] == f"Subject '{subject}' not found." - res = await registry_async_client.get("subjects/{}/versions/latest".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions/latest") assert res.status_code == 404 assert res.json()["error_code"] == 40401 assert res.json()["message"] == f"Subject '{subject}' not found." 
# Creating a new schema works after deleting the only available version + unique_3 = unique_field_factory() res = await registry_async_client.post( - "subjects/{}/versions".format(subject), - json={"schema": '{"type": "string"}'}, + f"subjects/{subject}/versions", + json={"schema": jsonlib.dumps({ + "type": "string", + "foo": "string", + unique_3: "string" + })} ) assert res.status == 200 - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions") assert res.json() == [4] - # Check version number generation when deleting an entire subjcect - subject = new_random_name("subject") - res = await registry_async_client.put("config/{}".format(subject), json={"compatibility": "NONE"}) - assert res.status == 200 + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_version_numbering(registry_async_client: Client, trail: str) -> None: + """ + Test updating the schema of a subject increases its version number. + Deletes the subjects and asserts that when recreated, has a greater version number. 
+ """ + subject = create_subject_name_factory(f"test_schema_version_numbering-{trail}")() + unique_field_factory = create_field_name_factory(trail) + + unique = unique_field_factory() schema = { "type": "record", - "name": "Object", - "fields": [ - { - "name": "first_name", - "type": "string", - }, - ] + "name": unique, + "fields": [{ + "name": "first_name", + "type": "string", + }], } - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema)}) + res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": jsonlib.dumps(schema)}) assert res.status == 200 assert "id" in res.json() + + res = await registry_async_client.put(f"config/{subject}", json={"compatibility": "FORWARD"}) + assert res.status == 200 + schema2 = { "type": "record", - "name": "Object", + "name": unique, "fields": [ { "name": "first_name", @@ -1256,42 +1450,68 @@ async def test_schema(registry_async_client, trail): }, ] } - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema2)}) + res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": jsonlib.dumps(schema2)}) assert res.status == 200 assert "id" in res.json() - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions") assert res.status == 200 assert res.json() == [1, 2] - res = await registry_async_client.delete("subjects/{}".format(subject)) - assert res.status == 200 + # Recreate subject - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema)}) - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}") + assert res.status == 200 + res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": 
jsonlib.dumps(schema)}) + assert res.status == 200 + res = await registry_async_client.get(f"subjects/{subject}/versions") + assert res.status == 200 assert res.json() == [3] # Version number generation should now begin at 3 - # Check the return format on a more complex schema for version get - subject = new_random_name("subject") + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_version_numbering_complex(registry_async_client: Client, trail: str) -> None: + """ + Tests that when fetching a more complex schema, it matches with the created one. + """ + subject = create_subject_name_factory(f"test_schema_version_numbering_complex-{trail}")() + unique_field_factory = create_field_name_factory(trail) + schema = { "type": "record", - "name": "Objct", + "name": "Object", "fields": [ { "name": "first_name", "type": "string", }, - ] + ], + "unique": unique_field_factory() } res = await registry_async_client.post( - "subjects/{}/versions".format(subject), + f"subjects/{subject}/versions", json={"schema": jsonlib.dumps(schema)}, ) - res = await registry_async_client.get("subjects/{}/versions/1".format(subject)) + schema_id = res.json()["id"] + + res = await registry_async_client.get(f"subjects/{subject}/versions/1") assert res.status == 200 assert res.json()["subject"] == subject assert sorted(jsonlib.loads(res.json()["schema"])) == sorted(schema) + await assert_schema_versions(registry_async_client, trail, schema_id, [(subject, 1)]) + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_three_subjects_sharing_schema(registry_async_client: Client, trail: str) -> None: + """" + Submits two subjects with the same schema. + Submits a third subject initially with different schema. Updates to share the schema. + Asserts all three subjects have the same schema. 
+ """ + subject_name_factory = create_subject_name_factory(f"test_schema_XXX-{trail}") + unique_field_factory = create_field_name_factory(trail) + # Submitting the exact same schema for a different subject should return the same schema ID. - subject = new_random_name("subject") + subject_1 = subject_name_factory() schema = { "type": "record", "name": "Object", @@ -1300,121 +1520,138 @@ async def test_schema(registry_async_client, trail): "name": "just_a_value", "type": "string", }, - ] + { + "name": unique_field_factory(), + "type": "string", + }, + ], } - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema)}) + res = await registry_async_client.post(f"subjects/{subject_1}/versions", json={"schema": jsonlib.dumps(schema)}) assert res.status == 200 assert "id" in res.json() - original_schema_id = res.json()["id"] + schema_id_1 = res.json()["id"] + # New subject with the same schema - subject = new_random_name("subject") - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema)}) + subject_2 = subject_name_factory() + res = await registry_async_client.post(f"subjects/{subject_2}/versions", json={"schema": jsonlib.dumps(schema)}) assert res.status == 200 assert "id" in res.json() - new_schema_id = res.json()["id"] - assert original_schema_id == new_schema_id + schema_id_2 = res.json()["id"] + assert schema_id_1 == schema_id_2 # It also works for multiple versions in a single subject - subject = new_random_name("subject") + subject_3 = subject_name_factory() res = await registry_async_client.put( - "config/{}".format(subject), json={"compatibility": "NONE"} + f"config/{subject_3}", json={"compatibility": "NONE"} ) # We don't care about the compatibility in this test res = await registry_async_client.post( - "subjects/{}/versions".format(subject), + f"subjects/{subject_3}/versions", json={"schema": '{"type": "string"}'}, ) assert res.status == 200 
res = await registry_async_client.post( - "subjects/{}/versions".format(subject), + f"subjects/{subject_3}/versions", json={"schema": jsonlib.dumps(schema)}, ) assert res.status == 200 - assert res.json()["id"] == new_schema_id # Same ID as in the previous test step + assert res.json()["id"] == schema_id_1 # Same ID as in the previous test step + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_subject_version_schema(registry_async_client: Client, trail: str) -> None: + """ + Tests for the /subjects/(string: subject)/versions/(versionId: version)/schema endpoint. + """ + subject_name_factory = create_subject_name_factory(f"test_schema_subject_version_schema_{trail}") + schema_name = create_schema_name_factory(f"test_schema_subject_version_schema_{trail}")() # The subject version schema endpoint returns the correct results - subject = new_random_name("subject") - schema_str = '{"type": "string"}' + subject_1 = subject_name_factory() + + schema = { + "type": "record", + "name": schema_name, + "fields": [{ + "name": "just_a_value", + "type": "string", + }], + } + schema_str = jsonlib.dumps(schema) + res = await registry_async_client.post( - "subjects/{}/versions".format(subject), + f"subjects/{subject_1}/versions", json={"schema": schema_str}, ) assert res.status == 200 - res = await registry_async_client.get(f"subjects/{subject}/versions/1/schema") + res = await registry_async_client.get(f"subjects/{subject_1}/versions/1/schema") assert res.status == 200 assert res.json() == jsonlib.loads(schema_str) - subject2 = new_random_name("subject") - res = await registry_async_client.get(f"subjects/{subject2}/versions/1/schema") # Invalid subject + + subject_2 = subject_name_factory() + res = await registry_async_client.get(f"subjects/{subject_2}/versions/1/schema") # Invalid subject assert res.status == 404 assert res.json()["error_code"] == 40401 - assert res.json()["message"] == f"Subject '{subject2}' not found." 
- res = await registry_async_client.get(f"subjects/{subject}/versions/2/schema") + assert res.json()["message"] == f"Subject '{subject_2}' not found." + + res = await registry_async_client.get(f"subjects/{subject_1}/versions/2/schema") assert res.status == 404 assert res.json()["error_code"] == 40402 assert res.json()["message"] == "Version 2 not found." - res = await registry_async_client.get(f"subjects/{subject}/versions/latest/schema") + + res = await registry_async_client.get(f"subjects/{subject_1}/versions/latest/schema") assert res.status == 200 assert res.json() == jsonlib.loads(schema_str) - # The schema check for subject endpoint returns correct results - subject = new_random_name("subject") + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_same_subject(registry_async_client: Client, trail: str) -> None: + """ + The same schema JSON should be returned when checking the same schema str against the same subject + """ + subject_name_factory = create_subject_name_factory(f"test_schema_same_subject_{trail}") + schema_name = create_schema_name_factory(f"test_schema_same_subject_{trail}")() + + schema_str = jsonlib.dumps({ + "type": "record", + "name": schema_name, + "fields": [{ + "name": "f", + "type": "string", + }] + }) + subject = subject_name_factory() res = await registry_async_client.post( - "subjects/{}/versions".format(subject), + f"subjects/{subject}/versions", json={"schema": schema_str}, ) assert res.status == 200 schema_id = res.json()["id"] - # The same ID should be returned when checking the same schema against the same subject res = await registry_async_client.post( f"subjects/{subject}", json={"schema": schema_str}, ) assert res.status == 200 - assert res.json() == {"id": schema_id, "subject": subject, "schema": schema_str, "version": 1} - # Invalid schema should return 500 - res = await registry_async_client.post( - f"subjects/{subject}", - json={"schema": '{"type": "invalid_type"}'}, - ) - assert res.status == 500 - assert 
res.json()["message"] == f"Error while looking up schema under subject {subject}" - # Subject is not found - subject3 = new_random_name("subject") - res = await registry_async_client.post( - f"subjects/{subject3}", - json={"schema": schema_str}, - ) - assert res.status == 404 - assert res.json()["error_code"] == 40401 - assert res.json()["message"] == f"Subject '{subject3}' not found." - # Schema not found for subject - res = await registry_async_client.post( - f"subjects/{subject}", - json={"schema": '{"type": "int"}'}, - ) - assert res.status == 404 - assert res.json()["error_code"] == 40403 - assert res.json()["message"] == "Schema not found" - # Schema not included in the request body - res = await registry_async_client.post(f"subjects/{subject}", json={}) - assert res.status == 500 - assert res.json()["error_code"] == 500 - assert res.json()["message"] == "Internal Server Error" - # Schema not included in the request body for subject that does not exist - subject4 = new_random_name("subject") - res = await registry_async_client.post( - f"subjects/{subject4}", - json={}, - ) - assert res.status == 404 - assert res.json()["error_code"] == 40401 - assert res.json()["message"] == f"Subject '{subject4}' not found." - # Test that global ID values stay consistent after using pre-existing schema ids - subject = new_random_name("subject") + # Switch the str schema to a dict for comparison + json = res.json() + json["schema"] = jsonlib.loads(json["schema"]) + assert json == {"id": schema_id, "subject": subject, "schema": jsonlib.loads(schema_str), "version": 1} + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_schema_version_number_existing_schema(registry_async_client: Client, trail: str) -> None: + """ + Tests creating the same schemas for two subjects. Asserts the schema ids are the same for both subjects. 
+ """ + subject_name_factory = create_subject_name_factory(f"test_schema_version_number_existing_schema-{trail}") + unique_field_factory = create_field_name_factory(trail) + + subject_1 = subject_name_factory() res = await registry_async_client.put( - "config/{}".format(subject), json={"compatibility": "NONE"} + f"config/{subject_1}", json={"compatibility": "NONE"} ) # We don't care about compatibility - schema = { + unique = unique_field_factory() + schema_1 = { "type": "record", "name": "Object", "fields": [ @@ -1422,9 +1659,13 @@ async def test_schema(registry_async_client, trail): "name": "just_a_value", "type": "string", }, - ] + { + "name": f"{unique}", + "type": "string", + }, + ], } - schema2 = { + schema_2 = { "type": "record", "name": "Object", "fields": [ @@ -1432,9 +1673,13 @@ async def test_schema(registry_async_client, trail): "name": "just_a_value2", "type": "string", }, - ] + { + "name": f"{unique}", + "type": "string", + }, + ], } - schema3 = { + schema_3 = { "type": "record", "name": "Object", "fields": [ @@ -1442,30 +1687,40 @@ async def test_schema(registry_async_client, trail): "name": "just_a_value3", "type": "int", }, - ] + { + "name": f"{unique}", + "type": "string", + }, + ], } - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema)}) + res = await registry_async_client.post(f"subjects/{subject_1}/versions", json={"schema": jsonlib.dumps(schema_1)}) assert res.status == 200 - first_schema_id = res.json()["id"] - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema2)}) + schema_id_1 = res.json()["id"] + + res = await registry_async_client.post(f"subjects/{subject_1}/versions", json={"schema": jsonlib.dumps(schema_2)}) assert res.status == 200 - assert res.json()["id"] == first_schema_id + 1 + schema_id_2 = res.json()["id"] + assert schema_id_2 > schema_id_1 + # Reuse the first schema in another subject - subject = 
new_random_name("subject") + subject_2 = subject_name_factory() res = await registry_async_client.put( - "config/{}".format(subject), json={"compatibility": "NONE"} + f"config/{subject_2}", json={"compatibility": "NONE"} ) # We don't care about compatibility - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema)}) + res = await registry_async_client.post(f"subjects/{subject_2}/versions", json={"schema": jsonlib.dumps(schema_1)}) assert res.status == 200 - assert res.json()["id"] == first_schema_id + assert res.json()["id"] == schema_id_1 + # Create a new schema - res = await registry_async_client.post("subjects/{}/versions".format(subject), json={"schema": jsonlib.dumps(schema3)}) + res = await registry_async_client.post(f"subjects/{subject_2}/versions", json={"schema": jsonlib.dumps(schema_3)}) assert res.status == 200 - assert res.json()["id"] == first_schema_id + 2 + schema_id_3 = res.json()["id"] + assert res.json()["id"] == schema_id_3 + assert schema_id_3 > schema_id_2 @pytest.mark.parametrize("trail", ["", "/"]) -async def test_config(registry_async_client, trail): +async def test_config(registry_async_client: Client, trail: str) -> None: subject_name_factory = create_subject_name_factory(f"test_config-{trail}") # Tests /config endpoint @@ -1521,17 +1776,17 @@ async def test_config(registry_async_client, trail): assert res.headers["Content-Type"] == "application/vnd.schemaregistry.v1+json" # The subject doesn't exist from the schema point of view - res = await registry_async_client.get("subjects/{}/versions".format(subject_2)) + res = await registry_async_client.get(f"subjects/{subject_2}/versions") assert res.status_code == 404 res = await registry_async_client.post( - "subjects/{}/versions".format(subject_2), + f"subjects/{subject_2}/versions", json={"schema": '{"type": "string"}'}, ) assert res.status_code == 200 assert "id" in res.json() - res = await 
registry_async_client.get("config/{}".format(subject_2)) + res = await registry_async_client.get(f"config/{subject_2}") assert res.status_code == 200 assert res.json()["compatibilityLevel"] == "FULL" @@ -1545,7 +1800,7 @@ async def test_config(registry_async_client, trail): assert res.json()["compatibilityLevel"] == "NONE" -async def test_http_headers(registry_async_client): +async def test_http_headers(registry_async_client: Client) -> None: res = await registry_async_client.get("subjects", headers={"Accept": "application/json"}) assert res.headers["Content-Type"] == "application/json" @@ -1556,12 +1811,12 @@ async def test_http_headers(registry_async_client): # Giving an invalid Accept value res = await registry_async_client.get("subjects", headers={"Accept": "application/vnd.schemaregistry.v2+json"}) assert res.status == 406 - assert res.json()["message"] == HTTPStatus.NOT_ACCEPTABLE.description + assert res.json()["message"] == "HTTP 406 Not Acceptable" # PUT with an invalid Content type res = await registry_async_client.put("config", json={"compatibility": "NONE"}, headers={"Content-Type": "text/html"}) assert res.status == 415 - assert res.json()["message"] == HTTPStatus.UNSUPPORTED_MEDIA_TYPE.description + assert res.json()["message"] == "HTTP 415 Unsupported Media Type" assert res.headers["Content-Type"] == "application/vnd.schemaregistry.v1+json" # Multiple Accept values @@ -1585,7 +1840,7 @@ async def test_http_headers(registry_async_client): assert res.headers["Content-Type"] == "application/vnd.schemaregistry.v1+json" res = await registry_async_client.get("subjects", headers={"Accept": "text/*"}) assert res.status == 406 - assert res.json()["message"] == HTTPStatus.NOT_ACCEPTABLE.description + assert res.json()["message"] == "HTTP 406 Not Acceptable" # Accept without any type works res = await registry_async_client.get("subjects", headers={"Accept": "*/does_not_matter"}) @@ -1608,7 +1863,6 @@ async def test_http_headers(registry_async_client): assert 
res.headers["Content-Type"] == "application/vnd.schemaregistry.v1+json" res = await registry_async_client.get("subjects", headers={"Accept": "application/octet-stream"}) assert res.status == 406 - assert res.json()["message"] == HTTPStatus.NOT_ACCEPTABLE.description # Parse Content-Type correctly res = await registry_async_client.put( @@ -1650,7 +1904,7 @@ async def test_http_headers(registry_async_client): assert res.status_code == 404, res.content -async def test_schema_body_validation(registry_async_client): +async def test_schema_body_validation(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_schema_body_validation")() post_endpoints = {f"subjects/{subject}", f"subjects/{subject}/versions"} for endpoint in post_endpoints: @@ -1676,28 +1930,39 @@ async def test_schema_body_validation(registry_async_client): assert res.json()["message"] == "Internal Server Error" -async def test_version_number_validation(registry_async_client): - # Create a schema +async def test_version_number_validation(registry_async_client: Client) -> None: + """ + Creates a subject and schema. Tests that the endpoints + subjects/{subject}/versions/{version} and + subjects/{subject}/versions/{version}/schema + return correct values both with valid and invalid parameters. 
+ """ subject = create_subject_name_factory("test_version_number_validation")() res = await registry_async_client.post( - "subjects/{}/versions".format(subject), + f"subjects/{subject}/versions", json={"schema": '{"type": "string"}'}, ) assert res.status_code == 200 assert "id" in res.json() + res = await registry_async_client.get(f"subjects/{subject}/versions") + assert res.status == 200 + schema_version = res.json()[0] + invalid_schema_version = schema_version - 1 + version_endpoints = {f"subjects/{subject}/versions/$VERSION", f"subjects/{subject}/versions/$VERSION/schema"} for endpoint in version_endpoints: # Valid schema id - res = await registry_async_client.get(endpoint.replace("$VERSION", "1")) + res = await registry_async_client.get(endpoint.replace("$VERSION", str(schema_version))) assert res.status == 200 + # Invalid number - res = await registry_async_client.get(endpoint.replace("$VERSION", "0")) + res = await registry_async_client.get(endpoint.replace("$VERSION", str(invalid_schema_version))) assert res.status == 422 assert res.json()["error_code"] == 42202 assert res.json()[ "message" - ] == "The specified version is not a valid version id. " \ + ] == f"The specified version '{invalid_schema_version}' is not a valid version id. " \ "Allowed values are between [1, 2^31-1] and the string \"latest\"" # Valid latest string res = await registry_async_client.get(endpoint.replace("$VERSION", "latest")) @@ -1708,35 +1973,47 @@ async def test_version_number_validation(registry_async_client): assert res.json()["error_code"] == 42202 assert res.json()[ "message" - ] == "The specified version is not a valid version id. " \ + ] == "The specified version 'invalid' is not a valid version id. 
" \ "Allowed values are between [1, 2^31-1] and the string \"latest\"" -async def test_common_endpoints(registry_async_client): +async def test_common_endpoints(registry_async_client: Client) -> None: res = await registry_async_client.get("") assert res.status == 200 assert res.json() == {} -async def test_invalid_namespace(registry_async_client): +async def test_invalid_namespace(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_invalid_namespace")() schema = {"type": "record", "name": "foo", "namespace": "foo-bar-baz", "fields": []} res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": jsonlib.dumps(schema)}) assert res.ok, res.json() -async def test_schema_remains_constant(registry_async_client): +async def test_schema_remains_constant(registry_async_client: Client) -> None: + """ + Creates a subject with schema. Asserts the schema is the same when fetching it using schemas/ids/{schema_id} + """ subject = create_subject_name_factory("test_schema_remains_constant")() - schema = {"type": "record", "name": "foo", "namespace": "foo-bar-baz", "fields": [{"type": "string", "name": "bla"}]} + schema_name = create_schema_name_factory("test_schema_remains_constant")() + schema = { + "type": "record", + "name": schema_name, + "namespace": "foo-bar-baz", + "fields": [{ + "type": "string", + "name": "bla" + }] + } schema_str = jsonlib.dumps(schema) res = await registry_async_client.post(f"subjects/{subject}/versions", json={"schema": schema_str}) assert res.ok, res.json() - scid = res.json()["id"] - res = await registry_async_client.get(f"schemas/ids/{scid}") - assert res.json()["schema"] == schema_str + schema_id = res.json()["id"] + res = await registry_async_client.get(f"schemas/ids/{schema_id}") + assert jsonlib.loads(res.json()["schema"]) == jsonlib.loads(schema_str) -async def test_malformed_kafka_message(registry_async, registry_async_client): +async def test_malformed_kafka_message(registry_async: 
KarapaceSchemaRegistry, registry_async_client: Client) -> None: topic = registry_async.config["topic_name"] prod = KafkaProducer(bootstrap_servers=registry_async.config["bootstrap_uri"]) @@ -1762,7 +2039,7 @@ async def test_malformed_kafka_message(registry_async, registry_async_client): assert res_data == payload, res_data -async def test_inner_type_compat_failure(registry_async_client): +async def test_inner_type_compat_failure(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_inner_type_compat_failure")() sc = { @@ -1808,7 +2085,7 @@ async def test_inner_type_compat_failure(registry_async_client): assert sc_id != res.json()["id"] -async def test_anon_type_union_failure(registry_async_client): +async def test_anon_type_union_failure(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_anon_type_union_failure")() schema = { "type": "record", @@ -1861,7 +2138,7 @@ async def test_anon_type_union_failure(registry_async_client): @pytest.mark.parametrize("compatibility", ["FULL", "FULL_TRANSITIVE"]) -async def test_full_transitive_failure(registry_async_client, compatibility): +async def test_full_transitive_failure(registry_async_client: Client, compatibility: str) -> None: subject = create_subject_name_factory(f"test_full_transitive_failure-{compatibility}")() init = { @@ -1915,7 +2192,7 @@ async def test_full_transitive_failure(registry_async_client, compatibility): assert res.status == 409 -async def test_invalid_schemas(registry_async_client): +async def test_invalid_schemas(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_invalid_schemas")() repated_field = { @@ -1939,7 +2216,7 @@ async def test_invalid_schemas(registry_async_client): assert not is_success(HTTPStatus(res.status)), "an invalid schema must not be a success" -async def test_schema_hard_delete_version(registry_async_client): +async def test_schema_hard_delete_version(registry_async_client: Client) -> 
None: subject = create_subject_name_factory("test_schema_hard_delete_version")() res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) assert res.status == 200 @@ -1985,40 +2262,40 @@ async def test_schema_hard_delete_version(registry_async_client): assert schemav1_id != schemav2_id # Cannot directly hard delete schema v1 - res = await registry_async_client.delete("subjects/{}/versions/1?permanent=true".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}/versions/1?permanent=true") assert res.status_code == 404 assert res.json()["error_code"] == 40407 assert res.json()["message"] == f"Subject '{subject}' Version 1 was not deleted first before being permanently deleted" # Soft delete schema v1 - res = await registry_async_client.delete("subjects/{}/versions/1".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}/versions/1") assert res.status_code == 200 assert res.json() == 1 # Cannot soft delete twice - res = await registry_async_client.delete("subjects/{}/versions/1".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}/versions/1") assert res.status_code == 404 assert res.json()["error_code"] == 40406 assert res.json( )["message"] == f"Subject '{subject}' Version 1 was soft deleted.Set permanent=true to delete permanently" - res = await registry_async_client.get("subjects/{}/versions/1".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions/1") assert res.status_code == 404 assert res.json()["error_code"] == 40402 assert res.json()["message"] == "Version 1 not found." 
# Hard delete schema v1 - res = await registry_async_client.delete("subjects/{}/versions/1?permanent=true".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}/versions/1?permanent=true") assert res.status_code == 200 # Cannot hard delete twice - res = await registry_async_client.delete("subjects/{}/versions/1?permanent=true".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}/versions/1?permanent=true") assert res.status_code == 404 assert res.json()["error_code"] == 40402 assert res.json()["message"] == "Version 1 not found." -async def test_schema_hard_delete_whole_schema(registry_async_client): +async def test_schema_hard_delete_whole_schema(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_schema_hard_delete_whole_schema")() res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) assert res.status == 200 @@ -2064,33 +2341,33 @@ async def test_schema_hard_delete_whole_schema(registry_async_client): assert schemav1_id != schemav2_id # Hard delete whole schema cannot be done before soft delete - res = await registry_async_client.delete("subjects/{}?permanent=true".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}?permanent=true") assert res.status_code == 404 assert res.json()["error_code"] == 40405 assert res.json()["message"] == f"Subject '{subject}' was not deleted first before being permanently deleted" # Soft delete whole schema - res = await registry_async_client.delete("subjects/{}".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}") assert res.status_code == 200 assert res.json() == [1, 2] - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions") assert res.status_code == 404 assert res.json()["error_code"] == 40401 assert res.json()["message"] == f"Subject '{subject}' not found." 
# Hard delete whole schema - res = await registry_async_client.delete("subjects/{}?permanent=true".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}?permanent=true") assert res.status_code == 200 assert res.json() == [1, 2] - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await registry_async_client.get(f"subjects/{subject}/versions") assert res.status_code == 404 assert res.json()["error_code"] == 40401 assert res.json()["message"] == f"Subject '{subject}' not found." -async def test_schema_hard_delete_and_recreate(registry_async_client): +async def test_schema_hard_delete_and_recreate(registry_async_client: Client) -> None: subject = create_subject_name_factory("test_schema_hard_delete_and_recreate")() res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) assert res.status == 200 @@ -2115,7 +2392,7 @@ async def test_schema_hard_delete_and_recreate(registry_async_client): schema_id = res.json()["id"] # Soft delete whole schema - res = await registry_async_client.delete("subjects/{}".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}") assert res.status_code == 200 # Recreate with same subject after soft delete @@ -2128,13 +2405,13 @@ async def test_schema_hard_delete_and_recreate(registry_async_client): assert schema_id == res.json()["id"], "the same schema registered, the same identifier" # Soft delete whole schema - res = await registry_async_client.delete("subjects/{}".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}") assert res.status_code == 200 # Hard delete whole schema - res = await registry_async_client.delete("subjects/{}?permanent=true".format(subject)) + res = await registry_async_client.delete(f"subjects/{subject}?permanent=true") assert res.status_code == 200 - res = await registry_async_client.get("subjects/{}/versions".format(subject)) + res = await 
registry_async_client.get(f"subjects/{subject}/versions") assert res.status_code == 404 assert res.json()["error_code"] == 40401 assert res.json()["message"] == f"Subject '{subject}' not found." diff --git a/tests/utils.py b/tests/utils.py index 28a164001..41fc65671 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -217,11 +217,15 @@ def new_random_name(prefix: str) -> str: def create_subject_name_factory(prefix: str) -> Callable[[], str]: - return create_id_factory(f"subject-{prefix}") + return create_id_factory(f"subject_{prefix}") def create_field_name_factory(prefix: str) -> Callable[[], str]: - return create_id_factory(f"field-{prefix}") + return create_id_factory(f"field_{prefix}") + + +def create_schema_name_factory(prefix: str) -> Callable[[], str]: + return create_id_factory(f"schema_{prefix}") def create_id_factory(prefix: str) -> Callable[[], str]: From 39f5b08cb0b102191a0936d2b3efb16a0e871ec6 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 9 Jun 2021 16:08:32 +0300 Subject: [PATCH 021/168] add next part of unittests for protoparser library --- karapace/protobuf/extensions_element.py | 10 +- karapace/protobuf/field_element.py | 5 +- karapace/protobuf/group_element.py | 4 +- karapace/protobuf/kotlin_wrapper.py | 2 +- karapace/protobuf/one_of_element.py | 1 + karapace/protobuf/option_element.py | 2 +- karapace/protobuf/proto_type.py | 24 +- karapace/protobuf/utils.py | 4 +- tests/unit/test_enum_element.py | 153 ++++++ tests/unit/test_extend_element.py | 142 +++++ tests/unit/test_extensions_element.py | 51 ++ tests/unit/test_field_element.py | 94 ++++ tests/unit/test_message_element.py | 665 ++++++++++++++++++++++++ tests/unit/test_option_element.py | 64 +++ tests/unit/test_parsing_tester.py | 32 ++ tests/unit/test_proto_file_element.py | 166 ++---- tests/unit/test_service_element.py | 224 ++++++++ 17 files changed, 1492 insertions(+), 151 deletions(-) create mode 100644 tests/unit/test_enum_element.py create mode 100644 
tests/unit/test_extend_element.py create mode 100644 tests/unit/test_extensions_element.py create mode 100644 tests/unit/test_field_element.py create mode 100644 tests/unit/test_message_element.py create mode 100644 tests/unit/test_option_element.py create mode 100644 tests/unit/test_parsing_tester.py create mode 100644 tests/unit/test_service_element.py diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index 77edb5124..56b4e2e5e 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -10,12 +10,14 @@ class ExtensionsElement: location: Location documentation: str = "" """ An [Int] or [IntRange] tag. """ - values: list + values: list = [] - def __init__(self, location: Location, documentation: str, values: list): + def __init__(self, location: Location, documentation: str = None, values: list = None): self.location = location - self.documentation = documentation - self.values = values + if documentation: + self.documentation = documentation + if values: + self.values = values def to_schema(self) -> str: result: list = [] diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 7ab213fde..118550484 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -64,13 +64,14 @@ def options_with_special_values(self) -> list: not options themselves as they're missing from `google.protobuf.FieldOptions`. 
""" - options = self.options.copy() + options: list = self.options.copy() if self.default_value: proto_type: ProtoType = ProtoType.get2(self.element_type) options.append(OptionElement("default", proto_type.to_kind(), self.default_value, False)) + if self.json_name: - self.options.append(OptionElement("json_name", OptionElement.Kind.STRING, self.json_name, False)) + options.append(OptionElement("json_name", OptionElement.Kind.STRING, self.json_name, False)) return options diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index 5e9e01b94..762d2ec23 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -27,9 +27,9 @@ def __init__( self.location = location self.name = name self.tag = tag - self.documentation = documentation - if self.fields: + if fields: self.fields = fields + self.documentation = documentation def to_schema(self) -> str: result: list = [] diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index fab12f3fc..ef1c4792a 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -21,7 +21,7 @@ def trim_margin(s: str) -> str: if idx < 0: new_lines.append(line) else: - new_lines.append(line[idx + 1:].rstrip()) + new_lines.append(line[idx + 1:]) return "\n".join(new_lines) diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index faf886abf..74f51ddfd 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -27,6 +27,7 @@ def to_schema(self) -> str: append_documentation(result, self.documentation) result.append(f"oneof {self.name} {{") if self.options: + result.append("\n") for option in self.options: append_indented(result, option.to_schema_declaration()) diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 732ee0420..5be5bacd2 100644 --- a/karapace/protobuf/option_element.py +++ 
b/karapace/protobuf/option_element.py @@ -75,7 +75,7 @@ def append_options(options: list): data.append("[\n") for i in range(0, count): - if i < count: + if i < count - 1: endl = "," else: endl = "" diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 10a8875dc..de0f0c643 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -103,18 +103,18 @@ def to_kind(self) -> OptionElement.Kind: return { "bool": OptionElement.Kind.BOOLEAN, "string": OptionElement.Kind.STRING, - "bytes": OptionElement.Kind.STRING, - "double": OptionElement.Kind.STRING, - "float": OptionElement.Kind.STRING, - "fixed32": OptionElement.Kind.STRING, - "fixed64": OptionElement.Kind.STRING, - "int32": OptionElement.Kind.STRING, - "int64": OptionElement.Kind.STRING, - "sfixed32": OptionElement.Kind.STRING, - "sfixed64": OptionElement.Kind.STRING, - "sint32": OptionElement.Kind.STRING, - "sint64": OptionElement.Kind.STRING, - "uint32": OptionElement.Kind.STRING, + "bytes": OptionElement.Kind.NUMBER, + "double": OptionElement.Kind.NUMBER, + "float": OptionElement.Kind.NUMBER, + "fixed32": OptionElement.Kind.NUMBER, + "fixed64": OptionElement.Kind.NUMBER, + "int32": OptionElement.Kind.NUMBER, + "int64": OptionElement.Kind.NUMBER, + "sfixed32": OptionElement.Kind.NUMBER, + "sfixed64": OptionElement.Kind.NUMBER, + "sint32": OptionElement.Kind.NUMBER, + "sint64": OptionElement.Kind.NUMBER, + "uint32": OptionElement.Kind.NUMBER, "uint64": OptionElement.Kind.NUMBER }.get(self.simple_name, OptionElement.Kind.ENUM) diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 8e6c70564..96adc0928 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -17,7 +17,7 @@ def append_documentation(data: list, documentation: str): lines.pop() for line in lines: - data.append("# ") + data.append("// ") data.append(line) data.append("\n") @@ -32,7 +32,7 @@ def append_options(data: list, options: list): data.append("[\n") for 
i in range(0, count): - if i < count: + if i < count - 1: endl = "," else: endl = "" diff --git a/tests/unit/test_enum_element.py b/tests/unit/test_enum_element.py new file mode 100644 index 000000000..46a01a8d4 --- /dev/null +++ b/tests/unit/test_enum_element.py @@ -0,0 +1,153 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/EnumElementTest.kt + +import pytest + +from karapace.protobuf.enum_constant_element import EnumConstantElement +from karapace.protobuf.enum_element import EnumElement +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement + +location: Location = Location.get("file.proto") + + +def test_empty_to_schema(): + element = EnumElement( + location=location, + name="Enum" + ) + expected = "enum Enum {}\n" + assert element.to_schema() == expected + + +def test_simple_to_schema(): + element = EnumElement( + location=location, + name="Enum", + constants=[ + EnumConstantElement(location=location, name="ONE", tag=1), + EnumConstantElement(location=location, name="TWO", tag=2), + EnumConstantElement(location=location, name="SIX", tag=6) + ] + ) + expected = """ + |enum Enum { + | ONE = 1; + | TWO = 2; + | SIX = 6; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_add_multiple_constants(): + one = EnumConstantElement(location=location, name="ONE", tag=1) + two = EnumConstantElement(location=location, name="TWO", tag=2) + six = EnumConstantElement(location=location, name="SIX", tag=6) + element = EnumElement( + location=location, + name="Enum", + constants=[one, two, six] + ) + assert len(element.constants) == 3 + + +def test_simple_with_options_to_schema(): + element = EnumElement( + location=location, + name="Enum", + options=[OptionElement("kit", OptionElement.Kind.STRING, "kat")], + constants=[ + 
EnumConstantElement(location=location, name="ONE", tag=1), + EnumConstantElement(location=location, name="TWO", tag=2), + EnumConstantElement(location=location, name="SIX", tag=6) + ] + ) + expected = """ + |enum Enum { + | option kit = "kat"; + | ONE = 1; + | TWO = 2; + | SIX = 6; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_add_multiple_options(): + kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") + foo_bar = OptionElement("foo", OptionElement.Kind.STRING, "bar") + element = EnumElement( + location=location, + name="Enum", + options=[kit_kat, foo_bar], + constants=[EnumConstantElement(location=location, name="ONE", tag=1)] + ) + assert len(element.options) == 2 + + +def test_simple_with_documentation_to_schema(): + element = EnumElement( + location=location, + name="Enum", + documentation="Hello", + constants=[ + EnumConstantElement(location=location, name="ONE", tag=1), + EnumConstantElement(location=location, name="TWO", tag=2), + EnumConstantElement(location=location, name="SIX", tag=6) + ] + ) + expected = """ + |// Hello + |enum Enum { + | ONE = 1; + | TWO = 2; + | SIX = 6; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_field_to_schema(): + value = EnumConstantElement(location=location, name="NAME", tag=1) + expected = "NAME = 1;\n" + assert value.to_schema() == expected + + +def test_field_with_documentation_to_schema(): + value = EnumConstantElement( + location=location, + name="NAME", + tag=1, + documentation="Hello" + ) + expected = """ + |// Hello + |NAME = 1; + |""" + expected = trim_margin(expected) + assert value.to_schema() == expected + + +def test_field_with_options_to_schema(): + value = EnumConstantElement( + location=location, + name="NAME", + tag=1, + options=[ + OptionElement("kit", OptionElement.Kind.STRING, "kat", True), + OptionElement("tit", OptionElement.Kind.STRING, "tat") + ] + ) + expected = """ + |NAME = 1 [ 
+ | (kit) = "kat", + | tit = "tat" + |]; + |""" + expected = trim_margin(expected) + assert value.to_schema() == expected diff --git a/tests/unit/test_extend_element.py b/tests/unit/test_extend_element.py new file mode 100644 index 000000000..3a3e08605 --- /dev/null +++ b/tests/unit/test_extend_element.py @@ -0,0 +1,142 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ExtendElementTest.kt + +import pytest + +from karapace.protobuf.extend_element import ExtendElement +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location + +location = Location.get("file.proto") + + +def test_empty_to_schema(): + extend = ExtendElement( + location=location, + name="Name" + ) + expected = "extend Name {}\n" + assert extend.to_schema() == expected + + +def test_simple_to_schema(): + extend = ExtendElement( + location=location, + name="Name", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ] + ) + expected = """ + |extend Name { + | required string name = 1; + |} + |""" + expected = trim_margin(expected) + assert extend.to_schema() == expected + + +def test_add_multiple_fields(): + first_name = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="first_name", + tag=1 + ) + last_name = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="last_name", + tag=2 + ) + extend = ExtendElement( + location=location, + name="Name", + fields=[first_name, last_name] + ) + assert len(extend.fields) == 2 + + +def test_simple_with_documentation_to_schema(): + extend = ExtendElement( + location=location, + name="Name", + documentation="Hello", + fields=[ + FieldElement( + location=location, + 
label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ] + ) + expected = """ + |// Hello + |extend Name { + | required string name = 1; + |} + |""" + expected = trim_margin(expected) + assert extend.to_schema() == expected + + +def test_json_name_to_schema(): + extend = ExtendElement( + location=location, + name="Name", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + json_name="my_json", + tag=1 + ) + ] + ) + expected = """ + |extend Name { + | required string name = 1 [json_name = "my_json"]; + |} + |""" + expected = trim_margin(expected) + assert extend.to_schema() == expected + + +def test_default_is_set_in_proto2_file(): + extend = ExtendElement( + location=location, + name="Name", + documentation="Hello", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + default_value="defaultValue" + ) + ] + ) + expected = """ + |// Hello + |extend Name { + | required string name = 1 [default = "defaultValue"]; + |} + |""" + expected = trim_margin(expected) + assert extend.to_schema() == expected diff --git a/tests/unit/test_extensions_element.py b/tests/unit/test_extensions_element.py new file mode 100644 index 000000000..dc1e3e9d1 --- /dev/null +++ b/tests/unit/test_extensions_element.py @@ -0,0 +1,51 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ExtensionsElementTest.kt + +from karapace.protobuf.extensions_element import ExtensionsElement +from karapace.protobuf.kotlin_wrapper import trim_margin, KotlinRange +from karapace.protobuf.location import Location + +from karapace.protobuf.utils import MAX_TAG_VALUE + +location = Location.get("file.proto") + + +def test_single_value_to_schema(): + actual = ExtensionsElement( + location=location, + values=[500] + ) + expected = "extensions 500;\n" + assert actual.to_schema() == 
expected + + +def test_range_to_schema(): + actual = ExtensionsElement( + location=location, + values=[KotlinRange(500, 505)] + ) + expected = "extensions 500 to 505;\n" + assert actual.to_schema() == expected + + +def test_max_range_to_schema(): + actual = ExtensionsElement( + location=location, + values=[KotlinRange(500, MAX_TAG_VALUE)] + ) + expected = "extensions 500 to max;\n" + assert actual.to_schema() == expected + + +def test_with_documentation_to_schema(): + actual = ExtensionsElement( + location=location, + documentation="Hello", + values=[500] + ) + expected = """ + |// Hello + |extensions 500; + |""" + expected = trim_margin(expected) + assert actual.to_schema() == expected diff --git a/tests/unit/test_field_element.py b/tests/unit/test_field_element.py new file mode 100644 index 000000000..939fb9b09 --- /dev/null +++ b/tests/unit/test_field_element.py @@ -0,0 +1,94 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/FieldElementTest.kt + +import pytest + +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement + +location = Location.get("file.proto") + + +def test_field(): + field = FieldElement( + location=location, + label=Field.Label.OPTIONAL, + element_type="CType", + name="ctype", + tag=1, + options=[ + OptionElement("default", OptionElement.Kind.ENUM, "TEST"), + OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") + ] + ) + + assert len(field.options) == 2 + assert OptionElement("default", OptionElement.Kind.ENUM, "TEST") in field.options + assert OptionElement("deprecated", OptionElement.Kind.BOOLEAN, "true") in field.options + + +def test_add_multiple_options(): + kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") + foo_bar = OptionElement("foo", 
OptionElement.Kind.STRING, "bar") + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + options=[kit_kat, foo_bar] + ) + + assert len(field.options) == 2 + + +def test_default_is_set(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + default_value="defaultValue" + ) + + assert field.to_schema() == trim_margin(""" + |required string name = 1 [default = "defaultValue"]; + |""") + + +def test_json_name_and_default_value(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + default_value="defaultValue", + json_name="my_json", + tag=1 + ) + + assert field.to_schema() == trim_margin(""" + |required string name = 1 [ + | default = "defaultValue", + | json_name = "my_json" + |]; + |""") + + +def test_json_name(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + json_name="my_json", + tag=1 + ) + + assert field.to_schema() == trim_margin(""" + |required string name = 1 [json_name = "my_json"]; + |""") diff --git a/tests/unit/test_message_element.py b/tests/unit/test_message_element.py new file mode 100644 index 000000000..e6a425a51 --- /dev/null +++ b/tests/unit/test_message_element.py @@ -0,0 +1,665 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/MessageElementTest.kt + +import pytest + +from karapace.protobuf.extensions_element import ExtensionsElement +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.group_element import GroupElement +from karapace.protobuf.kotlin_wrapper import trim_margin, KotlinRange +from karapace.protobuf.location import Location +from karapace.protobuf.message_element import MessageElement +from 
karapace.protobuf.one_of_element import OneOfElement +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.reserved_document import ReservedElement + +location: Location = Location.get("file.proto") + + +def test_empty_to_schema(): + element = MessageElement( + location=location, + name="Message" + ) + expected = "message Message {}\n" + assert element.to_schema() == expected + + +def test_simple_to_schema(): + element = MessageElement( + location=location, + name="Message", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ] + ) + expected = """ + |message Message { + | required string name = 1; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_add_multiple_fields(): + first_name = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="first_name", + tag=1 + ) + last_name = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="last_name", + tag=2 + ) + element = MessageElement( + location=location, + name="Message", + fields=[first_name, last_name] + ) + assert len(element.fields) == 2 + + +def test_simple_with_documentation_to_schema(): + element = MessageElement( + location=location, + name="Message", + documentation="Hello", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ] + ) + expected = """ + |// Hello + |message Message { + | required string name = 1; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_simple_with_options_to_schema(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + element = MessageElement( + location=location, + name="Message", + fields=[field], + options=[OptionElement("kit", 
OptionElement.Kind.STRING, "kat")] + ) + expected = """message Message { + | option kit = "kat"; + | + | required string name = 1; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_add_multiple_options(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") + foo_bar = OptionElement("foo", OptionElement.Kind.STRING, "bar") + element = MessageElement( + location=location, + name="Message", + fields=[field], + options=[kit_kat, foo_bar] + ) + assert len(element.options) == 2 + + +def test_simple_with_nested_elements_to_schema(): + element = MessageElement( + location=location, + name="Message", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ], + nested_types=[ + MessageElement( + location=location, + name="Nested", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ] + ) + ] + ) + expected = """ + |message Message { + | required string name = 1; + | + | message Nested { + | required string name = 1; + | } + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_add_multiple_types(): + nested1 = MessageElement( + location=location, + name="Nested1") + nested2 = MessageElement( + location=location, + name="Nested2") + element = MessageElement( + location=location, + name="Message", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ], + nested_types=[nested1, nested2] + ) + assert len(element.nested_types) == 2 + + +def test_simple_with_extensions_to_schema(): + element = MessageElement( + location=location, + name="Message", + fields=[ + FieldElement( + location=location, + 
label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ], + extensions=[ExtensionsElement(location=location, values=[KotlinRange(500, 501)])] + ) + expected = """ + |message Message { + | required string name = 1; + | + | extensions 500 to 501; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_add_multiple_extensions(): + fives = ExtensionsElement(location=location, values=[KotlinRange(500, 501)]) + sixes = ExtensionsElement(location=location, values=[KotlinRange(600, 601)]) + element = MessageElement( + location=location, + name="Message", + fields=[ + FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + ], + extensions=[fives, sixes] + ) + assert len(element.extensions) == 2 + + +def test_one_of_to_schema(): + element = MessageElement( + location=location, + name="Message", + one_ofs=[ + OneOfElement( + name="hi", + fields=[ + FieldElement( + location=location, + element_type="string", + name="name", + tag=1 + ) + ] + ) + ] + ) + expected = """ + |message Message { + | oneof hi { + | string name = 1; + | } + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_one_of_with_group_to_schema(): + element = MessageElement( + location=location, + name="Message", + one_ofs=[ + OneOfElement( + name="hi", + fields=[ + FieldElement( + location=location, + element_type="string", + name="name", + tag=1 + ) + ], + groups=[ + GroupElement( + location=location.at(5, 5), + name="Stuff", + tag=3, + label=None, + fields=[ + FieldElement( + location=location.at(6, 7), + label=Field.Label.OPTIONAL, + element_type="int32", + name="result_per_page", + tag=4 + ), + FieldElement( + location=location.at(7, 7), + label=Field.Label.OPTIONAL, + element_type="int32", + name="page_count", + tag=5 + ) + ] + ) + ] + ) + ] + ) + expected = """ + |message Message { + | oneof hi { + | string name = 1; + | + | 
group Stuff = 3 { + | optional int32 result_per_page = 4; + | optional int32 page_count = 5; + | } + | } + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_add_multiple_one_ofs(): + hi = OneOfElement( + name="hi", + fields=[ + FieldElement( + location=location, + element_type="string", + name="name", + tag=1 + ) + ] + ) + hey = OneOfElement( + name="hey", + fields=[ + FieldElement( + location=location, + element_type="string", + name="city", + tag=2 + ) + ] + ) + element = MessageElement( + location=location, + name="Message", + one_ofs=[hi, hey] + ) + assert len(element.one_ofs) == 2 + + +def test_reserved_to_schema(): + element = MessageElement( + location=location, + name="Message", + reserveds=[ + ReservedElement(location=location, values=[10, KotlinRange(12, 14), "foo"]), + ReservedElement(location=location, values=[10]), + ReservedElement(location=location, values=[KotlinRange(12, 14)]), + ReservedElement(location=location, values=["foo"]) + ] + ) + expected = """ + |message Message { + | reserved 10, 12 to 14, "foo"; + | reserved 10; + | reserved 12 to 14; + | reserved "foo"; + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_group_to_schema(): + element = MessageElement( + location=location.at(1, 1), + name="SearchResponse", + groups=[ + GroupElement( + location=location.at(2, 3), + label=Field.Label.REPEATED, + name="Result", + tag=1, + fields=[ + FieldElement( + location=location.at(3, 5), + label=Field.Label.REQUIRED, + element_type="string", + name="url", + tag=2 + ), + FieldElement( + location=location.at(4, 5), + label=Field.Label.OPTIONAL, + element_type="string", + name="title", + tag=3 + ), + FieldElement( + location=location.at(5, 5), + label=Field.Label.REPEATED, + element_type="string", + name="snippets", + tag=4 + ) + ] + ) + ] + ) + expected = """ + |message SearchResponse { + | repeated group Result = 1 { + | required string url = 2; + | 
optional string title = 3; + | repeated string snippets = 4; + | } + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_multiple_everything_to_schema(): + field1 = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + field2 = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="bool", + name="other_name", + tag=2 + ) + one_off_1_field = FieldElement( + location=location, + element_type="string", + name="namey", + tag=3 + ) + one_of_1 = OneOfElement( + name="thingy", + fields=[one_off_1_field] + ) + one_off_2_field = FieldElement( + location=location, + element_type="string", + name="namer", + tag=4 + ) + one_of_2 = OneOfElement( + name="thinger", + fields=[one_off_2_field] + ) + extensions1 = ExtensionsElement(location=location, values=[KotlinRange(500, 501)]) + extensions2 = ExtensionsElement(location=location, values=[503]) + nested = MessageElement( + location=location, + name="Nested", + fields=[field1] + ) + option = OptionElement("kit", OptionElement.Kind.STRING, "kat") + element = MessageElement( + location=location, + name="Message", + fields=[field1, field2], + one_ofs=[one_of_1, one_of_2], + nested_types=[nested], + extensions=[extensions1, extensions2], + options=[option] + ) + expected = """ + |message Message { + | option kit = "kat"; + | + | required string name = 1; + | + | required bool other_name = 2; + | + | oneof thingy { + | string namey = 3; + | } + | + | oneof thinger { + | string namer = 4; + | } + | + | extensions 500 to 501; + | extensions 503; + | + | message Nested { + | required string name = 1; + | } + |} + |""" + expected = trim_margin(expected) + assert element.to_schema() == expected + + +def test_field_to_schema(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1 + ) + expected = "required string name = 1;\n" + assert 
field.to_schema() == expected + + +def test_field_with_default_string_to_schema_in_proto2(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + default_value="benoît" + ) + expected = "required string name = 1 [default = \"benoît\"];\n" + assert field.to_schema() == expected + + +def test_field_with_default_number_to_schema(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="int32", + name="age", + tag=1, + default_value="34" + ) + expected = "required int32 age = 1 [default = 34];\n" + assert field.to_schema() == expected + + +def test_field_with_default_bool_to_schema(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="bool", + name="human", + tag=1, + default_value="true" + ) + expected = "required bool human = 1 [default = true];\n" + assert field.to_schema() == expected + + +def test_one_of_field_to_schema(): + field = FieldElement( + location=location, + element_type="string", + name="name", + tag=1 + ) + expected = "string name = 1;\n" + assert field.to_schema() == expected + + +def test_field_with_documentation_to_schema(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + documentation="Hello" + ) + expected = """// Hello + |required string name = 1; + |""" + expected = trim_margin(expected) + assert field.to_schema() == expected + + +def test_field_with_one_option_to_schema(): + field = FieldElement( + location=location, + label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + options=[OptionElement("kit", OptionElement.Kind.STRING, "kat")] + ) + expected = """required string name = 1 [kit = "kat"]; + |""" + expected = trim_margin(expected) + assert field.to_schema() == expected + + +def test_field_with_more_than_one_option_to_schema(): + field = FieldElement( + location=location, + 
label=Field.Label.REQUIRED, + element_type="string", + name="name", + tag=1, + options=[OptionElement("kit", OptionElement.Kind.STRING, "kat"), + OptionElement("dup", OptionElement.Kind.STRING, "lo")] + ) + expected = """required string name = 1 [ + | kit = "kat", + | dup = "lo" + |]; + |""" + expected = trim_margin(expected) + assert field.to_schema() == expected + + +def test_one_of_with_options(): + expected = """ + |oneof page_info { + | option (my_option) = true; + | + | int32 page_number = 2; + | int32 result_per_page = 3; + |} + |""" + expected = trim_margin(expected) + one_of = OneOfElement( + name="page_info", + fields=[ + FieldElement( + location=location.at(4, 5), + element_type="int32", + name="page_number", + tag=2 + ), + FieldElement( + location=location.at(5, 5), + element_type="int32", + name="result_per_page", + tag=3 + ) + ], + options=[OptionElement("my_option", OptionElement.Kind.BOOLEAN, "true", True)] + ) + assert one_of.to_schema() == expected diff --git a/tests/unit/test_option_element.py b/tests/unit/test_option_element.py new file mode 100644 index 000000000..be911bd04 --- /dev/null +++ b/tests/unit/test_option_element.py @@ -0,0 +1,64 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/OptionElementTest.kt + +import pytest + +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.option_element import OptionElement + + +def test_simple_to_schema(): + option = OptionElement("foo", OptionElement.Kind.STRING, "bar") + expected = """foo = \"bar\"""" + assert option.to_schema() == expected + + +def test_nested_to_schema(): + option = OptionElement("foo.boo", + OptionElement.Kind.OPTION, + OptionElement("bar", OptionElement.Kind.STRING, "baz"), True) + expected = """(foo.boo).bar = \"baz\"""" + assert option.to_schema() == expected + + +def test_list_to_schema(): + option = OptionElement( + "foo", + OptionElement.Kind.LIST, + [ + OptionElement("ping", 
OptionElement.Kind.STRING, "pong", True), + OptionElement("kit", OptionElement.Kind.STRING, "kat") + ], + True + ) + expected = """ + |(foo) = [ + | (ping) = "pong", + | kit = "kat" + |] + """ + expected = trim_margin(expected) + assert option.to_schema() == expected + + +def test_map_to_schema(): + option = OptionElement( + "foo", OptionElement.Kind.MAP, {"ping": "pong", "kit": ["kat", "kot"]} + ) + expected = """ + |foo = { + | ping: "pong", + | kit: [ + | "kat", + | "kot" + | ] + |} + """ + expected = trim_margin(expected) + assert option.to_schema() == expected + + +def test_boolean_to_schema(): + option = OptionElement("foo", OptionElement.Kind.BOOLEAN, "false") + expected = "foo = false" + assert option.to_schema() == expected diff --git a/tests/unit/test_parsing_tester.py b/tests/unit/test_parsing_tester.py new file mode 100644 index 000000000..92da7c626 --- /dev/null +++ b/tests/unit/test_parsing_tester.py @@ -0,0 +1,32 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/parsing_tester.kt + +import fnmatch +import os + +from karapace.protobuf.location import Location +from karapace.protobuf.proto_parser import ProtoParser + +# Recursively traverse a directory and attempt to parse all of its proto files. + + +# Directory under which to search for protos. Change as needed. 
+src = "test" + + +def test_multi_files(): + total = 0 + failed = 0 + + for root, dirnames, filenames in os.walk(src): + for filename in fnmatch.filter(filenames, '*.proto'): + fn = os.path.join(root, filename) + print(f"Parsing {fn}") + total += 1 + try: + data = open(fn).read() + ProtoParser.parse(Location.get(fn), data) + except Exception as e: + print(e) + failed += 1 + print(f"\nTotal: {total} Failed: {failed}") diff --git a/tests/unit/test_proto_file_element.py b/tests/unit/test_proto_file_element.py index 445d648d4..d50b56750 100644 --- a/tests/unit/test_proto_file_element.py +++ b/tests/unit/test_proto_file_element.py @@ -1,7 +1,5 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElementTest.kt -import copy - from karapace.protobuf.extend_element import ExtendElement from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement @@ -14,6 +12,8 @@ from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax +import copy + location: Location = Location.get("some/folder", "file.proto") @@ -28,10 +28,7 @@ def test_empty_to_schema(): def test_empty_with_package_to_schema(): - file = ProtoFileElement( - location=location, - package_name="example.simple" - ) + file = ProtoFileElement(location=location, package_name="example.simple") expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -43,14 +40,8 @@ def test_empty_with_package_to_schema(): def test_simple_to_schema(): - element = MessageElement( - location=location, - name="Message" - ) - file = ProtoFileElement( - location=location, - types=[element] - ) + element = MessageElement(location=location, name="Message") + file = ProtoFileElement(location=location, types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. 
|// Source: file.proto @@ -62,15 +53,8 @@ def test_simple_to_schema(): def test_simple_with_imports_to_schema(): - element = MessageElement( - location=location, - name="Message" - ) - file = ProtoFileElement( - location=location, - imports=["example.other"], - types=[element] - ) + element = MessageElement(location=location, name="Message") + file = ProtoFileElement(location=location, imports=["example.other"], types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -84,28 +68,14 @@ def test_simple_with_imports_to_schema(): def test_add_multiple_dependencies(): - element = MessageElement( - location=location, - name="Message" - ) - file = ProtoFileElement( - location=location, - imports=["example.other", "example.another"], - types=[element] - ) + element = MessageElement(location=location, name="Message") + file = ProtoFileElement(location=location, imports=["example.other", "example.another"], types=[element]) assert len(file.imports) == 2 def test_simple_with_public_imports_to_schema(): - element = MessageElement( - location=location, - name="Message" - ) - file = ProtoFileElement( - location=location, - public_imports=["example.other"], - types=[element] - ) + element = MessageElement(location=location, name="Message") + file = ProtoFileElement(location=location, public_imports=["example.other"], types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. 
|// Source: file.proto @@ -119,28 +89,15 @@ def test_simple_with_public_imports_to_schema(): def test_add_multiple_public_dependencies(): - element = MessageElement( - location=location, - name="Message" - ) - file = ProtoFileElement(location=location, - public_imports=["example.other", "example.another"], - types=[element] - ) + element = MessageElement(location=location, name="Message") + file = ProtoFileElement(location=location, public_imports=["example.other", "example.another"], types=[element]) assert len(file.public_imports) == 2 def test_simple_with_both_imports_to_schema(): - element = MessageElement( - location=location, - name="Message" - ) - file = ProtoFileElement(location=location, - imports=["example.thing"], - public_imports=["example.other"], - types=[element] - ) + element = MessageElement(location=location, name="Message") + file = ProtoFileElement(location=location, imports=["example.thing"], public_imports=["example.other"], types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -155,19 +112,9 @@ def test_simple_with_both_imports_to_schema(): def test_simple_with_services_to_schema(): - element = MessageElement( - location=location, - name="Message" - ) - service = ServiceElement( - location=location, - name="Service" - ) - file = ProtoFileElement( - location=location, - types=[element], - services=[service] - ) + element = MessageElement(location=location, name="Message") + service = ServiceElement(location=location, name="Service") + file = ProtoFileElement(location=location, types=[element], services=[service]) expected = """ |// Proto schema formatted by Wire, do not edit. 
|// Source: file.proto @@ -181,32 +128,16 @@ def test_simple_with_services_to_schema(): def test_add_multiple_services(): - service1 = ServiceElement( - location=location, - name="Service1" - ) - service2 = ServiceElement( - location=location, - name="Service2" - ) - file = ProtoFileElement( - location=location, - services=[service1, service2] - ) + service1 = ServiceElement(location=location, name="Service1") + service2 = ServiceElement(location=location, name="Service2") + file = ProtoFileElement(location=location, services=[service1, service2]) assert len(file.services) == 2 def test_simple_with_options_to_schema(): - element = MessageElement( - location=location, - name="Message" - ) + element = MessageElement(location=location, name="Message") option = OptionElement("kit", OptionElement.Kind.STRING, "kat") - file = ProtoFileElement( - location=location, - options=[option], - types=[element] - ) + file = ProtoFileElement(location=location, options=[option], types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. 
|// Source: file.proto @@ -220,17 +151,10 @@ def test_simple_with_options_to_schema(): def test_add_multiple_options(): - element = MessageElement( - location=location, - name="Message" - ) + element = MessageElement(location=location, name="Message") kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") foo_bar = OptionElement("foo", OptionElement.Kind.STRING, "bar") - file = ProtoFileElement( - location=location, - options=[kit_kat, foo_bar], - types=[element] - ) + file = ProtoFileElement(location=location, options=[kit_kat, foo_bar], types=[element]) assert len(file.options) == 2 @@ -255,10 +179,7 @@ def test_simple_with_extends_to_schema(): def test_add_multiple_extends(): extend1 = ExtendElement(location=location, name="Extend1") extend2 = ExtendElement(location=location, name="Extend2") - file = ProtoFileElement( - location=location, - extend_declarations=[extend1, extend2] - ) + file = ProtoFileElement(location=location, extend_declarations=[extend1, extend2]) assert len(file.extend_declarations) == 2 @@ -269,14 +190,8 @@ def test_multiple_everything_to_schema(): extend2 = ExtendElement(location=location.at(18, 1), name="Extend2") option1 = OptionElement("kit", OptionElement.Kind.STRING, "kat") option2 = OptionElement("foo", OptionElement.Kind.STRING, "bar") - service1 = ServiceElement( - location=location.at(20, 1), - name="Service1" - ) - service2 = ServiceElement( - location=location.at(22, 1), - name="Service2" - ) + service1 = ServiceElement(location=location.at(20, 1), name="Service1") + service2 = ServiceElement(location=location.at(22, 1), name="Service2") file = ProtoFileElement( location=location, package_name="example.simple", @@ -321,11 +236,7 @@ def test_multiple_everything_to_schema(): def test_syntax_to_schema(): element = MessageElement(location=location, name="Message") - file = ProtoFileElement( - location=location, - syntax=Syntax.PROTO_2, - types=[element] - ) + file = ProtoFileElement(location=location, syntax=Syntax.PROTO_2, 
types=[element]) expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -347,11 +258,7 @@ def test_default_is_set_in_proto2(): tag=1, default_value="defaultValue" ) - message = MessageElement( - location=location.at(11, 1), - name="Message", - fields=[field] - ) + message = MessageElement(location=location.at(11, 1), name="Message", fields=[field]) file = ProtoFileElement( syntax=Syntax.PROTO_2, location=location, @@ -438,8 +345,11 @@ def test_convert_packed_option_from_wire_schema_in_proto2(): message = MessageElement( location=location.at(8, 1), name="Message", - fields=[field_numeric, field_numeric_packed_true, field_numeric_packed_false, field_string, - field_string_packed_true, field_string_packed_false]) + fields=[ + field_numeric, field_numeric_packed_true, field_numeric_packed_false, field_string, field_string_packed_true, + field_string_packed_false + ] + ) file = ProtoFileElement( syntax=Syntax.PROTO_2, location=location, @@ -533,8 +443,10 @@ def test_convert_packed_option_from_wire_schema_in_proto3(): message = MessageElement( location=location.at(8, 1), name="Message", - fields=[field_numeric, field_numeric_packed_true, field_numeric_packed_false, field_string, - field_string_packed_true, field_string_packed_false] + fields=[ + field_numeric, field_numeric_packed_true, field_numeric_packed_false, field_string, field_string_packed_true, + field_string_packed_false + ] ) file = ProtoFileElement( syntax=Syntax.PROTO_3, diff --git a/tests/unit/test_service_element.py b/tests/unit/test_service_element.py new file mode 100644 index 000000000..2e84671c0 --- /dev/null +++ b/tests/unit/test_service_element.py @@ -0,0 +1,224 @@ +# Ported from square/wire: +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ServiceElementTest.kt + +import pytest + +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location +from karapace.protobuf.option_element 
import OptionElement +from karapace.protobuf.rpc_element import RpcElement +from karapace.protobuf.service_element import ServiceElement + +location: Location = Location.get("file.proto") + + +def test_empty_to_schema(): + service = ServiceElement( + location=location, + name="Service" + ) + expected = "service Service {}\n" + assert service.to_schema() == expected + + +def test_single_to_schema(): + service = ServiceElement( + location=location, + name="Service", + rpcs=[ + RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType" + ) + ] + ) + expected = """ + |service Service { + | rpc Name (RequestType) returns (ResponseType); + |} + |""" + expected = trim_margin(expected) + assert service.to_schema() == expected + + +def test_add_multiple_rpcs(): + first_name = RpcElement( + location=location, + name="FirstName", + request_type="RequestType", + response_type="ResponseType" + ) + last_name = RpcElement( + location=location, + name="LastName", + request_type="RequestType", + response_type="ResponseType" + ) + service = ServiceElement( + location=location, + name="Service", + rpcs=[first_name, last_name] + ) + assert len(service.rpcs) == 2 + + +def test_single_with_options_to_schema(): + service = ServiceElement( + location=location, + name="Service", + options=[OptionElement("foo", OptionElement.Kind.STRING, "bar")], + rpcs=[ + RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType" + ) + ] + ) + expected = """ + |service Service { + | option foo = "bar"; + | + | rpc Name (RequestType) returns (ResponseType); + |} + |""" + expected = trim_margin(expected) + assert service.to_schema() == expected + + +def test_add_multiple_options(): + kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") + foo_bar = OptionElement("foo", OptionElement.Kind.STRING, "bar") + service = ServiceElement( + location=location, + name="Service", + options=[kit_kat, foo_bar], 
+ rpcs=[ + RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType" + ) + ] + ) + assert len(service.options) == 2 + + +def test_single_with_documentation_to_schema(): + service = ServiceElement( + location=location, + name="Service", + documentation="Hello", + rpcs=[ + RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType" + ) + ] + ) + expected = """ + |// Hello + |service Service { + | rpc Name (RequestType) returns (ResponseType); + |} + |""" + expected = trim_margin(expected) + assert service.to_schema() == expected + + +def test_multiple_to_schema(): + rpc = RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType" + ) + service = ServiceElement( + location=location, + name="Service", + rpcs=[rpc, rpc] + ) + expected = """ + |service Service { + | rpc Name (RequestType) returns (ResponseType); + | rpc Name (RequestType) returns (ResponseType); + |} + |""" + expected = trim_margin(expected) + + assert service.to_schema() == expected + + +def test_rpc_to_schema(): + rpc = RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType" + ) + expected = "rpc Name (RequestType) returns (ResponseType);\n" + assert rpc.to_schema() == expected + + +def test_rpc_with_documentation_to_schema(): + rpc = RpcElement( + location=location, + name="Name", + documentation="Hello", + request_type="RequestType", + response_type="ResponseType" + ) + expected = """ + |// Hello + |rpc Name (RequestType) returns (ResponseType); + |""" + expected = trim_margin(expected) + assert rpc.to_schema() == expected + + +def test_rpc_with_options_to_schema(): + rpc = RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType", + options=[OptionElement("foo", OptionElement.Kind.STRING, "bar")] + ) + + expected = """ + |rpc Name (RequestType) 
returns (ResponseType) { + | option foo = "bar"; + |}; + |""" + expected = trim_margin(expected) + assert rpc.to_schema() == expected + + +def test_rpc_with_request_streaming_to_schema(): + rpc = RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType", + request_streaming=True + ) + expected = "rpc Name (stream RequestType) returns (ResponseType);\n" + assert rpc.to_schema() == expected + + +def test_rpc_with_response_streaming_to_schema(): + rpc = RpcElement( + location=location, + name="Name", + request_type="RequestType", + response_type="ResponseType", + response_streaming=True + ) + expected = "rpc Name (RequestType) returns (stream ResponseType);\n" + assert rpc.to_schema() == expected From 29eb67fcbe822ff519fd1cc0c6355f3fbe3a7f3a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 9 Jun 2021 16:32:21 +0300 Subject: [PATCH 022/168] fixup lint issues --- tests/unit/test_enum_element.py | 20 +- tests/unit/test_extend_element.py | 49 +--- tests/unit/test_extensions_element.py | 24 +- tests/unit/test_field_element.py | 28 +-- tests/unit/test_message_element.py | 309 ++++---------------------- tests/unit/test_option_element.py | 19 +- tests/unit/test_parsing_tester.py | 11 +- tests/unit/test_service_element.py | 101 ++------- 8 files changed, 94 insertions(+), 467 deletions(-) diff --git a/tests/unit/test_enum_element.py b/tests/unit/test_enum_element.py index 46a01a8d4..f098f2d63 100644 --- a/tests/unit/test_enum_element.py +++ b/tests/unit/test_enum_element.py @@ -1,8 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/EnumElementTest.kt -import pytest - from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.kotlin_wrapper import trim_margin @@ -13,10 +11,7 @@ def test_empty_to_schema(): - element = EnumElement( - location=location, - 
name="Enum" - ) + element = EnumElement(location=location, name="Enum") expected = "enum Enum {}\n" assert element.to_schema() == expected @@ -46,11 +41,7 @@ def test_add_multiple_constants(): one = EnumConstantElement(location=location, name="ONE", tag=1) two = EnumConstantElement(location=location, name="TWO", tag=2) six = EnumConstantElement(location=location, name="SIX", tag=6) - element = EnumElement( - location=location, - name="Enum", - constants=[one, two, six] - ) + element = EnumElement(location=location, name="Enum", constants=[one, two, six]) assert len(element.constants) == 3 @@ -119,12 +110,7 @@ def test_field_to_schema(): def test_field_with_documentation_to_schema(): - value = EnumConstantElement( - location=location, - name="NAME", - tag=1, - documentation="Hello" - ) + value = EnumConstantElement(location=location, name="NAME", tag=1, documentation="Hello") expected = """ |// Hello |NAME = 1; diff --git a/tests/unit/test_extend_element.py b/tests/unit/test_extend_element.py index 3a3e08605..7c718a8b0 100644 --- a/tests/unit/test_extend_element.py +++ b/tests/unit/test_extend_element.py @@ -1,8 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ExtendElementTest.kt -import pytest - from karapace.protobuf.extend_element import ExtendElement from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement @@ -13,10 +11,7 @@ def test_empty_to_schema(): - extend = ExtendElement( - location=location, - name="Name" - ) + extend = ExtendElement(location=location, name="Name") expected = "extend Name {}\n" assert extend.to_schema() == expected @@ -25,15 +20,7 @@ def test_simple_to_schema(): extend = ExtendElement( location=location, name="Name", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ] + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, 
element_type="string", name="name", tag=1)] ) expected = """ |extend Name { @@ -45,25 +32,9 @@ def test_simple_to_schema(): def test_add_multiple_fields(): - first_name = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="first_name", - tag=1 - ) - last_name = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="last_name", - tag=2 - ) - extend = ExtendElement( - location=location, - name="Name", - fields=[first_name, last_name] - ) + first_name = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="first_name", tag=1) + last_name = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="last_name", tag=2) + extend = ExtendElement(location=location, name="Name", fields=[first_name, last_name]) assert len(extend.fields) == 2 @@ -72,15 +43,7 @@ def test_simple_with_documentation_to_schema(): location=location, name="Name", documentation="Hello", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ] + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1)] ) expected = """ |// Hello diff --git a/tests/unit/test_extensions_element.py b/tests/unit/test_extensions_element.py index dc1e3e9d1..0b40c9251 100644 --- a/tests/unit/test_extensions_element.py +++ b/tests/unit/test_extensions_element.py @@ -2,47 +2,33 @@ # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ExtensionsElementTest.kt from karapace.protobuf.extensions_element import ExtensionsElement -from karapace.protobuf.kotlin_wrapper import trim_margin, KotlinRange +from karapace.protobuf.kotlin_wrapper import KotlinRange, trim_margin from karapace.protobuf.location import Location - from karapace.protobuf.utils import MAX_TAG_VALUE location = Location.get("file.proto") def 
test_single_value_to_schema(): - actual = ExtensionsElement( - location=location, - values=[500] - ) + actual = ExtensionsElement(location=location, values=[500]) expected = "extensions 500;\n" assert actual.to_schema() == expected def test_range_to_schema(): - actual = ExtensionsElement( - location=location, - values=[KotlinRange(500, 505)] - ) + actual = ExtensionsElement(location=location, values=[KotlinRange(500, 505)]) expected = "extensions 500 to 505;\n" assert actual.to_schema() == expected def test_max_range_to_schema(): - actual = ExtensionsElement( - location=location, - values=[KotlinRange(500, MAX_TAG_VALUE)] - ) + actual = ExtensionsElement(location=location, values=[KotlinRange(500, MAX_TAG_VALUE)]) expected = "extensions 500 to max;\n" assert actual.to_schema() == expected def test_with_documentation_to_schema(): - actual = ExtensionsElement( - location=location, - documentation="Hello", - values=[500] - ) + actual = ExtensionsElement(location=location, documentation="Hello", values=[500]) expected = """ |// Hello |extensions 500; diff --git a/tests/unit/test_field_element.py b/tests/unit/test_field_element.py index 939fb9b09..9f38093d6 100644 --- a/tests/unit/test_field_element.py +++ b/tests/unit/test_field_element.py @@ -1,8 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/FieldElementTest.kt -import pytest - from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement from karapace.protobuf.kotlin_wrapper import trim_margin @@ -34,12 +32,7 @@ def test_add_multiple_options(): kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") foo_bar = OptionElement("foo", OptionElement.Kind.STRING, "bar") field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1, - options=[kit_kat, foo_bar] + location=location, label=Field.Label.REQUIRED, element_type="string", name="name", 
tag=1, options=[kit_kat, foo_bar] ) assert len(field.options) == 2 @@ -55,7 +48,8 @@ def test_default_is_set(): default_value="defaultValue" ) - assert field.to_schema() == trim_margin(""" + assert field.to_schema( + ) == trim_margin(""" |required string name = 1 [default = "defaultValue"]; |""") @@ -71,24 +65,22 @@ def test_json_name_and_default_value(): tag=1 ) - assert field.to_schema() == trim_margin(""" + assert field.to_schema() == trim_margin( + """ |required string name = 1 [ | default = "defaultValue", | json_name = "my_json" |]; - |""") + |""" + ) def test_json_name(): field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - json_name="my_json", - tag=1 + location=location, label=Field.Label.REQUIRED, element_type="string", name="name", json_name="my_json", tag=1 ) - assert field.to_schema() == trim_margin(""" + assert field.to_schema( + ) == trim_margin(""" |required string name = 1 [json_name = "my_json"]; |""") diff --git a/tests/unit/test_message_element.py b/tests/unit/test_message_element.py index e6a425a51..ab67a58cd 100644 --- a/tests/unit/test_message_element.py +++ b/tests/unit/test_message_element.py @@ -1,13 +1,11 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/MessageElementTest.kt -import pytest - from karapace.protobuf.extensions_element import ExtensionsElement from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement from karapace.protobuf.group_element import GroupElement -from karapace.protobuf.kotlin_wrapper import trim_margin, KotlinRange +from karapace.protobuf.kotlin_wrapper import KotlinRange, trim_margin from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement from karapace.protobuf.one_of_element import OneOfElement @@ -18,10 +16,7 @@ def test_empty_to_schema(): - element = MessageElement( - location=location, - 
name="Message" - ) + element = MessageElement(location=location, name="Message") expected = "message Message {}\n" assert element.to_schema() == expected @@ -30,15 +25,7 @@ def test_simple_to_schema(): element = MessageElement( location=location, name="Message", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ] + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1)] ) expected = """ |message Message { @@ -50,25 +37,9 @@ def test_simple_to_schema(): def test_add_multiple_fields(): - first_name = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="first_name", - tag=1 - ) - last_name = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="last_name", - tag=2 - ) - element = MessageElement( - location=location, - name="Message", - fields=[first_name, last_name] - ) + first_name = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="first_name", tag=1) + last_name = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="last_name", tag=2) + element = MessageElement(location=location, name="Message", fields=[first_name, last_name]) assert len(element.fields) == 2 @@ -77,15 +48,7 @@ def test_simple_with_documentation_to_schema(): location=location, name="Message", documentation="Hello", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ] + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1)] ) expected = """ |// Hello @@ -98,18 +61,9 @@ def test_simple_with_documentation_to_schema(): def test_simple_with_options_to_schema(): - field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - 
name="name", - tag=1 - ) + field = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1) element = MessageElement( - location=location, - name="Message", - fields=[field], - options=[OptionElement("kit", OptionElement.Kind.STRING, "kat")] + location=location, name="Message", fields=[field], options=[OptionElement("kit", OptionElement.Kind.STRING, "kat")] ) expected = """message Message { | option kit = "kat"; @@ -122,21 +76,10 @@ def test_simple_with_options_to_schema(): def test_add_multiple_options(): - field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) + field = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1) kit_kat = OptionElement("kit", OptionElement.Kind.STRING, "kat") foo_bar = OptionElement("foo", OptionElement.Kind.STRING, "bar") - element = MessageElement( - location=location, - name="Message", - fields=[field], - options=[kit_kat, foo_bar] - ) + element = MessageElement(location=location, name="Message", fields=[field], options=[kit_kat, foo_bar]) assert len(element.options) == 2 @@ -144,27 +87,13 @@ def test_simple_with_nested_elements_to_schema(): element = MessageElement( location=location, name="Message", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ], + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1)], nested_types=[ MessageElement( location=location, name="Nested", fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) + FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1) ] ) ] @@ -183,24 +112,12 @@ def test_simple_with_nested_elements_to_schema(): def test_add_multiple_types(): - nested1 = 
MessageElement( - location=location, - name="Nested1") - nested2 = MessageElement( - location=location, - name="Nested2") + nested1 = MessageElement(location=location, name="Nested1") + nested2 = MessageElement(location=location, name="Nested2") element = MessageElement( location=location, name="Message", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ], + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1)], nested_types=[nested1, nested2] ) assert len(element.nested_types) == 2 @@ -210,15 +127,7 @@ def test_simple_with_extensions_to_schema(): element = MessageElement( location=location, name="Message", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ], + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1)], extensions=[ExtensionsElement(location=location, values=[KotlinRange(500, 501)])] ) expected = """ @@ -238,15 +147,7 @@ def test_add_multiple_extensions(): element = MessageElement( location=location, name="Message", - fields=[ - FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) - ], + fields=[FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1)], extensions=[fives, sixes] ) assert len(element.extensions) == 2 @@ -257,17 +158,7 @@ def test_one_of_to_schema(): location=location, name="Message", one_ofs=[ - OneOfElement( - name="hi", - fields=[ - FieldElement( - location=location, - element_type="string", - name="name", - tag=1 - ) - ] - ) + OneOfElement(name="hi", fields=[FieldElement(location=location, element_type="string", name="name", tag=1)]) ] ) expected = """ @@ -288,14 +179,7 @@ def test_one_of_with_group_to_schema(): one_ofs=[ OneOfElement( name="hi", - 
fields=[ - FieldElement( - location=location, - element_type="string", - name="name", - tag=1 - ) - ], + fields=[FieldElement(location=location, element_type="string", name="name", tag=1)], groups=[ GroupElement( location=location.at(5, 5), @@ -327,7 +211,7 @@ def test_one_of_with_group_to_schema(): |message Message { | oneof hi { | string name = 1; - | + | | group Stuff = 3 { | optional int32 result_per_page = 4; | optional int32 page_count = 5; @@ -340,33 +224,9 @@ def test_one_of_with_group_to_schema(): def test_add_multiple_one_ofs(): - hi = OneOfElement( - name="hi", - fields=[ - FieldElement( - location=location, - element_type="string", - name="name", - tag=1 - ) - ] - ) - hey = OneOfElement( - name="hey", - fields=[ - FieldElement( - location=location, - element_type="string", - name="city", - tag=2 - ) - ] - ) - element = MessageElement( - location=location, - name="Message", - one_ofs=[hi, hey] - ) + hi = OneOfElement(name="hi", fields=[FieldElement(location=location, element_type="string", name="name", tag=1)]) + hey = OneOfElement(name="hey", fields=[FieldElement(location=location, element_type="string", name="city", tag=2)]) + element = MessageElement(location=location, name="Message", one_ofs=[hi, hey]) assert len(element.one_ofs) == 2 @@ -405,18 +265,10 @@ def test_group_to_schema(): tag=1, fields=[ FieldElement( - location=location.at(3, 5), - label=Field.Label.REQUIRED, - element_type="string", - name="url", - tag=2 + location=location.at(3, 5), label=Field.Label.REQUIRED, element_type="string", name="url", tag=2 ), FieldElement( - location=location.at(4, 5), - label=Field.Label.OPTIONAL, - element_type="string", - name="title", - tag=3 + location=location.at(4, 5), label=Field.Label.OPTIONAL, element_type="string", name="title", tag=3 ), FieldElement( location=location.at(5, 5), @@ -443,47 +295,15 @@ def test_group_to_schema(): def test_multiple_everything_to_schema(): - field1 = FieldElement( - location=location, - label=Field.Label.REQUIRED, - 
element_type="string", - name="name", - tag=1 - ) - field2 = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="bool", - name="other_name", - tag=2 - ) - one_off_1_field = FieldElement( - location=location, - element_type="string", - name="namey", - tag=3 - ) - one_of_1 = OneOfElement( - name="thingy", - fields=[one_off_1_field] - ) - one_off_2_field = FieldElement( - location=location, - element_type="string", - name="namer", - tag=4 - ) - one_of_2 = OneOfElement( - name="thinger", - fields=[one_off_2_field] - ) + field1 = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1) + field2 = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="bool", name="other_name", tag=2) + one_off_1_field = FieldElement(location=location, element_type="string", name="namey", tag=3) + one_of_1 = OneOfElement(name="thingy", fields=[one_off_1_field]) + one_off_2_field = FieldElement(location=location, element_type="string", name="namer", tag=4) + one_of_2 = OneOfElement(name="thinger", fields=[one_off_2_field]) extensions1 = ExtensionsElement(location=location, values=[KotlinRange(500, 501)]) extensions2 = ExtensionsElement(location=location, values=[503]) - nested = MessageElement( - location=location, - name="Nested", - fields=[field1] - ) + nested = MessageElement(location=location, name="Nested", fields=[field1]) option = OptionElement("kit", OptionElement.Kind.STRING, "kat") element = MessageElement( location=location, @@ -523,25 +343,14 @@ def test_multiple_everything_to_schema(): def test_field_to_schema(): - field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1 - ) + field = FieldElement(location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1) expected = "required string name = 1;\n" assert field.to_schema() == expected def test_field_with_default_string_to_schema_in_proto2(): 
field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1, - default_value="benoît" + location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1, default_value="benoît" ) expected = "required string name = 1 [default = \"benoît\"];\n" assert field.to_schema() == expected @@ -549,12 +358,7 @@ def test_field_with_default_string_to_schema_in_proto2(): def test_field_with_default_number_to_schema(): field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="int32", - name="age", - tag=1, - default_value="34" + location=location, label=Field.Label.REQUIRED, element_type="int32", name="age", tag=1, default_value="34" ) expected = "required int32 age = 1 [default = 34];\n" assert field.to_schema() == expected @@ -562,36 +366,21 @@ def test_field_with_default_number_to_schema(): def test_field_with_default_bool_to_schema(): field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="bool", - name="human", - tag=1, - default_value="true" + location=location, label=Field.Label.REQUIRED, element_type="bool", name="human", tag=1, default_value="true" ) expected = "required bool human = 1 [default = true];\n" assert field.to_schema() == expected def test_one_of_field_to_schema(): - field = FieldElement( - location=location, - element_type="string", - name="name", - tag=1 - ) + field = FieldElement(location=location, element_type="string", name="name", tag=1) expected = "string name = 1;\n" assert field.to_schema() == expected def test_field_with_documentation_to_schema(): field = FieldElement( - location=location, - label=Field.Label.REQUIRED, - element_type="string", - name="name", - tag=1, - documentation="Hello" + location=location, label=Field.Label.REQUIRED, element_type="string", name="name", tag=1, documentation="Hello" ) expected = """// Hello |required string name = 1; @@ -622,8 +411,10 @@ def 
test_field_with_more_than_one_option_to_schema(): element_type="string", name="name", tag=1, - options=[OptionElement("kit", OptionElement.Kind.STRING, "kat"), - OptionElement("dup", OptionElement.Kind.STRING, "lo")] + options=[ + OptionElement("kit", OptionElement.Kind.STRING, "kat"), + OptionElement("dup", OptionElement.Kind.STRING, "lo") + ] ) expected = """required string name = 1 [ | kit = "kat", @@ -647,18 +438,8 @@ def test_one_of_with_options(): one_of = OneOfElement( name="page_info", fields=[ - FieldElement( - location=location.at(4, 5), - element_type="int32", - name="page_number", - tag=2 - ), - FieldElement( - location=location.at(5, 5), - element_type="int32", - name="result_per_page", - tag=3 - ) + FieldElement(location=location.at(4, 5), element_type="int32", name="page_number", tag=2), + FieldElement(location=location.at(5, 5), element_type="int32", name="result_per_page", tag=3) ], options=[OptionElement("my_option", OptionElement.Kind.BOOLEAN, "true", True)] ) diff --git a/tests/unit/test_option_element.py b/tests/unit/test_option_element.py index be911bd04..782789c98 100644 --- a/tests/unit/test_option_element.py +++ b/tests/unit/test_option_element.py @@ -1,8 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/OptionElementTest.kt -import pytest - from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.option_element import OptionElement @@ -14,22 +12,19 @@ def test_simple_to_schema(): def test_nested_to_schema(): - option = OptionElement("foo.boo", - OptionElement.Kind.OPTION, - OptionElement("bar", OptionElement.Kind.STRING, "baz"), True) + option = OptionElement( + "foo.boo", OptionElement.Kind.OPTION, OptionElement("bar", OptionElement.Kind.STRING, "baz"), True + ) expected = """(foo.boo).bar = \"baz\"""" assert option.to_schema() == expected def test_list_to_schema(): option = OptionElement( - "foo", - OptionElement.Kind.LIST, - [ + "foo", 
OptionElement.Kind.LIST, [ OptionElement("ping", OptionElement.Kind.STRING, "pong", True), OptionElement("kit", OptionElement.Kind.STRING, "kat") - ], - True + ], True ) expected = """ |(foo) = [ @@ -42,9 +37,7 @@ def test_list_to_schema(): def test_map_to_schema(): - option = OptionElement( - "foo", OptionElement.Kind.MAP, {"ping": "pong", "kit": ["kat", "kot"]} - ) + option = OptionElement("foo", OptionElement.Kind.MAP, {"ping": "pong", "kit": ["kat", "kot"]}) expected = """ |foo = { | ping: "pong", diff --git a/tests/unit/test_parsing_tester.py b/tests/unit/test_parsing_tester.py index 92da7c626..42f8aaf28 100644 --- a/tests/unit/test_parsing_tester.py +++ b/tests/unit/test_parsing_tester.py @@ -1,14 +1,13 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/parsing_tester.kt -import fnmatch -import os - from karapace.protobuf.location import Location from karapace.protobuf.proto_parser import ProtoParser -# Recursively traverse a directory and attempt to parse all of its proto files. +import fnmatch +import os +# Recursively traverse a directory and attempt to parse all of its proto files. # Directory under which to search for protos. Change as needed. 
src = "test" @@ -18,7 +17,7 @@ def test_multi_files(): total = 0 failed = 0 - for root, dirnames, filenames in os.walk(src): + for root, dirnames, filenames in os.walk(src): # pylint: disable=W0612 for filename in fnmatch.filter(filenames, '*.proto'): fn = os.path.join(root, filename) print(f"Parsing {fn}") @@ -26,7 +25,7 @@ def test_multi_files(): try: data = open(fn).read() ProtoParser.parse(Location.get(fn), data) - except Exception as e: + except Exception as e: # pylint: disable=broad-except print(e) failed += 1 print(f"\nTotal: {total} Failed: {failed}") diff --git a/tests/unit/test_service_element.py b/tests/unit/test_service_element.py index 2e84671c0..9f4935f15 100644 --- a/tests/unit/test_service_element.py +++ b/tests/unit/test_service_element.py @@ -1,8 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ServiceElementTest.kt -import pytest - from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement @@ -13,10 +11,7 @@ def test_empty_to_schema(): - service = ServiceElement( - location=location, - name="Service" - ) + service = ServiceElement(location=location, name="Service") expected = "service Service {}\n" assert service.to_schema() == expected @@ -25,14 +20,7 @@ def test_single_to_schema(): service = ServiceElement( location=location, name="Service", - rpcs=[ - RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType" - ) - ] + rpcs=[RpcElement(location=location, name="Name", request_type="RequestType", response_type="ResponseType")] ) expected = """ |service Service { @@ -44,23 +32,9 @@ def test_single_to_schema(): def test_add_multiple_rpcs(): - first_name = RpcElement( - location=location, - name="FirstName", - request_type="RequestType", - response_type="ResponseType" - ) - last_name = RpcElement( - location=location, - 
name="LastName", - request_type="RequestType", - response_type="ResponseType" - ) - service = ServiceElement( - location=location, - name="Service", - rpcs=[first_name, last_name] - ) + first_name = RpcElement(location=location, name="FirstName", request_type="RequestType", response_type="ResponseType") + last_name = RpcElement(location=location, name="LastName", request_type="RequestType", response_type="ResponseType") + service = ServiceElement(location=location, name="Service", rpcs=[first_name, last_name]) assert len(service.rpcs) == 2 @@ -69,14 +43,7 @@ def test_single_with_options_to_schema(): location=location, name="Service", options=[OptionElement("foo", OptionElement.Kind.STRING, "bar")], - rpcs=[ - RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType" - ) - ] + rpcs=[RpcElement(location=location, name="Name", request_type="RequestType", response_type="ResponseType")] ) expected = """ |service Service { @@ -96,14 +63,7 @@ def test_add_multiple_options(): location=location, name="Service", options=[kit_kat, foo_bar], - rpcs=[ - RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType" - ) - ] + rpcs=[RpcElement(location=location, name="Name", request_type="RequestType", response_type="ResponseType")] ) assert len(service.options) == 2 @@ -113,14 +73,7 @@ def test_single_with_documentation_to_schema(): location=location, name="Service", documentation="Hello", - rpcs=[ - RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType" - ) - ] + rpcs=[RpcElement(location=location, name="Name", request_type="RequestType", response_type="ResponseType")] ) expected = """ |// Hello @@ -133,17 +86,8 @@ def test_single_with_documentation_to_schema(): def test_multiple_to_schema(): - rpc = RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType" - ) - service = 
ServiceElement( - location=location, - name="Service", - rpcs=[rpc, rpc] - ) + rpc = RpcElement(location=location, name="Name", request_type="RequestType", response_type="ResponseType") + service = ServiceElement(location=location, name="Service", rpcs=[rpc, rpc]) expected = """ |service Service { | rpc Name (RequestType) returns (ResponseType); @@ -156,23 +100,14 @@ def test_multiple_to_schema(): def test_rpc_to_schema(): - rpc = RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType" - ) + rpc = RpcElement(location=location, name="Name", request_type="RequestType", response_type="ResponseType") expected = "rpc Name (RequestType) returns (ResponseType);\n" assert rpc.to_schema() == expected def test_rpc_with_documentation_to_schema(): rpc = RpcElement( - location=location, - name="Name", - documentation="Hello", - request_type="RequestType", - response_type="ResponseType" + location=location, name="Name", documentation="Hello", request_type="RequestType", response_type="ResponseType" ) expected = """ |// Hello @@ -202,11 +137,7 @@ def test_rpc_with_options_to_schema(): def test_rpc_with_request_streaming_to_schema(): rpc = RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType", - request_streaming=True + location=location, name="Name", request_type="RequestType", response_type="ResponseType", request_streaming=True ) expected = "rpc Name (stream RequestType) returns (ResponseType);\n" assert rpc.to_schema() == expected @@ -214,11 +145,7 @@ def test_rpc_with_request_streaming_to_schema(): def test_rpc_with_response_streaming_to_schema(): rpc = RpcElement( - location=location, - name="Name", - request_type="RequestType", - response_type="ResponseType", - response_streaming=True + location=location, name="Name", request_type="RequestType", response_type="ResponseType", response_streaming=True ) expected = "rpc Name (RequestType) returns (stream ResponseType);\n" 
assert rpc.to_schema() == expected From 1271cf32d99bb63fff2bdda22f5f1ec3c168d828 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 10 Jun 2021 14:38:13 +0300 Subject: [PATCH 023/168] fixup lint changing trailing whitespace strings issue --- tests/unit/test_message_element.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_message_element.py b/tests/unit/test_message_element.py index ab67a58cd..2c3af3d11 100644 --- a/tests/unit/test_message_element.py +++ b/tests/unit/test_message_element.py @@ -207,11 +207,13 @@ def test_one_of_with_group_to_schema(): ) ] ) + expected = """ |message Message { | oneof hi { | string name = 1; - | + | """ + \ + """ | group Stuff = 3 { | optional int32 result_per_page = 4; | optional int32 page_count = 5; From 35834a8a73d75dc1f644f6c277079d85c918ec0b Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 11 Jun 2021 15:35:31 +0300 Subject: [PATCH 024/168] fixup set of PR issues --- karapace/protobuf/extend_element.py | 2 +- karapace/protobuf/message_element.py | 3 +- karapace/protobuf/option_element.py | 29 ++--------- karapace/protobuf/utils.py | 13 ++++- karapace/schema_reader.py | 2 + tests/unit/test_parsing_tester.py | 2 +- tests/unit/test_proto_file_element.py | 23 ++------- tests/unit/test_proto_parser.py | 73 +++++++-------------------- 8 files changed, 42 insertions(+), 105 deletions(-) diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index d6164ad47..9ceba940e 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -1,5 +1,5 @@ # Ported from square/wire: -# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtendedElement.kt +# wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtendElement.kt from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented diff --git 
a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index d188b007c..6603559f9 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -14,12 +14,13 @@ class MessageElement(TypeElement): groups: list = [] options: list = [] nested_types: list = [] + documentation = "" def __init__( self, location: Location, name: str, - documentation: str = None, + documentation: str = "", nested_types: list = None, options: list = None, reserveds: list = None, diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 5be5bacd2..2e157c69d 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -4,16 +4,7 @@ from enum import Enum # from karapace.protobuf.kotlin_wrapper import * # from karapace.protobuf.kotlin_wrapper import * -from karapace.protobuf.utils import append_indented - - -def try_to_schema(obj: object) -> str: - try: - return obj.to_schema() - except AttributeError: - if isinstance(obj, str): - return obj - raise AttributeError +from karapace.protobuf.utils import append_indented, append_options, try_to_schema class ListOptionElement(list): @@ -65,22 +56,8 @@ def to_schema_declaration(self): @staticmethod def append_options(options: list): - data: list = list() - count = len(options) - if count == 1: - data.append('[') - data.append(try_to_schema(options[0])) - data.append(']') - return "".join(data) - - data.append("[\n") - for i in range(0, count): - if i < count - 1: - endl = "," - else: - endl = "" - append_indented(data, try_to_schema(options[i]) + endl) - data.append(']') + data: list = [] + append_options(data, options) return "".join(data) def format_option_map(self, value: dict) -> str: diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 96adc0928..0c49c95bf 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -26,7 +26,7 @@ def append_options(data: list, options: list): count = 
len(options) if count == 1: data.append('[') - data.append(options[0].to_schema()) + data.append(try_to_schema(options[0])) data.append(']') return @@ -36,10 +36,19 @@ def append_options(data: list, options: list): endl = "," else: endl = "" - append_indented(data, options[i].to_schema() + endl) + append_indented(data, try_to_schema(options[i]) + endl) data.append(']') +def try_to_schema(obj: object) -> str: + try: + return obj.to_schema() + except AttributeError: + if isinstance(obj, str): + return obj + raise AttributeError + + def append_indented(data: list, value: str): lines = value.split("\n") if len(lines) > 1 and not lines[-1]: diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index c936bcf01..c7645ea31 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -107,6 +107,8 @@ def to_json(self): return self.schema.schema if isinstance(self.schema, AvroSchema): return self.schema.to_json(names=None) + if isinstance(self.schema, ProtobufSchema): + return self.schema.to_json() return self.schema def __str__(self) -> str: diff --git a/tests/unit/test_parsing_tester.py b/tests/unit/test_parsing_tester.py index 42f8aaf28..cedd494c5 100644 --- a/tests/unit/test_parsing_tester.py +++ b/tests/unit/test_parsing_tester.py @@ -1,5 +1,5 @@ # Ported from square/wire: -# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/parsing_tester.kt +# wire-library/wire-schema/src/jvmTest/kotlin/com/squareup/wire/schema/internal/parser/ParsingTester.kt from karapace.protobuf.location import Location from karapace.protobuf.proto_parser import ProtoParser diff --git a/tests/unit/test_proto_file_element.py b/tests/unit/test_proto_file_element.py index d50b56750..cdb4ee5c9 100644 --- a/tests/unit/test_proto_file_element.py +++ b/tests/unit/test_proto_file_element.py @@ -331,8 +331,6 @@ def test_convert_packed_option_from_wire_schema_in_proto2(): tag=5, options=[PACKED_OPTION_ELEMENT] ) - el = 
copy.copy(PACKED_OPTION_ELEMENT) - el.value = "false" field_string_packed_false = FieldElement( location=location.at(19, 3), label=Field.Label.REPEATED, @@ -350,14 +348,7 @@ def test_convert_packed_option_from_wire_schema_in_proto2(): field_string_packed_false ] ) - file = ProtoFileElement( - syntax=Syntax.PROTO_2, - location=location, - package_name="example.simple", - imports=[], - public_imports=[], - types=[message] - ) + file = ProtoFileElement(syntax=Syntax.PROTO_2, location=location, package_name="example.simple", types=[message]) expected = """ |// Proto schema formatted by Wire, do not edit. |// Source: file.proto @@ -429,8 +420,7 @@ def test_convert_packed_option_from_wire_schema_in_proto3(): tag=5, options=[PACKED_OPTION_ELEMENT] ) - el = copy.copy(PACKED_OPTION_ELEMENT) - el.value = "false" + field_string_packed_false = FieldElement( location=location.at(19, 3), label=Field.Label.REPEATED, @@ -448,14 +438,7 @@ def test_convert_packed_option_from_wire_schema_in_proto3(): field_string_packed_false ] ) - file = ProtoFileElement( - syntax=Syntax.PROTO_3, - location=location, - package_name="example.simple", - imports=[], - public_imports=[], - types=[message] - ) + file = ProtoFileElement(syntax=Syntax.PROTO_3, location=location, package_name="example.simple", types=[message]) expected = """ |// Proto schema formatted by Wire, do not edit. 
|// Source: file.proto diff --git a/tests/unit/test_proto_parser.py b/tests/unit/test_proto_parser.py index 2dbbbdbbd..69ca5a836 100644 --- a/tests/unit/test_proto_parser.py +++ b/tests/unit/test_proto_parser.py @@ -596,7 +596,6 @@ def test_proto3_extension_fields_do_not_require_labels(): ExtendElement( location=location.at(4, 1), name="Message", - documentation="", fields=[ FieldElement(location=location.at(5, 3), element_type="string", name="a", tag=1), FieldElement(location=location.at(6, 3), element_type="int32", name="b", tag=2) @@ -669,7 +668,6 @@ def test_proto3_extension_fields_allow_optional(): ExtendElement( location=location.at(4, 1), name="Message", - documentation="", fields=[ FieldElement( location=location.at(5, 3), element_type="string", name="a", tag=1, label=Field.Label.OPTIONAL @@ -745,7 +743,6 @@ def test_proto3_extension_fields_permit_repeated(): ExtendElement( location=location.at(4, 1), name="Message", - documentation="", fields=[ FieldElement( location=location.at(5, 3), label=Field.Label.REPEATED, element_type="string", name="a", tag=1 @@ -817,7 +814,6 @@ def test_group(): label=Field.Label.REPEATED, name="Result", tag=1, - documentation="", fields=[ FieldElement( location=location.at(3, 5), label=Field.Label.REQUIRED, element_type="string", name="url", tag=2 @@ -865,13 +861,10 @@ def test_parse_message_and_one_of(): one_ofs=[ OneOfElement( name="page_info", - documentation="", fields=[ FieldElement(location=location.at(4, 5), element_type="int32", name="page_number", tag=2), FieldElement(location=location.at(5, 5), element_type="int32", name="result_per_page", tag=3) ], - groups=[], - options=[] ) ] ) @@ -908,7 +901,6 @@ def test_parse_message_and_one_of_with_group(): one_ofs=[ OneOfElement( name="page_info", - documentation="", fields=[FieldElement(location=location.at(4, 5), element_type="int32", name="page_number", tag=2)], groups=[ GroupElement( @@ -916,7 +908,6 @@ def test_parse_message_and_one_of_with_group(): location=location.at(5, 
5), name="Stuff", tag=3, - documentation="", fields=[ FieldElement( location=location.at(6, 7), @@ -935,7 +926,6 @@ def test_parse_message_and_one_of_with_group(): ] ) ], - options=[] ) ] ) @@ -968,15 +958,20 @@ def test_parse_enum(): name="Topping", documentation="What's on my waffles.\nAlso works on pancakes.", constants=[ - EnumConstantElement(location=location.at(6, 3), name="FRUIT", tag=1, documentation="", options=[]), + EnumConstantElement(location=location.at(6, 3), name="FRUIT", tag=1), EnumConstantElement( - location=location.at(8, 3), name="CREAM", tag=2, documentation="Yummy, yummy cream.", options=[] + location=location.at(8, 3), + name="CREAM", + tag=2, + documentation="Yummy, yummy cream.", ), EnumConstantElement( - location=location.at(11, 3), name="SYRUP", tag=3, documentation="Quebec Maple syrup", options=[] + location=location.at(11, 3), + name="SYRUP", + tag=3, + documentation="Quebec Maple syrup", ) ], - options=[] ) ] ) @@ -1014,14 +1009,19 @@ def test_parse_enum_with_options(): location=location.at(8, 3), name="FRUIT", tag=1, - documentation="", options=[OptionElement("healthy", OptionElement.Kind.BOOLEAN, "true", True)] ), EnumConstantElement( - location=location.at(10, 3), name="CREAM", tag=2, documentation="Yummy, yummy cream.", options=[] + location=location.at(10, 3), + name="CREAM", + tag=2, + documentation="Yummy, yummy cream.", ), EnumConstantElement( - location=location.at(13, 3), name="SYRUP", tag=3, documentation="Quebec Maple syrup", options=[] + location=location.at(13, 3), + name="SYRUP", + tag=3, + documentation="Quebec Maple syrup", ) ] ) @@ -1073,20 +1073,17 @@ def test_nesting_in_message(): enum_element = EnumElement( location=location.at(3, 3), name="CType", - documentation="", constants=[ EnumConstantElement( location=location.at(4, 5), name="STRING", tag=0, - documentation="", options=[ OptionElement("opt_a", OptionElement.Kind.NUMBER, "1", True), OptionElement("opt_b", OptionElement.Kind.NUMBER, "2", True) ] ) ], - 
options=[] ) field = FieldElement( location=location.at(2, 3), @@ -1133,14 +1130,9 @@ def test_multi_ranges_extensions(): message_element = MessageElement( location=location.at(1, 1), name="MeGustaExtensions", - documentation="", - fields=[], - nested_types=[], extensions=[ ExtensionsElement( - location=location.at(2, 3), - documentation="", - values=[1] + [KotlinRange(5, 200)] + [500] + [KotlinRange(1000, MAX_TAG_VALUE)] + location=location.at(2, 3), values=[1] + [KotlinRange(5, 200)] + [500] + [KotlinRange(1000, MAX_TAG_VALUE)] ) ] ) @@ -1265,7 +1257,6 @@ def test_extend_in_message(): ExtendElement( location=location.at(2, 3), name="Foo", - documentation="", fields=[ FieldElement( location=location.at(3, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 @@ -1296,7 +1287,6 @@ def test_extend_in_message_with_package(): ExtendElement( location=location.at(4, 3), name="Foo", - documentation="", fields=[ FieldElement( location=location.at(5, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 @@ -1324,7 +1314,6 @@ def test_fqcn_extend_in_message(): ExtendElement( location=location.at(2, 3), name="example.Foo", - documentation="", fields=[ FieldElement( location=location.at(3, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 @@ -1355,7 +1344,6 @@ def test_fqcn_extend_in_message_with_package(): ExtendElement( location=location.at(4, 3), name="example.Foo", - documentation="", fields=[ FieldElement( location=location.at(5, 5), label=Field.Label.OPTIONAL, element_type="Bar", name="bar", tag=126 @@ -1507,23 +1495,19 @@ def test_service(): ServiceElement( location=location.at(1, 1), name="SearchService", - documentation="", options=[OptionElement("default_timeout", OptionElement.Kind.NUMBER, "30", True)], rpcs=[ RpcElement( location=location.at(4, 3), name="Search", - documentation="", request_type="SearchRequest", response_type="SearchResponse", - options=[], response_streaming=False, request_streaming=False 
), RpcElement( location=location.at(5, 3), name="Purchase", - documentation="", request_type="PurchaseRequest", response_type="PurchaseResponse", options=[ @@ -1556,51 +1540,41 @@ def test_streaming_service(): ServiceElement( location=location.at(1, 1), name="RouteGuide", - documentation="", rpcs=[ RpcElement( location=location.at(2, 3), name="GetFeature", - documentation="", request_type="Point", response_type="Feature", - options=[], response_streaming=False, request_streaming=False ), RpcElement( location=location.at(3, 3), name="ListFeatures", - documentation="", request_type="Rectangle", response_type="Feature", response_streaming=True, # TODO: Report Square.Wire there was mistake True instead of False! request_streaming=False, - options=[] ), RpcElement( location=location.at(4, 3), name="RecordRoute", - documentation="", request_type="Point", response_type="RouteSummary", request_streaming=True, response_streaming=False, - options=[] ), RpcElement( location=location.at(5, 3), name="RouteChat", - documentation="", request_type="RouteNote", response_type="RouteNote", request_streaming=True, response_streaming=True, - options=[] ) ], - options=[] ) ] ) @@ -1959,7 +1933,7 @@ def test_reserved(): message = MessageElement( location=location.at(1, 1), name="Foo", - reserveds=[ReservedElement(location=location.at(2, 3), values=[10, KotlinRange(12, 14), "foo"], documentation="")] + reserveds=[ReservedElement(location=location.at(2, 3), values=[10, KotlinRange(12, 14), "foo"])] ) expected = ProtoFileElement(location=location, types=[message]) assert ProtoParser.parse(location, proto) == expected @@ -2422,7 +2396,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="import", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), @@ -2435,7 +2408,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="package", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - 
documentation="" ) ] ), @@ -2448,7 +2420,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="option", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), @@ -2461,7 +2432,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="reserved", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), @@ -2474,7 +2444,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="message", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), @@ -2487,7 +2456,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="enum", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), @@ -2500,7 +2468,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="service", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), @@ -2513,7 +2480,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="extend", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), @@ -2526,7 +2492,6 @@ def test_proto_keyword_as_service_name_and_rpc(): name="rpc", request_type="google.protobuf.StringValue", response_type="google.protobuf.StringValue", - documentation="" ) ] ), From 67224a795cee7e1816edb4ddb28b27a7cc66042d Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 17 Jun 2021 01:10:23 +0300 Subject: [PATCH 025/168] fixup style issues (mypy and other) --- karapace/protobuf/enum_constant_element.py | 13 +---- karapace/protobuf/enum_element.py | 10 +--- karapace/protobuf/extend_element.py | 10 +--- karapace/protobuf/extensions_element.py | 14 ++---- karapace/protobuf/field_element.py | 19 ++------ karapace/protobuf/kotlin_wrapper.py | 20 ++------ karapace/protobuf/location.py | 5 -- 
karapace/protobuf/message_element.py | 32 +++--------- karapace/protobuf/one_of_element.py | 16 ++---- karapace/protobuf/option_element.py | 15 ++---- karapace/protobuf/option_reader.py | 5 +- karapace/protobuf/proto_file_element.py | 57 ++++++---------------- karapace/protobuf/proto_parser.py | 43 ++++++---------- karapace/protobuf/proto_type.py | 14 ++---- karapace/protobuf/reserved_document.py | 9 +--- karapace/protobuf/rpc_element.py | 12 +---- karapace/protobuf/schema.py | 4 +- karapace/protobuf/service_element.py | 14 ++---- karapace/protobuf/syntax_reader.py | 17 +++---- karapace/protobuf/type_element.py | 16 +++--- karapace/protobuf/utils.py | 6 +-- 21 files changed, 92 insertions(+), 259 deletions(-) diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index 116bd42c1..24755da31 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -5,12 +5,6 @@ class EnumConstantElement: - location: Location - name: str - tag: int - documentation: str = "" - options: list = [] - def __init__( self, location: Location, @@ -23,11 +17,8 @@ def __init__( self.name = name self.tag = tag - if options: - self.options = options - - if documentation: - self.documentation = documentation + self.options = options or [] + self.documentation = documentation or "" def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 41d7ca28e..52f66049f 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -7,16 +7,10 @@ class EnumElement(TypeElement): - constants: list = [] - def __init__(self, location: Location, name: str, documentation: str = "", options: list = None, constants: list = None): - self.location = location - self.name = name - self.documentation = documentation - self.options = options - self.constants = constants # Enums do not allow nested type declarations. 
- self.nested_types = [] + super().__init__(location, name, documentation, options or [], []) + self.constants = constants or [] def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index 9ceba940e..947456aaf 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -6,19 +6,13 @@ class ExtendElement: - location: Location - name: str - documentation: str = "" - fields: list = [] - def __init__(self, location: Location, name: str, documentation: str = "", fields: list = None): self.location = location self.name = name self.documentation = documentation - if fields: - self.fields = fields + self.fields = fields or [] - def to_schema(self): + def to_schema(self) -> str: result: list = list() append_documentation(result, self.documentation) result.append(f"extend {self.name} {{") diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index 56b4e2e5e..5349a9e86 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -7,17 +7,11 @@ class ExtensionsElement: - location: Location - documentation: str = "" - """ An [Int] or [IntRange] tag. """ - values: list = [] - - def __init__(self, location: Location, documentation: str = None, values: list = None): + def __init__(self, location: Location, documentation: str = "", values: list = None): self.location = location - if documentation: - self.documentation = documentation - if values: - self.values = values + self.documentation = documentation + """ An [Int] or [IntRange] tag. 
""" + self.values = values or [] def to_schema(self) -> str: result: list = [] diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 118550484..7eeffa214 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -9,26 +9,16 @@ class FieldElement: - location: Location - label: Field.Label - element_type: str - name: str - default_value: str = None - json_name: str = None - tag: int = 0 - documentation: str = "" - options: list = [] - def __init__( self, location: Location, label: Field.Label = None, - element_type: str = None, + element_type: str = "", name: str = None, default_value: str = None, json_name: str = None, tag: int = None, - documentation: str = None, + documentation: str = "", options: list = None ): self.location = location @@ -39,10 +29,9 @@ def __init__( self.json_name = json_name self.tag = tag self.documentation = documentation - if options: - self.options = options + self.options = options or [] - def to_schema(self): + def to_schema(self) -> str: result: list = list() append_documentation(result, self.documentation) diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index ef1c4792a..d04f873a2 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -1,5 +1,7 @@ from karapace.protobuf.exception import IllegalArgumentException, IllegalStateException +import textwrap + def check(q: bool, message: str): if not q: @@ -10,10 +12,10 @@ def trim_margin(s: str) -> str: lines = s.split("\n") new_lines = list() - if not lines[0].strip(): + if not textwrap.dedent(lines[0]): del lines[0] - if not lines[-1].strip(): + if not textwrap.dedent(lines[-1]): del lines[-1] for line in lines: @@ -26,7 +28,7 @@ def trim_margin(s: str) -> str: return "\n".join(new_lines) -def require(q: bool, message: str): +def require(q: bool, message: str) -> None: if not q: raise IllegalArgumentException(message) @@ -44,18 +46,6 @@ class 
Any: pass -class StringBuilder(list): - def append_indented(self: list, value: str): - lines = value.split("\n") - if len(lines) > 1 and not lines[-1]: - lines = lines.pop() - - for line in lines: - self.append(" ") - self.append(line) - self.append("\n") - - class OptionsList(list): pass diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index b0e5f26d4..cb3e206db 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -5,11 +5,6 @@ class Location: """ Locates a .proto file, or a self.position within a .proto file, on the file system """ - base: str - path: str - line: int - column: int - def __init__(self, base: str, path: str, line: int = -1, column: int = -1): """ str - The base directory of this location; path - The path to this location relative to [base] diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index 6603559f9..508d15830 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -7,15 +7,6 @@ class MessageElement(TypeElement): - reserveds: list = [] - fields: list = [] - one_ofs: list = [] - extensions: list = [] - groups: list = [] - options: list = [] - nested_types: list = [] - documentation = "" - def __init__( self, location: Location, @@ -29,23 +20,12 @@ def __init__( extensions: list = None, groups: list = None, ): - self.location = location - self.name = name - self.documentation = documentation - if nested_types: - self.nested_types = nested_types - if options: - self.options = options - if reserveds: - self.reserveds = reserveds - if fields: - self.fields = fields - if one_ofs: - self.one_ofs = one_ofs - if extensions: - self.extensions = extensions - if groups: - self.groups = groups + super().__init__(location, name, documentation, options or [], nested_types or []) + self.reserveds = reserveds or [] + self.fields = fields or [] + self.one_ofs = one_ofs or [] + self.extensions = extensions or [] + self.groups = 
groups or [] def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index 74f51ddfd..d57f90d59 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -5,22 +5,12 @@ class OneOfElement: - name: str - documentation: str = "" - fields: list = [] - groups: list = [] - options: list = [] - def __init__(self, name: str, documentation: str = "", fields=None, groups=None, options=None): self.name = name self.documentation = documentation - - if fields: - self.fields = fields - if options: - self.options = options - if groups: - self.groups = groups + self.fields = fields or [] + self.options = options or [] + self.groups = groups or [] def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 2e157c69d..54c5f3e02 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -21,17 +21,12 @@ class Kind(Enum): LIST = 6 OPTION = 7 - name: str - kind: Kind - value = None - """ If true, this [OptionElement] is a custom option. """ - is_parenthesized: bool - def __init__(self, name: str, kind: Kind, value, is_parenthesized: bool = None): self.name = name self.kind = kind self.value = value - self.is_parenthesized = is_parenthesized + """ If true, this [OptionElement] is a custom option. 
""" + self.is_parenthesized = is_parenthesized or False self.formattedName = f"({self.name})" if is_parenthesized else self.name def to_schema(self) -> str: @@ -51,11 +46,11 @@ def to_schema(self) -> str: return "".join(aline) return aline - def to_schema_declaration(self): + def to_schema_declaration(self) -> str: return f"option {self.to_schema()};\n" @staticmethod - def append_options(options: list): + def append_options(options: list) -> str: data: list = [] append_options(data, options) return "".join(data) @@ -93,7 +88,7 @@ def format_list_map_value(self, value) -> str: append_indented(result, f"{self.format_option_map_value(elm)}{endl}") return "".join(result) - def __repr__(self): + def __repr__(self) -> str: return self.to_schema() def __eq__(self, other): diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index 8fb348f25..c502bc8f3 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -7,9 +7,6 @@ class KindAndValue: - kind: OptionElement.Kind - value: object - def __init__(self, kind: OptionElement.Kind, value: object): self.kind = kind self.value = value @@ -130,7 +127,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) self.reader.peek_char(';') @staticmethod - def add_to_list(_list: list, value: Union[list, str]): + def add_to_list(_list: list, value: Union[list, str]) -> None: """ Adds an object or objects to a List. 
""" if isinstance(value, list): for v in list(value): diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index cb6cd2d79..c44d54a77 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -6,43 +6,29 @@ class ProtoFileElement: - location: Location - package_name: str - syntax: Syntax - imports: list = [] - public_imports: list = [] - types: list = [] - services: list = [] - extend_declarations: list = [] - options: list = [] - def __init__( self, location: Location, package_name: str = None, syntax: Syntax = None, - imports=None, - public_imports=None, + imports: list = None, + public_imports: list = None, types=None, - services=None, - extend_declarations=None, - options=None + services: list = None, + extend_declarations: list = None, + options: list = None ): + if types is None: + types = [] self.location = location self.package_name = package_name self.syntax = syntax - if options: - self.options = options - if extend_declarations: - self.extend_declarations = extend_declarations - if services: - self.services = services - if types: - self.types = types - if public_imports: - self.public_imports = public_imports - if imports: - self.imports = imports + self.options = options or [] + self.extend_declarations = extend_declarations or [] + self.services = services or [] + self.types = types or [] + self.public_imports = public_imports or [] + self.imports = imports or [] def to_schema(self): strings: list = [ @@ -95,26 +81,11 @@ def empty(path): return ProtoFileElement(Location.get(path)) # TODO: there maybe be faster comparison workaround - def __eq__(self, other: 'ProtoFileElement'): + def __eq__(self, other: 'ProtoFileElement'): # type: ignore a = self.to_schema() b = other.to_schema() - # sys.stderr.write("\n\nTESTA=[") - # sys.stderr.write(a) - # sys.stderr.write("]\n\nTESTB=[") - # sys.stderr.write(b) - # sys.stderr.write("]\n\n") return a == b def __repr__(self): 
return self.to_schema() - - # return str(self.location) == str(other.location) and \ - # self.package_name == other.package_name and \ - # str(self.syntax) == str(other.syntax) and \ - # str(self.imports) == str(other.imports) and \ - # str(self.public_imports) == str(self.public_imports) and \ - # str(self.types) == str(self.types) and \ - # str(self.services) == str(self.services) and \ - # str(self.extend_declarations) == str(self.extend_declarations) and \ - # str(self.options) == str(self.options) diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 8dcca1551..42be44a38 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -68,33 +68,19 @@ def permits_extend(self) -> bool: class ProtoParser: - location: Location - reader: SyntaxReader - public_imports: list = [] - imports: list = [] - nested_types: list = [] - services: list = [] - extends_list: list = [] - options: list = [] - declaration_count: int = 0 - syntax: Syntax = None - package_name: str = None - prefix: str = "" - data: str - def __init__(self, location: Location, data: str): self.location = location - self.imports = [] - self.nested_types = [] - self.services = [] - self.extends_list = [] - self.options = [] + self.imports: list = [] + self.nested_types: list = [] + self.services: list = [] + self.extends_list: list = [] + self.options: list = [] self.declaration_count = 0 - self.syntax = None - self.package_name = None + self.syntax: Union[Syntax, None] = None + self.package_name: Union[str, None] = None self.prefix = "" self.data = data - self.public_imports = [] + self.public_imports: list = [] self.reader = SyntaxReader(data, location) def read_proto_file(self) -> ProtoFileElement: @@ -140,7 +126,8 @@ def read_declaration(self, documentation: str, context: Context): # TODO(benoit) Let's better parse the proto keywords. We are pretty weak when field/constants # are named after any of the label we check here. 
- result = None + result: Union[None, OptionElement, ReservedElement, RpcElement, MessageElement, EnumElement, EnumConstantElement, + ServiceElement, ExtendElement, ExtensionsElement, OneOfElement, GroupElement, FieldElement] = None # pylint no-else-return if label == "package" and context.permits_package(): self.package_name = self.reader.read_name() @@ -305,7 +292,6 @@ def read_enum_element(self, location: Location, documentation: str) -> EnumEleme name = self.reader.read_name() constants: list = list() options: list = list() - declared = None self.reader.require("{") while True: value_documentation = self.reader.read_documentation() @@ -322,7 +308,7 @@ def read_enum_element(self, location: Location, documentation: str) -> EnumEleme pass return EnumElement(location, name, documentation, options, constants) - def read_field(self, documentation: str, location: Location, word: str): + def read_field(self, documentation: str, location: Location, word: str) -> Union[GroupElement, FieldElement]: label: Union[None, Field.Label] atype: str if word == "required": @@ -333,6 +319,7 @@ def read_field(self, documentation: str, location: Location, word: str): atype = self.reader.read_data_type() elif word == "optional": label = Field.Label.OPTIONAL + atype = self.reader.read_data_type() elif word == "repeated": @@ -381,16 +368,16 @@ def read_field_with_label( options_to_list(options), ) - def strip_default(self, options: list) -> str: + def strip_default(self, options: list) -> Union[str, None]: """ Defaults aren't options. """ return self.strip_value("default", options) - def strip_json_name(self, options: list) -> str: + def strip_json_name(self, options: list) -> Union[None, str]: """ `json_name` isn't an option. """ return self.strip_value("json_name", options) @staticmethod - def strip_value(name: str, options: list) -> str: + def strip_value(name: str, options: list) -> Union[None, str]: """ This finds an option named [name], removes, and returns it. 
Returns None if no [name] option is present. """ diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index de0f0c643..7f3bf4df4 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -17,14 +17,6 @@ def static_init(cls): @static_init class ProtoType: - is_scalar: bool - string: str - is_map: bool - """ The type of the map's keys. Only present when [is_map] is True. """ - key_type: object # ProtoType - """ The type of the map's values. Only present when [is_map] is True. """ - value_type: object # ProtoType - @property def simple_name(self) -> str: dot = self.string.rfind(".") @@ -86,7 +78,9 @@ def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None) self.is_scalar = is_scalar self.string = string self.is_map = False + """ The type of the map's keys. Only present when [is_map] is True. """ self.key_type = None + """ The type of the map's values. Only present when [is_map] is True. """ self.value_type = None else: if key_type.is_scalar() and key_type != self.BYTES and key_type != self.DOUBLE and key_type != self.FLOAT: @@ -158,7 +152,7 @@ def get(enclosing_type_or_package: str, type_name: str) -> object: if enclosing_type_or_package else ProtoType.get2(type_name) @staticmethod - def get2(name: str): + def get2(name: str) -> 'ProtoType': scalar = ProtoType.SCALAR_TYPES[name] if scalar: return scalar @@ -172,5 +166,5 @@ def get2(name: str): return ProtoType(False, name) @staticmethod - def get3(key_type: object, value_type: object, name: str): + def get3(key_type: object, value_type: object, name: str) -> object: return ProtoType(False, name, key_type, value_type) diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 11f186a91..2a9d27185 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -7,16 +7,11 @@ class ReservedElement: - location: Location - documentation: str - """ A [String] name or [Int] 
or [IntRange] tag. """ - values: list = [] - def __init__(self, location: Location, documentation: str = "", values: list = None): self.location = location self.documentation = documentation - if values: - self.values = values + """ A [String] name or [Int] or [IntRange] tag. """ + self.values = values or [] def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py index d1a7c7bfe..33f2a60ec 100644 --- a/karapace/protobuf/rpc_element.py +++ b/karapace/protobuf/rpc_element.py @@ -6,15 +6,6 @@ class RpcElement: - location: Location - name: str - documentation: str - request_type: str - response_type: str - request_streaming: bool - response_streaming: bool - options: list = [] - def __init__( self, location: Location, @@ -33,8 +24,7 @@ def __init__( self.response_type = response_type self.request_streaming = request_streaming self.response_streaming = response_streaming - if options: - self.options = options + self.options = options or [] def to_schema(self) -> str: result: list = list() diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 7d31c1580..43dfce911 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -4,13 +4,11 @@ class ProtobufSchema: - schema: str - def __init__(self, schema: str): self.schema = schema def __str__(self) -> str: return self.schema - def to_json(self): + def to_json(self) -> str: return self.schema diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index af19dce19..42c41bbe0 100644 --- a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -6,22 +6,14 @@ class ServiceElement: - location: Location - name: str - documentation: str - rpcs: list = [] - options: list = [] - def __init__(self, location: Location, name: str, documentation: str = "", rpcs: list = None, options: list = None): self.location = location self.name = name self.documentation = 
documentation - if rpcs: - self.rpcs = rpcs - if options: - self.options = options + self.rpcs = rpcs or [] + self.options = options or [] - def to_schema(self): + def to_schema(self) -> str: result: list = list() append_documentation(result, self.documentation) result.append(f"service {self.name} {{") diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 1b6e5ced3..36952d233 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -1,8 +1,8 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/SyntaxReader.kt - from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.location import Location +from typing import Union def hex_digit(c: str) -> int: @@ -20,18 +20,12 @@ def min_of(a: int, b: int) -> int: class SyntaxReader: - data: str - _location: Location - """ Next character to be read """ - pos: int = 0 - """ The number of newline characters """ - line: int = 0 - """ The index of the most recent newline character. """ - line_start: int = 0 - def __init__(self, data: str, location: Location): + """ Next character to be read """ self.pos = 0 + """ The number of newline characters """ self.line = 0 + """ The index of the most recent newline character. """ self.line_start = 0 self.data = data self._location = location @@ -96,7 +90,7 @@ def read_quoted_string(self) -> str: self.expect(self.pos < len(self.data), "unexpected end of file") c = self.data[self.pos] self.pos += 1 - d: str = { + d: Union[str, None] = { 'a': "\u0007", # Alert. 'b': "\b", # Backspace. 'f': "\u000c", # Form feed. 
@@ -200,6 +194,7 @@ def read_int(self) -> int: print("OS error: {0}".format(err)) except ValueError: self.unexpected(f"expected an integer but was {tag}") + return -22 # this return never be called but mypy think we need it def read_documentation(self) -> str: """ Like skip_whitespace(), but this returns a string containing all comment text. By convention, diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index 3fbd80869..865575d14 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -5,19 +5,21 @@ class TypeElement: - location: Location - name: str - documentation: str - options: list - nested_types: list + def __init__(self, location: Location, name: str, documentation: str, options: list, nested_types: list): + + self.location: Location = location + self.name: str = name + self.documentation: str = documentation + self.options: list = options + self.nested_types: list = nested_types def to_schema(self) -> str: pass - def __repr__(self): + def __repr__(self) -> str: mytype = type(self) return f"{mytype}({self.to_schema()})" - def __str__(self): + def __str__(self) -> str: mytype = type(self) return f"{mytype}({self.to_schema()})" diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 0c49c95bf..e70af8d03 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -7,7 +7,7 @@ def protobuf_encode(a: str) -> str: return a -def append_documentation(data: list, documentation: str): +def append_documentation(data: list, documentation: str) -> None: if not documentation: return @@ -22,7 +22,7 @@ def append_documentation(data: list, documentation: str): data.append("\n") -def append_options(data: list, options: list): +def append_options(data: list, options: list) -> None: count = len(options) if count == 1: data.append('[') @@ -49,7 +49,7 @@ def try_to_schema(obj: object) -> str: raise AttributeError -def append_indented(data: list, value: str): +def 
append_indented(data: list, value: str) -> None: lines = value.split("\n") if len(lines) > 1 and not lines[-1]: del lines[-1] From 20f884a90248c8bb0e3d03beb7e6edb755425e75 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 21 Jun 2021 08:53:27 +0300 Subject: [PATCH 026/168] fixup class variable issues --- karapace/protobuf/group_element.py | 10 ++-------- karapace/protobuf/kotlin_wrapper.py | 2 -- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index 762d2ec23..daa881f02 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -7,12 +7,6 @@ class GroupElement: - label: Field.Label - location: Location - name: str - tag: int - documentation: str = "" - fields: list = [] def __init__( self, @@ -27,8 +21,8 @@ def __init__( self.location = location self.name = name self.tag = tag - if fields: - self.fields = fields + + self.fields = fields or [] self.documentation = documentation def to_schema(self) -> str: diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index d04f873a2..815a688d2 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -51,8 +51,6 @@ class OptionsList(list): class KotlinRange: - minimum: int - maximum: int def __init__(self, minimum, maximum): self.minimum = minimum From 1ac76f1573761d481b18f72b56422731d02e321f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 21 Jun 2021 09:04:03 +0300 Subject: [PATCH 027/168] lint issues --- karapace/protobuf/group_element.py | 1 - karapace/protobuf/kotlin_wrapper.py | 1 - 2 files changed, 2 deletions(-) diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index daa881f02..45a004659 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -7,7 +7,6 @@ class GroupElement: - def __init__( self, label: Union[None, Field.Label], diff --git 
a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index 815a688d2..7d38d7e09 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -51,7 +51,6 @@ class OptionsList(list): class KotlinRange: - def __init__(self, minimum, maximum): self.minimum = minimum self.maximum = maximum From 4ea2d8a4735dfb9469de2596497804d11b676ad1 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 17 Aug 2021 19:23:43 +0300 Subject: [PATCH 028/168] square.wire.protobuf parser utilization (parse/generate protobuf schema similar to SR --- karapace/protobuf/schema.py | 180 +++++++++++++++++++++- karapace/schema_reader.py | 9 +- tests/integration/test_client_protobuf.py | 22 ++- tests/schemas/protobuf.py | 48 +++++- tests/unit/test_protobuf_schema.py | 24 +++ 5 files changed, 274 insertions(+), 9 deletions(-) create mode 100644 tests/unit/test_protobuf_schema.py diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 43dfce911..eee32a2c8 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -1,14 +1,188 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Schema.kt # Ported partially for required functionality. 
+from karapace.protobuf.enum_element import EnumElement +from karapace.protobuf.location import Location +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.protobuf.proto_parser import ProtoParser +from karapace.protobuf.utils import append_documentation, append_indented +import logging +log = logging.getLogger(__name__) + + +def static_init(cls): + if getattr(cls, "static_init", None): + cls.static_init() + return cls + + +def add_slashes(text: str): + escape_dict = { + r'\a': '\\a', + r'\b': '\\b', + r'\c': '\\c', + r'\f': '\\f', + r'\n': '\\n', + r'\r': '\\r', + r'\t': '\\t', + r'\v': '\\v', + '\'': "\\'", + '\"': '\\\"', + '\\': '\\\\' + } + result: str = "" + for char in text: + try: + result += escape_dict[char] + except KeyError: + result += char + return result + + +def message_element_string(element: MessageElement) -> str: + result: list = list() + append_documentation(result, element.documentation) + result.append(f"message {element.name} {{") + if element.reserveds: + result.append("\n") + for reserved in element.reserveds: + append_indented(result, reserved.to_schema()) + + if element.options: + result.append("\n") + for option in element.options: + append_indented(result, option_element_string(option)) + + if element.fields: + result.append("\n") + for field in element.fields: + append_indented(result, field.to_schema()) + + if element.one_ofs: + result.append("\n") + for one_of in element.one_ofs: + append_indented(result, one_of.to_schema()) + + if element.groups: + result.append("\n") + for group in element.groups: + append_indented(result, group.to_schema()) + + if element.extensions: + result.append("\n") + for extension in element.extensions: + append_indented(result, extension.to_schema()) + + if element.nested_types: + result.append("\n") + for nested_type in element.nested_types: + if 
isinstance(nested_type, MessageElement): + append_indented(result, message_element_string(nested_type)) + + for nested_type in element.nested_types: + if isinstance(nested_type, EnumElement): + append_indented(result, enum_element_string(nested_type)) + + result.append("}\n") + return "".join(result) + + +def enum_element_string(element: EnumElement) -> str: + return element.to_schema() + + +def option_element_string(option: OptionElement): + result: str + if option.kind == OptionElement.Kind.STRING: + name: str + if option.is_parenthesized: + name = f"({option.name})" + else: + name = option.name + value = add_slashes(str(option.value)) + result = f"{name} = \"{value}\"" + else: + result = option.to_schema() + + return f"option {result};\n" + + +@static_init class ProtobufSchema: + @classmethod + def static_init(cls): + # if hasattr(cls, 'DEFAULT_LOCATION') and cls.DEFAULT_LOCATION: + # return + # log.warning("STATIC_INIT") + cls.DEFAULT_LOCATION = Location.get("") + def __init__(self, schema: str): - self.schema = schema + self.dirty = schema + self.cache_string = "" + self.schema = ProtoParser.parse(self.DEFAULT_LOCATION, schema) def __str__(self) -> str: - return self.schema + if not self.cache_string: + self.cache_string = self.to_schema() + log.warning("CACHE_STRING:%s", self.cache_string) + return self.cache_string + # def to_json(self) -> str: + # return self.schema.to_schema() def to_json(self) -> str: - return self.schema + return self.to_schema() + + def to_schema(self): + strings: list = [] + # [ + # "// Proto schema formatted by Wire, do not edit.\n", "// Source: ", + # str(self.location.with_path_only()), "\n" + # ] + + shm: ProtoFileElement = self.schema + if shm.syntax: + strings.append("syntax = \"") + strings.append(str(shm.syntax)) + strings.append("\";\n") + + if shm.package_name: + strings.append("package " + str(shm.package_name) + ";\n") + + if shm.imports or shm.public_imports: + strings.append("\n") + + for file in shm.imports: + 
strings.append("import \"" + str(file) + "\";\n") + + for file in shm.public_imports: + strings.append("import public \"" + str(file) + "\";\n") + + if shm.options: + strings.append("\n") + for option in shm.options: + # strings.append(str(option.to_schema_declaration())) + strings.append(option_element_string(option)) + + if shm.types: + strings.append("\n") + for type_element in shm.types: + if isinstance(type_element, MessageElement): + strings.append(message_element_string(type_element)) + for type_element in shm.types: + if isinstance(type_element, EnumElement): + strings.append(enum_element_string(type_element)) + + if shm.extend_declarations: + strings.append("\n") + for extend_declaration in shm.extend_declarations: + strings.append(str(extend_declaration.to_schema())) + + if shm.services: + strings.append("\n") + for service in shm.services: + strings.append(str(service.to_schema())) + return "".join(strings) diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index c7645ea31..e38305a24 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -86,7 +86,8 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen @staticmethod def parse_protobuf(schema_str: str): try: - return TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) + ts = TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) + return ts # TypeError - Raised when the user forgets to encode the schema as a string. 
except Exception as e: # FIXME: bare exception log.exception("Unexpected error:") @@ -108,8 +109,10 @@ def to_json(self): if isinstance(self.schema, AvroSchema): return self.schema.to_json(names=None) if isinstance(self.schema, ProtobufSchema): - return self.schema.to_json() - return self.schema + raise InvalidSchema("Protobuf do not support to_json serialization") + log.warning("STRANGE OBJECT: %s ", self.schema) + log.warning("STRANGE OBJECT2: %s ", self.schema.__dict__) + return self.schema.to_json() def __str__(self) -> str: if isinstance(self.schema, ProtobufSchema): diff --git a/tests/integration/test_client_protobuf.py b/tests/integration/test_client_protobuf.py index 862991389..2861b7255 100644 --- a/tests/integration/test_client_protobuf.py +++ b/tests/integration/test_client_protobuf.py @@ -1,8 +1,13 @@ +from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.schema_reader import SchemaType, TypedSchema from karapace.serialization import SchemaRegistryClient -from tests.schemas.protobuf import schema_protobuf_plain +from tests.schemas.protobuf import schema_protobuf_order_after, schema_protobuf_order_before, schema_protobuf_plain from tests.utils import new_random_name +import logging + +log = logging.getLogger(__name__) + async def test_remote_client_protobuf(registry_async_client): schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain) @@ -16,3 +21,18 @@ async def test_remote_client_protobuf(registry_async_client): stored_id, stored_schema = await reg_cli.get_latest_schema(subject) assert stored_id == sc_id assert stored_schema == schema_protobuf + + +async def test_remote_client_protobuf2(registry_async_client): + schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, trim_margin(schema_protobuf_order_before)) + schema_protobuf_after = TypedSchema.parse(SchemaType.PROTOBUF, trim_margin(schema_protobuf_order_after)) + reg_cli = SchemaRegistryClient() + reg_cli.client = registry_async_client + subject = 
new_random_name("subject") + sc_id = await reg_cli.post_new_schema(subject, schema_protobuf) + assert sc_id >= 0 + stored_schema = await reg_cli.get_schema_for_id(sc_id) + assert stored_schema == schema_protobuf, f"stored schema {stored_schema} is not {schema_protobuf}" + stored_id, stored_schema = await reg_cli.get_latest_schema(subject) + assert stored_id == sc_id + assert stored_schema == schema_protobuf_after diff --git a/tests/schemas/protobuf.py b/tests/schemas/protobuf.py index e8e1a0bb8..e52414cbf 100644 --- a/tests/schemas/protobuf.py +++ b/tests/schemas/protobuf.py @@ -1,5 +1,4 @@ -schema_protobuf_plain = """ -syntax = "proto3"; +schema_protobuf_plain = """syntax = "proto3"; package com.codingharbour.protobuf; option java_outer_classname = "SimpleMessageProtos"; @@ -9,3 +8,48 @@ string content2 = 3; } """ + +schema_protobuf_schema_registry1 = """ +|syntax = "proto3"; +|package com.codingharbour.protobuf; +| +|message SimpleMessage { +| string content = 1; +| string my_string = 2; +| int32 my_int = 3; +|} +| +""" + +schema_protobuf_order_before = """ +|syntax = "proto3"; +| +|option java_package = "com.codingharbour.protobuf"; +|option java_outer_classname = "TestEnumOrder"; +| +|enum Enum { +| HIGH = 0; +| MIDDLE = 1; +| LOW = 2; +|} +|message Message { +| int32 query = 1; +|} +""" + +schema_protobuf_order_after = """ +|syntax = "proto3"; +| +|option java_package = "com.codingharbour.protobuf"; +|option java_outer_classname = "TestEnumOrder"; +| +|message Message { +| int32 query = 1; +|} +|enum Enum { +| HIGH = 0; +| MIDDLE = 1; +| LOW = 2; +|} +| +""" diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py new file mode 100644 index 000000000..621af52d7 --- /dev/null +++ b/tests/unit/test_protobuf_schema.py @@ -0,0 +1,24 @@ +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location +from karapace.schema_reader import SchemaType, TypedSchema +from tests.schemas.protobuf import ( 
+ schema_protobuf_order_after, schema_protobuf_order_before, schema_protobuf_schema_registry1 +) + +location: Location = Location.get("file.proto") + + +def test_protobuf_schema_simple(): + proto = trim_margin(schema_protobuf_schema_registry1) + protobuf_schema = TypedSchema.parse(SchemaType.PROTOBUF, proto) + result = str(protobuf_schema) + + assert result == proto + + +def test_protobuf_schema_sort(): + proto = trim_margin(schema_protobuf_order_before) + protobuf_schema = TypedSchema.parse(SchemaType.PROTOBUF, proto) + result = str(protobuf_schema) + proto2 = trim_margin(schema_protobuf_order_after) + assert result == proto2 From f46539975c8150351f784e6fdb5ebe40add84c14 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 8 Sep 2021 16:20:43 +0300 Subject: [PATCH 029/168] add support of PROTOBUF to /schemas/types --- karapace/schema_registry_apis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index a50431f31..13cb2369f 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -376,7 +376,7 @@ async def schemas_get_versions(self, content_type, *, schema_id): self.r(subject_versions, content_type) async def schemas_types(self, content_type): - self.r(["JSON", "AVRO"], content_type) + self.r(["JSON", "AVRO", "PROTOBUF"], content_type) async def config_get(self, content_type): # Note: The format sent by the user differs from the return value, this From 0c9f686231e0e1c1bf804cf7f03ae024c62e68e2 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Sep 2021 10:56:39 +0300 Subject: [PATCH 030/168] fixup /schemas/types test --- tests/integration/test_schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index cb0827811..4320c6ee9 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -1132,9 +1132,10 @@ async def 
test_schema_types(registry_async_client: Client, trail: str) -> None: res = await registry_async_client.get(f"/schemas/types{trail}") assert res.status_code == 200 json = res.json() - assert len(json) == 2 + assert len(json) == 3 assert "AVRO" in json assert "JSON" in json + assert "PROTOBUF" in json @pytest.mark.parametrize("trail", ["", "/"]) From e587b58aaa6ac571871724aa37a5cdc12df80591 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Sep 2021 21:54:13 +0300 Subject: [PATCH 031/168] Fixup workaround issue --- karapace/schema_reader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index e38305a24..81c907073 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -110,9 +110,7 @@ def to_json(self): return self.schema.to_json(names=None) if isinstance(self.schema, ProtobufSchema): raise InvalidSchema("Protobuf do not support to_json serialization") - log.warning("STRANGE OBJECT: %s ", self.schema) - log.warning("STRANGE OBJECT2: %s ", self.schema.__dict__) - return self.schema.to_json() + return self.schema def __str__(self) -> str: if isinstance(self.schema, ProtobufSchema): From 9341a8e6bc5b1afdbde85ba06cb6e577cf2db935 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 14 Sep 2021 11:44:50 +0300 Subject: [PATCH 032/168] Update karapace/protobuf/schema.py Co-authored-by: Augusto Hack --- karapace/protobuf/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index eee32a2c8..f03f7b2dc 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -20,7 +20,7 @@ def static_init(cls): return cls -def add_slashes(text: str): +def add_slashes(text: str) -> str: escape_dict = { r'\a': '\\a', r'\b': '\\b', From 81cb911b279c860a6bda4be9678c5d949c1a6ccb Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 14 Sep 2021 13:22:48 +0300 Subject: [PATCH 033/168] 
fixup by PR comments of Augusto Hack --- karapace/protobuf/schema.py | 31 +++++++---------------- tests/integration/test_client_protobuf.py | 4 --- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index f03f7b2dc..b970d94b3 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -22,16 +22,15 @@ def static_init(cls): def add_slashes(text: str) -> str: escape_dict = { - r'\a': '\\a', - r'\b': '\\b', - r'\c': '\\c', - r'\f': '\\f', - r'\n': '\\n', - r'\r': '\\r', - r'\t': '\\t', - r'\v': '\\v', + '\a': '\\a', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', + '\v': '\\v', '\'': "\\'", - '\"': '\\\"', + '\"': '\\"', '\\': '\\\\' } result: str = "" @@ -113,12 +112,7 @@ def option_element_string(option: OptionElement): @static_init class ProtobufSchema: - @classmethod - def static_init(cls): - # if hasattr(cls, 'DEFAULT_LOCATION') and cls.DEFAULT_LOCATION: - # return - # log.warning("STATIC_INIT") - cls.DEFAULT_LOCATION = Location.get("") + DEFAULT_LOCATION = Location.get("") def __init__(self, schema: str): self.dirty = schema @@ -131,18 +125,11 @@ def __str__(self) -> str: log.warning("CACHE_STRING:%s", self.cache_string) return self.cache_string - # def to_json(self) -> str: - # return self.schema.to_schema() def to_json(self) -> str: return self.to_schema() def to_schema(self): strings: list = [] - # [ - # "// Proto schema formatted by Wire, do not edit.\n", "// Source: ", - # str(self.location.with_path_only()), "\n" - # ] - shm: ProtoFileElement = self.schema if shm.syntax: strings.append("syntax = \"") diff --git a/tests/integration/test_client_protobuf.py b/tests/integration/test_client_protobuf.py index 2861b7255..e000defa3 100644 --- a/tests/integration/test_client_protobuf.py +++ b/tests/integration/test_client_protobuf.py @@ -4,10 +4,6 @@ from tests.schemas.protobuf import schema_protobuf_order_after, schema_protobuf_order_before, 
schema_protobuf_plain from tests.utils import new_random_name -import logging - -log = logging.getLogger(__name__) - async def test_remote_client_protobuf(registry_async_client): schema_protobuf = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain) From 04b3d3bdb056720835db682123aaf686cc2768cf Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 16 Sep 2021 12:25:28 +0300 Subject: [PATCH 034/168] remove unused decorator --- karapace/protobuf/schema.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index b970d94b3..541622a89 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -14,12 +14,6 @@ log = logging.getLogger(__name__) -def static_init(cls): - if getattr(cls, "static_init", None): - cls.static_init() - return cls - - def add_slashes(text: str) -> str: escape_dict = { '\a': '\\a', @@ -110,7 +104,6 @@ def option_element_string(option: OptionElement): return f"option {result};\n" -@static_init class ProtobufSchema: DEFAULT_LOCATION = Location.get("") From 40e3eb90057ecca543f222942516f11915d16bd2 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 27 Sep 2021 11:13:52 +0300 Subject: [PATCH 035/168] Update schema.py speedup code --- karapace/protobuf/schema.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 541622a89..dcd9f3aff 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -29,13 +29,10 @@ def add_slashes(text: str) -> str: } result: str = "" for char in text: - try: - result += escape_dict[char] - except KeyError: - result += char + c = escape_dict.get(char) + result += c if c is not None else char return result - def message_element_string(element: MessageElement) -> str: result: list = list() append_documentation(result, element.documentation) From e3bf15c14fd271291eefd8b7bb1cbd09cd511ee0 Mon Sep 17 00:00:00 2001 From: Sergiy 
Zaschipas Date: Mon, 27 Sep 2021 12:08:49 +0300 Subject: [PATCH 036/168] Update schema.py fix lint issue --- karapace/protobuf/schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index dcd9f3aff..b55e94415 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -33,6 +33,7 @@ def add_slashes(text: str) -> str: result += c if c is not None else char return result + def message_element_string(element: MessageElement) -> str: result: list = list() append_documentation(result, element.documentation) From da951fe513e1377ea82fa1a091e7d56246d0000e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 27 Sep 2021 20:48:20 +0300 Subject: [PATCH 037/168] Update schema.py fix by @hackaugusto suggestion --- karapace/protobuf/schema.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index b55e94415..a18ba6650 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -27,11 +27,8 @@ def add_slashes(text: str) -> str: '\"': '\\"', '\\': '\\\\' } - result: str = "" - for char in text: - c = escape_dict.get(char) - result += c if c is not None else char - return result + trans_table = str.maketrans(escape_dict) + return text.translate(trans_table) def message_element_string(element: MessageElement) -> str: From 9f5d1208052ecaec4f7646c6ac2bafe9601c0d31 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 27 Sep 2021 20:54:24 +0300 Subject: [PATCH 038/168] Update schema.py remove to_json --- karapace/protobuf/schema.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index a18ba6650..5697f6895 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -113,9 +113,6 @@ def __str__(self) -> str: log.warning("CACHE_STRING:%s", self.cache_string) return self.cache_string - def to_json(self) -> str: - return 
self.to_schema() - def to_schema(self): strings: list = [] shm: ProtoFileElement = self.schema From a33362506b90fe1b7bffc4681dc94cff0e2026c0 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 1 Oct 2021 19:48:50 +0300 Subject: [PATCH 039/168] backup --- karapace/compatibility/protobuf/checks.py | 16 ++- karapace/protobuf/compare_restult.py | 126 ++++++++++++++++++++++ karapace/protobuf/enum_element.py | 24 ++++- karapace/protobuf/field_element.py | 43 +++++++- karapace/protobuf/message_element.py | 57 +++++++++- karapace/protobuf/one_of_element.py | 22 +++- karapace/protobuf/proto_file_element.py | 78 +++++++++++++- karapace/protobuf/proto_type.py | 2 +- 8 files changed, 356 insertions(+), 12 deletions(-) create mode 100644 karapace/protobuf/compare_restult.py diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index e761655b2..9248313c4 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -1,11 +1,17 @@ # TODO: PROTOBUF* this functionality must be implemented -from karapace.avro_compatibility import SchemaCompatibilityResult +from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaIncompatibilityType +from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.schema_reader import SchemaType, TypedSchema def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: - # TODO: PROTOBUF* for investigation purposes yet - if writer != reader: - return SchemaCompatibilityResult.compatible() + reader_proto_file_element: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, reader) + writer_proto_file_element: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, writer) - return SchemaCompatibilityResult.compatible() + if writer_proto_file_element.compatible(reader_proto_file_element): + return SchemaCompatibilityResult.compatible() + #TODO: move incompatibility level raising to 
ProtoFileElement.compatible() + return SchemaCompatibilityResult.incompatible( + incompat_type=SchemaIncompatibilityType.name_mismatch, message=f" missed ", location=[] + ) diff --git a/karapace/protobuf/compare_restult.py b/karapace/protobuf/compare_restult.py new file mode 100644 index 000000000..86d5e4071 --- /dev/null +++ b/karapace/protobuf/compare_restult.py @@ -0,0 +1,126 @@ +from enum import auto, Enum +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.proto_type import ProtoType + + +class Modification(Enum): + # TODO + PACKAGE_ALTER = auto() + SYNTAX_ALTER = auto() + MESSAGE_ADD = auto() + MESSAGE_DROP = auto() + MESSAGE_MOVE = auto() + ENUM_CONSTANT_ADD = auto() + ENUM_CONSTANT_ALTER = auto() + ENUM_CONSTANT_DROP = auto() + TYPE_ALTER = auto() + FIELD_ADD = auto() + FIELD_DROP = auto() + FIELD_MOVE = auto() + FIELD_LABEL_ALTER = auto() + FIELD_KIND_ALTER = auto() + ONE_OF_ADD = auto() + ONE_OF_DROP = auto() + ONE_OF_MOVE = auto() + ONE_OF_FIELD_ADD = auto() + ONE_OF_FIELD_DROP = auto() + ONE_OF_FIELD_MOVE = auto() + + # protobuf compatibility issues is described in at + # https://yokota.blog/2021/08/26/understanding-protobuf-compatibility/ + @classmethod + def get_incompatible(cls): + return [cls.FIELD_LABEL_ALTER, cls.FIELD_KIND_ALTER, cls.ONE_OF_FIELD_ADD, cls.ONE_OF_FIELD_DROP] + + +class ModificationRecord: + def __init__(self, modification: Modification, path: str): + self.modification: Modification = modification + self.path: str = path + + def to_str(self): + # TODO + pass + + +class CompareResult: + def __init__(self): + self.result: list = [] + self.path: list = [] + + def push_path(self, string: str): + self.path.append(string) + + def pop_path(self): + self.path.pop() + + def add_modification(self, modification: Modification): + record = ModificationRecord(modification, ".".join(self.path)) + self.result.append(record) + + +class CompareTypes: + def __init__(self): + self.self_package_name = '' + 
self.other_package_name = '' + self.self_canonical_name: list = [] + self.other_canonical_name: list = [] + self.self_types = dict() + self.other_types = dict() + self.locked_messages = [] + + def add_other_type(self, name: str, type_: ProtoType): + self.other_types[name] = type_ + + def add_self_type(self, name: str, type_: ProtoType): + self.self_types[name] = type_ + + def self_type_name(self, type_: ProtoType): + string: str = type_.string + name: str + canonical_name: list = list(self.self_canonical_name) + if string[0] == '.': + name = string[1:] + return self.self_types.get(name) + else: + if self.self_package_name != '': + canonical_name.insert(0, self.self_package_name) + while canonical_name is not None: + pretender: str = ".".join(canonical_name) + '.' + string + t = self.self_types.get(pretender) + if t is not None: + return pretender + if self.self_types.get(string) is not None: + return string + return None + + def lock_message(self, message: MessageElement) -> bool: + if message in self.locked_messages: + return False + self.locked_messages.append(message) + return True + + def unlock_message(self, message: MessageElement) -> bool: + if message in self.locked_messages: + self.locked_messages.remove(message) + return True + return False + + def other_type_name(self, type_: ProtoType): + string: str = type_.string + name: str + canonical_name: list = list(self.other_canonical_name) + if string[0] == '.': + name = string[1:] + return self.other_types.get(name) + else: + if self.other_package_name != '': + canonical_name.insert(0, self.other_package_name) + while canonical_name is not None: + pretender: str = ".".join(canonical_name) + '.' 
+ string + t = self.other_types.get(pretender) + if t is not None: + return pretender + if self.other_types.get(string) is not None: + return string + return None diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 52f66049f..bb559427a 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -1,6 +1,7 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt - +from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.location import Location from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented @@ -30,3 +31,24 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) + + def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTypes): + self_tags: dict = dict() + other_tags: dict = dict() + constant: EnumConstantElement + + for constant in self.constants: + self_tags[constant.tag] = constant + + for constant in other.constants: + other_tags[constant.tag] = constant + + for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): + + if self_tags.get(tag) is None: + result.add_modification(Modification.ENUM_CONSTANT_ADD) + elif other_tags.get(tag) is None: + result.add_modification(Modification.ENUM_CONSTANT_DROP) + else: + if self_tags.get(tag).name == other_tags.get(tag).name: + result.add_modification(Modification.ENUM_CONSTANT_ALTER) diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 7eeffa214..8034aa9c4 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -1,6 +1,7 @@ # Ported from square/wire: # 
wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/FieldElement.kt - +from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.field import Field from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement @@ -64,5 +65,43 @@ def options_with_special_values(self) -> list: return options + # Only non-repeated scalar types and Enums support default values. + + def compare(self, other: 'FieldElement', result: CompareResult, types: CompareTypes): + # TODO: serge + + if self.name != other.name: + result.add_modification(Modification.FIELD_NAME_ALTER) + if self.label != other.label: + result.add_modification(Modification.FIELD_LABEL_ALTER) + + self.compare_type(ProtoType.get(self.element_type), ProtoType.get(other.element_type), result, types) + + def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + self_proto_type: ProtoType = ProtoType.get2(self_type.to_str()) + other_proto_type: ProtoType = ProtoType.get2(other_type.to_str()) + if self_proto_type.to_kind() != other_proto_type.to_kind(): + result.add_modification(Modification.FIELD_KIND_ALTER) + else: + if self_proto_type.is_map: + self.compare_map(self_proto_type, other_proto_type, result, types) + else: + self.compare_message(self_proto_type, other_proto_type, result, types) + + def compare_map(self, self_map: ProtoType, other_map: ProtoType, result: CompareResult, types: CompareTypes): + self.compare_type(self_map.key_type, other_map.key_type, result, types) + self.compare_type(self_map.value_type, other_map.value_type, result, types) + + def compare_message(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + # TODO + + self_type_name = types.self_type_name(self_type) + other_type_name = 
types.other_type_name(other_type) + + if self_type_name is None: + raise IllegalArgumentException(f"Cannot determine message type {self_type_name}") + + if other_type_name is None: + raise IllegalArgumentException(f"Cannot determine message type {other_type_name}") -# Only non-repeated scalar types and Enums support default values. + self_type_ = types.self_types.get(self_type_name) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index 508d15830..ffc3d9272 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -1,7 +1,10 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt - +# compatibility routine added +from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location +from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented @@ -68,3 +71,55 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) + + def compare(self, other: 'MessageElement', result: CompareResult, types: CompareTypes): + + if types.lock_message(self): + field: FieldElement + subfield: FieldElement + one_of: OneOfElement + self_tags: dict = dict() + other_tags: dict = dict() + self_one_ofs: dict = dict() + other_one_ofs: dict = dict() + + for field in self.fields: + self_tags[field.tag] = field + + for field in other.fields: + other_tags[field.tag] = field + + for one_of in self.one_ofs: + self_one_ofs[one_of.name] = one_of + + for one_of in other.one_ofs: + other_one_ofs[one_of.name] = one_of + ''' Compare fields ''' + + for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): + result.push_path(tag) + + if self_tags.get(tag) 
is None: + result.add_modification(Modification.FIELD_ADD) + elif other_tags.get(tag) is None: + result.add_modification(Modification.FIELD_DROP) + else: + self_tags[tag].compare(other_tags[tag], result, types) + + result.pop_path() + ''' Compare OneOfs ''' + for name in list(self_one_ofs.keys()) + list(set(other_one_ofs.keys()) - set(self_one_ofs.keys())): + result.push_path(name) + + if self_one_ofs.get(name) is None: + result.add_modification(Modification.ONE_OF_ADD) + elif other_one_ofs.get(name) is None: + result.add_modification(Modification.ONE_OF_DROP) + else: + self_one_ofs[name].compare(other_one_ofs[name], result, types) + + result.pop_path() + + # TODO Compare NestedTypes must be there. + + types.unlock_message(self) diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index d57f90d59..f8dd6d67c 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -1,6 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OneOfElement.kt - +from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification from karapace.protobuf.utils import append_documentation, append_indented @@ -31,3 +31,23 @@ def to_schema(self) -> str: append_indented(result, group.to_schema()) result.append("}\n") return "".join(result) + + def compare(self, other: 'OneOfElement', result: CompareResult, types: CompareTypes): + self_tags: dict = dict() + other_tags: dict = dict() + + for field in self.fields: + self_tags[field.tag] = field + for field in other.fields: + other_tags[field.tag] = field + + for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): + result.push_path(tag) + + if self_tags.get(tag) is None: + result.add_modification(Modification.ONE_OF_FIELD_ADD) + elif other_tags.get(tag) is None: + result.add_modification(Modification.ONE_OF_FIELD_DROP) + else: + 
self_tags[tag].compare(other_tags[tag], result, types) + result.pop_path() diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index c44d54a77..29cc8466f 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -1,7 +1,10 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElement.kt - +from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.enum_element import EnumElement +from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.location import Location +from karapace.protobuf.message_element import MessageElement from karapace.protobuf.syntax import Syntax @@ -29,6 +32,7 @@ def __init__( self.types = types or [] self.public_imports = public_imports or [] self.imports = imports or [] + self.incompatible_changes = Modification.get_incompatible() def to_schema(self): strings: list = [ @@ -89,3 +93,75 @@ def __eq__(self, other: 'ProtoFileElement'): # type: ignore def __repr__(self): return self.to_schema() + + def compare(self, other: 'ProtoFileElement', result: CompareResult): + + if self.package_name != other.package_name: + result.add_modification(Modification.PACKAGE_ALTER) + # TODO: do we need syntax check? 
+ if self.syntax != other.syntax: + result.add_modification(Modification.SYNTAX_ALTER) + + self_types: dict = dict() + other_types: dict = dict() + self_indexes: dict = dict() + other_indexes: dict = dict() + i = 0 + + compare_types = CompareTypes() + for type_ in self.types: + self_types[type_.name] = type_ + self_indexes[type_.name] = i + package_name = self.package_name if self.package_name else '' + compare_types.add_self_type(package_name, type_) + i += 1 + i = 0 + for type_ in other.types: + other_types[type_.name] = type_ + other_indexes[type_.name] = i + package_name = other.package_name if other.package_name else '' + compare_types.add_other_type(package_name, type_) + i += 1 + + for name in list(self_types.keys()) + list(set(other_types.keys()) - set(self_types.keys())): + + result.push_path(name) + + if self_types.get(name) is None and other_types.get(name) is not None: + if isinstance(other_types[name], MessageElement): + result.add_modification(Modification.MESSAGE_ADD) + elif isinstance(other_types[name], EnumElement): + result.add_modification(Modification.ENUM_ADD) + else: + # TODO: write message + raise IllegalArgumentException() + elif self_types.get(name) is not None and other_types.get(name) is None: + if isinstance(self_types[name], MessageElement): + result.add_modification(Modification.MESSAGE_DROP) + elif isinstance(self_types[name], EnumElement): + result.add_modification(Modification.ENUM_DROP) + else: + # TODO: write message + raise IllegalArgumentException() + else: + if other_indexes[name] != self_indexes[name]: + if isinstance(self_types[name], MessageElement): + # is it still compatible? 
+ result.add_modification(Modification.MESSAGE_MOVE) + # elif isinstance(self_types[name], EnumElement): + # result.add_modification(Modifications.ENUM_MOVE) + else: + # TODO: write message + raise IllegalArgumentException() + else: + if isinstance(self_types[name], MessageElement) \ + and isinstance(other_types[name], MessageElement): + self_types[name].compare(other_types[name], result, compare_types) + elif isinstance(self_types[name], EnumElement) \ + and isinstance(other_types[name], EnumElement): + self_types[name].compare(other_types[name], result, compare_types) + else: + # incompatible type + result.add_modification(Modification.TYPE_ALTER) + + result.pop_path() diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 7f3bf4df4..42402348f 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -147,7 +147,7 @@ def hash_code(self) -> int: return hash(self.string) @staticmethod - def get(enclosing_type_or_package: str, type_name: str) -> object: + def get(enclosing_type_or_package: str, type_name: str) -> 'ProtoType': return ProtoType.get2(f"{enclosing_type_or_package}.{type_name}") \ if enclosing_type_or_package else ProtoType.get2(type_name) From d56b9dd26e7cdb00adb8b97c31f50be0d8f5ffeb Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 4 Oct 2021 00:05:41 +0300 Subject: [PATCH 040/168] backup compatibility workaround --- karapace/protobuf/compare_restult.py | 72 +++++++--- karapace/protobuf/field_element.py | 64 +++++---- karapace/protobuf/message_element.py | 16 ++- karapace/protobuf/proto_file_element.py | 28 ++-- karapace/protobuf/proto_type.py | 56 +++++++- karapace/protobuf/utils.py | 2 + tests/schemas/protobuf.py | 18 +++ tests/unit/test_compatibility.py | 181 ++++++++++++++++++++++++ tests/unit/test_protobuf_schema.py | 21 ++- 9 files changed, 392 insertions(+), 66 deletions(-) create mode 100644 tests/unit/test_compatibility.py diff --git a/karapace/protobuf/compare_restult.py 
b/karapace/protobuf/compare_restult.py index 86d5e4071..36ce3814b 100644 --- a/karapace/protobuf/compare_restult.py +++ b/karapace/protobuf/compare_restult.py @@ -1,6 +1,6 @@ from enum import auto, Enum -from karapace.protobuf.message_element import MessageElement from karapace.protobuf.proto_type import ProtoType +from karapace.protobuf.type_element import TypeElement class Modification(Enum): @@ -13,24 +13,32 @@ class Modification(Enum): ENUM_CONSTANT_ADD = auto() ENUM_CONSTANT_ALTER = auto() ENUM_CONSTANT_DROP = auto() + ENUM_ADD = auto() + ENUM_DROP = auto() TYPE_ALTER = auto() FIELD_ADD = auto() FIELD_DROP = auto() FIELD_MOVE = auto() FIELD_LABEL_ALTER = auto() FIELD_KIND_ALTER = auto() + FIELD_TYPE_ALTER = auto() ONE_OF_ADD = auto() ONE_OF_DROP = auto() ONE_OF_MOVE = auto() ONE_OF_FIELD_ADD = auto() ONE_OF_FIELD_DROP = auto() ONE_OF_FIELD_MOVE = auto() + FIELD_CONVERTED_TO_ONE_OF = auto() # protobuf compatibility issues is described in at # https://yokota.blog/2021/08/26/understanding-protobuf-compatibility/ - @classmethod - def get_incompatible(cls): - return [cls.FIELD_LABEL_ALTER, cls.FIELD_KIND_ALTER, cls.ONE_OF_FIELD_ADD, cls.ONE_OF_FIELD_DROP] + def iscompatible(self) -> bool: + return self not in [self.FIELD_LABEL_ALTER, + self.FIELD_KIND_ALTER, + self.ONE_OF_FIELD_ADD, + self.ONE_OF_FIELD_DROP, + self.FIELD_CONVERTED_TO_ONE_OF + ] class ModificationRecord: @@ -58,6 +66,13 @@ def add_modification(self, modification: Modification): record = ModificationRecord(modification, ".".join(self.path)) self.result.append(record) + def iscompatible(self): + record: ModificationRecord + for record in self.result: + if not record.modification.iscompatible(): + return False + return True + class CompareTypes: def __init__(self): @@ -68,12 +83,31 @@ def __init__(self): self.self_types = dict() self.other_types = dict() self.locked_messages = [] + self.environment = [] - def add_other_type(self, name: str, type_: ProtoType): + def add_self_type(self, name: str, type_: 
TypeElement): + if name: + name = name + '.' + else: + name = type_.name + self.self_types[name] = type_ + for t in type_.nested_types: + self.add_self_type(name, t) + + def add_other_type(self, name: str, type_: TypeElement): + if name: + name = name + '.' + else: + name = type_.name self.other_types[name] = type_ + for t in type_.nested_types: + self.add_other_type(name, t) - def add_self_type(self, name: str, type_: ProtoType): - self.self_types[name] = type_ + def get_self_type(self, name) -> TypeElement: + return self.self_types.get(self.self_type_name(name)) + + def get_other_type(self, name) -> TypeElement: + return self.other_types.get(self.other_type_name(name)) def self_type_name(self, type_: ProtoType): string: str = type_.string @@ -94,18 +128,6 @@ def self_type_name(self, type_: ProtoType): return string return None - def lock_message(self, message: MessageElement) -> bool: - if message in self.locked_messages: - return False - self.locked_messages.append(message) - return True - - def unlock_message(self, message: MessageElement) -> bool: - if message in self.locked_messages: - self.locked_messages.remove(message) - return True - return False - def other_type_name(self, type_: ProtoType): string: str = type_.string name: str @@ -124,3 +146,15 @@ def other_type_name(self, type_: ProtoType): if self.other_types.get(string) is not None: return string return None + + def lock_message(self, message: object) -> bool: + if message in self.locked_messages: + return False + self.locked_messages.append(message) + return True + + def unlock_message(self, message: object) -> bool: + if message in self.locked_messages: + self.locked_messages.remove(message) + return True + return False diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 8034aa9c4..6b8c87078 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -4,23 +4,25 @@ from karapace.protobuf.exception import IllegalArgumentException 
from karapace.protobuf.field import Field from karapace.protobuf.location import Location +from karapace.protobuf.message_element import MessageElement from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_type import ProtoType +from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_options class FieldElement: def __init__( - self, - location: Location, - label: Field.Label = None, - element_type: str = "", - name: str = None, - default_value: str = None, - json_name: str = None, - tag: int = None, - documentation: str = "", - options: list = None + self, + location: Location, + label: Field.Label = None, + element_type: str = "", + name: str = None, + default_value: str = None, + json_name: str = None, + tag: int = None, + documentation: str = "", + options: list = None ): self.location = location self.label = label @@ -75,33 +77,43 @@ def compare(self, other: 'FieldElement', result: CompareResult, types: CompareTy if self.label != other.label: result.add_modification(Modification.FIELD_LABEL_ALTER) - self.compare_type(ProtoType.get(self.element_type), ProtoType.get(other.element_type), result, types) - - def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): - self_proto_type: ProtoType = ProtoType.get2(self_type.to_str()) - other_proto_type: ProtoType = ProtoType.get2(other_type.to_str()) - if self_proto_type.to_kind() != other_proto_type.to_kind(): - result.add_modification(Modification.FIELD_KIND_ALTER) - else: - if self_proto_type.is_map: - self.compare_map(self_proto_type, other_proto_type, result, types) - else: - self.compare_message(self_proto_type, other_proto_type, result, types) + self.compare_type(ProtoType.get2(self.element_type), ProtoType.get2(other.element_type), result, types) def compare_map(self, self_map: ProtoType, other_map: ProtoType, result: CompareResult, types: CompareTypes): 
self.compare_type(self_map.key_type, other_map.key_type, result, types) self.compare_type(self_map.value_type, other_map.value_type, result, types) def compare_message(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): - # TODO + # TODO ... + + self_type_element: MessageElement = types.get_self_type(self_type.__str__()) + other_type_element: MessageElement = types.get_other_type(other_type.__str__()) self_type_name = types.self_type_name(self_type) other_type_name = types.other_type_name(other_type) if self_type_name is None: - raise IllegalArgumentException(f"Cannot determine message type {self_type_name}") + raise IllegalArgumentException(f"Cannot determine message type {self_type}") if other_type_name is None: - raise IllegalArgumentException(f"Cannot determine message type {other_type_name}") + raise IllegalArgumentException(f"Cannot determine message type {other_type}") + + if self_type_name != other_type_name: + result.add_modification(Modification.FIELD_TYPE_ALTER) - self_type_ = types.self_types.get(self_type_name) + self_type_element.compare(other_type_element, result, types) + + + def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + + if self_type.is_scalar == other_type.is_scalar and \ + self_type.is_map == other_type.is_map: + if self_type.is_map: + self.compare_map(self_type, other_type, result, types) + elif self_type.is_scalar: + if self_type.compatibility_kind() != other_type.compatibility_kind(): + result.add_modification(Modification.FIELD_KIND_ALTER) + else: + self.compare_message(self_type, other_type, result, types) + else: + result.add_modification(Modification.FIELD_KIND_ALTER) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index ffc3d9272..a7f70d733 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -2,7 +2,6 @@ # 
wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt # compatibility routine added from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification -from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.type_element import TypeElement @@ -75,8 +74,8 @@ def to_schema(self) -> str: def compare(self, other: 'MessageElement', result: CompareResult, types: CompareTypes): if types.lock_message(self): - field: FieldElement - subfield: FieldElement + field: 'FieldElement' + subfield: 'FieldElement' one_of: OneOfElement self_tags: dict = dict() other_tags: dict = dict() @@ -94,8 +93,17 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare for one_of in other.one_ofs: other_one_ofs[one_of.name] = one_of - ''' Compare fields ''' + for field in other.one_ofs: + result.push_path(tag) + for subfield in field.fields: + tag = subfield.tag + if self_tags.get(tag): + self_tags.pop(tag) + result.add_modification(Modification.FIELD_CONVERTED_TO_ONE_OF) + result.pop_path() + + ''' Compare fields ''' for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): result.push_path(tag) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 29cc8466f..ae4239a01 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -6,20 +6,21 @@ from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement from karapace.protobuf.syntax import Syntax +from karapace.protobuf.type_element import TypeElement class ProtoFileElement: def __init__( - self, - location: Location, - package_name: str = None, - syntax: Syntax = None, - imports: list = None, - public_imports: list = None, - types=None, - services: list 
= None, - extend_declarations: list = None, - options: list = None + self, + location: Location, + package_name: str = None, + syntax: Syntax = None, + imports: list = None, + public_imports: list = None, + types=None, + services: list = None, + extend_declarations: list = None, + options: list = None ): if types is None: types = [] @@ -32,7 +33,6 @@ def __init__( self.types = types or [] self.public_imports = public_imports or [] self.imports = imports or [] - self.incompatible_changes = Modification.get_incompatible() def to_schema(self): strings: list = [ @@ -94,7 +94,7 @@ def __eq__(self, other: 'ProtoFileElement'): # type: ignore def __repr__(self): return self.to_schema() - def compare(self, other: 'ProtoFileElement', result: CompareResult): + def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareResult: if self.package_name != other.package_name: result.add_modification(Modification.PACKAGE_ALTER) @@ -109,6 +109,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult): i = 0 compare_types = CompareTypes() + type_: TypeElement for type_ in self.types: self_types[type_.name] = type_ self_indexes[type_.name] = i @@ -165,3 +166,6 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult): result.add_modification(Modification.TYPE_ALTER) result.pop_path() + + return result + diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 42402348f..b14b3524a 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -4,9 +4,11 @@ Names a protocol buffer message, enumerated type, service, map, or a scalar. This class models a fully-qualified name using the protocol buffer package. 
""" - +from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.kotlin_wrapper import check, require from karapace.protobuf.option_element import OptionElement +from enum import Enum, auto +from typing import Optional def static_init(cls): @@ -68,11 +70,13 @@ def static_init(cls): cls.SCALAR_TYPES[a.string] = a cls.NUMERIC_SCALAR_TYPES: tuple = ( - cls.DOUBLE, cls.FLOAT, cls.FIXED32, cls.FIXED64, cls.INT32, cls.INT64, cls.SFIXED32, cls.SFIXED64, cls.SINT32, + cls.DOUBLE, cls.FLOAT, cls.FIXED32, cls.FIXED64, cls.INT32, cls.INT64, cls.SFIXED32, cls.SFIXED64, + cls.SINT32, cls.SINT64, cls.UINT32, cls.UINT64 ) - def __init__(self, is_scalar: bool, string: str, key_type=None, value_type=None): + def __init__(self, is_scalar: bool, string: str, key_type: Optional['ProtoType'] = None, + value_type: Optional['ProtoType'] = None): """ Creates a scalar or message type. """ if not key_type and not value_type: self.is_scalar = is_scalar @@ -166,5 +170,49 @@ def get2(name: str) -> 'ProtoType': return ProtoType(False, name) @staticmethod - def get3(key_type: object, value_type: object, name: str) -> object: + def get3(key_type: 'ProtoType', value_type: 'ProtoType', name: str) -> object: return ProtoType(False, name, key_type, value_type) + + """ schmea compatibility check functionality karapace addon """ + """ Based on table https://developers.google.com/protocol-buffers/docs/proto3#scalar """ + + class CompatibilityKind(Enum): + VARIANT = auto() + SVARIANT = auto() # sint has incompatible format with int but compatible with it by size + FIXED64 = auto() + LENGTH_DELIMITED = auto() + FIXED32 = auto() + DOUBLE = auto() + FLOAT = auto() + + def compatibility_kind(self) -> 'ProtoType.CompatibilityKind': + + result = { + "int32": ProtoType.CompatibilityKind.VARIANT, + "int64": ProtoType.CompatibilityKind.VARIANT, + "uint32": ProtoType.CompatibilityKind.VARIANT, + "uint64": ProtoType.CompatibilityKind.VARIANT, + "bool": 
ProtoType.CompatibilityKind.VARIANT, + + "sint32": ProtoType.CompatibilityKind.SVARIANT, + "sint64": ProtoType.CompatibilityKind.SVARIANT, + + "double": ProtoType.CompatibilityKind.DOUBLE, # it is compatible by size with FIXED64 + + "fixed64": ProtoType.CompatibilityKind.FIXED64, + "sfixed64": ProtoType.CompatibilityKind.FIXED64, + + "float": ProtoType.CompatibilityKind.FLOAT, # it is compatible by size with FIXED32 + + "fixed32": ProtoType.CompatibilityKind.FIXED32, + "sfixed32": ProtoType.CompatibilityKind.FIXED32, + + "string": ProtoType.CompatibilityKind.LENGTH_DELIMITED, + "bytes": ProtoType.CompatibilityKind.LENGTH_DELIMITED, + + }.get(self.simple_name) + + if result: + return result + else: + raise IllegalArgumentException(f"undefined type: {self.simple_name}") diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index e70af8d03..d3fdb1fec 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -60,6 +60,7 @@ def append_indented(data: list, value: str) -> None: data.append("\n") + MIN_TAG_VALUE = 1 MAX_TAG_VALUE = ((1 << 29) & 0xffffffffffffffff) - 1 # 536,870,911 @@ -67,6 +68,7 @@ def append_indented(data: list, value: str) -> None: RESERVED_TAG_VALUE_END = 19999 """ True if the supplied value is in the valid tag range and not reserved. 
""" + # class MyInt(int): # def is_valid_tag(self) -> bool: # return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or\ diff --git a/tests/schemas/protobuf.py b/tests/schemas/protobuf.py index e52414cbf..02f5d6a72 100644 --- a/tests/schemas/protobuf.py +++ b/tests/schemas/protobuf.py @@ -53,3 +53,21 @@ |} | """ + +schema_protobuf_compare_one = """ +|syntax = "proto3"; +| +|option java_package = "com.codingharbour.protobuf"; +|option java_outer_classname = "TestEnumOrder"; +| +|message Message { +| int32 query = 1; +| string content = 2; +|} +|enum Enum { +| HIGH = 0; +| MIDDLE = 1; +| LOW = 2; +|} +| +""" diff --git a/tests/unit/test_compatibility.py b/tests/unit/test_compatibility.py new file mode 100644 index 000000000..01c2a917d --- /dev/null +++ b/tests/unit/test_compatibility.py @@ -0,0 +1,181 @@ +from karapace.protobuf.compare_restult import CompareResult +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.protobuf.location import Location +from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.protobuf.proto_parser import ProtoParser + +location: Location = Location.get("some/folder", "file.proto") + + +def test_compatibility_package(): + self_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str = 1; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + other_schema = """ + |syntax = "proto3"; + |package a2; + |message TestMessage { + | message Value { + | string str = 1; + | } + | string test = 1; + | .a2.TestMessage.Value val = 2; + |} + |""" + + self_schema = trim_margin(self_schema) + other_schema = trim_margin(other_schema) + self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) + other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) + result = CompareResult() + self_parsed.compare(other_parsed, result) + assert result.iscompatible() + + +def test_compatibility_field_add(): + 
self_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str = 1; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + other_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str = 1; + | string str2 = 2; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + self_schema = trim_margin(self_schema) + other_schema = trim_margin(other_schema) + self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) + other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) + result = self_parsed.compare(other_parsed) + assert result.iscompatible() + + +def test_compatibility_field_drop(): + self_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str = 1; + | string str2 = 2; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + other_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str = 1; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + self_schema = trim_margin(self_schema) + other_schema = trim_margin(other_schema) + self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) + other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) + result = self_parsed.compare(other_parsed) + assert result.iscompatible() + + +def test_compatibility_field_add_drop(): + self_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str2 = 1; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + other_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str = 1; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + self_schema = trim_margin(self_schema) + other_schema 
= trim_margin(other_schema) + self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) + other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) + result = self_parsed.compare(other_parsed) + assert result.iscompatible() + + +def test_compatibility_enum_add(): + self_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str2 = 1; + | int32 x = 2; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + other_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str = 1; + | Enu x = 2; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + | enum Enu { + | A = 0; + | B = 1; + | } + |} + |""" + + self_schema = trim_margin(self_schema) + other_schema = trim_margin(other_schema) + self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) + other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) + result = self_parsed.compare(other_parsed) + assert result.iscompatible() diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 621af52d7..2fce63f5b 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -1,8 +1,9 @@ from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location +from karapace.protobuf.proto_file_element import ProtoFileElement from karapace.schema_reader import SchemaType, TypedSchema from tests.schemas.protobuf import ( - schema_protobuf_order_after, schema_protobuf_order_before, schema_protobuf_schema_registry1 + schema_protobuf_compare_one, schema_protobuf_order_after, schema_protobuf_order_before, schema_protobuf_schema_registry1 ) location: Location = Location.get("file.proto") @@ -22,3 +23,21 @@ def test_protobuf_schema_sort(): result = str(protobuf_schema) proto2 = trim_margin(schema_protobuf_order_after) assert result == proto2 + + +def 
test_protobuf_schema_compare(): + proto1 = trim_margin(schema_protobuf_order_after) + protobuf_schema1: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto1) + proto2 = trim_margin(schema_protobuf_compare_one) + protobuf_schema2: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto2) + result = protobuf_schema1.compatible(protobuf_schema2) + assert result is True + + +def test_protobuf_schema_compare(): + proto1 = trim_margin(schema_protobuf_order_after) + protobuf_schema1: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto1) + proto2 = trim_margin(schema_protobuf_compare_one) + protobuf_schema2: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto2) + result = protobuf_schema2.compatible(protobuf_schema1) + assert result is False From 31a399a47e54d5f45ff136aa4f72d9c5fd4be6e8 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 12 Oct 2021 01:37:22 +0300 Subject: [PATCH 041/168] Tests and Debug workaround --- karapace/compatibility/protobuf/checks.py | 31 +++-- karapace/protobuf/compare_restult.py | 114 ++-------------- karapace/protobuf/compare_type_storage.py | 157 ++++++++++++++++++++++ karapace/protobuf/enum_element.py | 5 +- karapace/protobuf/field_element.py | 61 +++++---- karapace/protobuf/message_element.py | 14 +- karapace/protobuf/one_of_element.py | 5 +- karapace/protobuf/proto_file_element.py | 24 ++-- karapace/protobuf/proto_type.py | 27 ++-- karapace/protobuf/utils.py | 2 - tests/unit/test_compare_elements.py | 76 +++++++++++ tests/unit/test_compatibility.py | 49 ++++--- tests/unit/test_protobuf_schema.py | 22 +-- 13 files changed, 381 insertions(+), 206 deletions(-) create mode 100644 karapace/protobuf/compare_type_storage.py create mode 100644 tests/unit/test_compare_elements.py diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index 9248313c4..d9be4eac7 100644 --- a/karapace/compatibility/protobuf/checks.py +++ 
b/karapace/compatibility/protobuf/checks.py @@ -1,17 +1,30 @@ # TODO: PROTOBUF* this functionality must be implemented -from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaIncompatibilityType -from karapace.protobuf.proto_file_element import ProtoFileElement +from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaCompatibilityType +from karapace.protobuf.compare_restult import CompareResult, ModificationRecord from karapace.schema_reader import SchemaType, TypedSchema def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: + reader_proto_file_element: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, reader).schema + writer_proto_file_element: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, writer).schema + result: CompareResult = CompareResult() + writer_proto_file_element.schema.schema.compare(reader_proto_file_element.schema.schema, result) + if result.is_compatible(): + return SchemaCompatibilityResult.compatible() + # TODO: maybe move incompatibility level raising to ProtoFileElement.compatible() ?? 
- reader_proto_file_element: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, reader) - writer_proto_file_element: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, writer) + incompatibilities = list() + record: ModificationRecord + locations: set = set() + messages: set = set() + for record in result.result: + incompatibilities.append(record.modification.__str__()) + locations.add(record.path) + messages.add(record.message) - if writer_proto_file_element.compatible(reader_proto_file_element): - return SchemaCompatibilityResult.compatible() - #TODO: move incompatibility level raising to ProtoFileElement.compatible() - return SchemaCompatibilityResult.incompatible( - incompat_type=SchemaIncompatibilityType.name_mismatch, message=f" missed ", location=[] + return SchemaCompatibilityResult( + compatibility=SchemaCompatibilityType.incompatible, + incompatibilities=list(incompatibilities), + locations=set(locations), + messages=set(messages), ) diff --git a/karapace/protobuf/compare_restult.py b/karapace/protobuf/compare_restult.py index 36ce3814b..5ec76d302 100644 --- a/karapace/protobuf/compare_restult.py +++ b/karapace/protobuf/compare_restult.py @@ -1,6 +1,4 @@ from enum import auto, Enum -from karapace.protobuf.proto_type import ProtoType -from karapace.protobuf.type_element import TypeElement class Modification(Enum): @@ -20,6 +18,7 @@ class Modification(Enum): FIELD_DROP = auto() FIELD_MOVE = auto() FIELD_LABEL_ALTER = auto() + FIELD_NAME_ALTER = auto() FIELD_KIND_ALTER = auto() FIELD_TYPE_ALTER = auto() ONE_OF_ADD = auto() @@ -32,23 +31,24 @@ class Modification(Enum): # protobuf compatibility issues is described in at # https://yokota.blog/2021/08/26/understanding-protobuf-compatibility/ - def iscompatible(self) -> bool: - return self not in [self.FIELD_LABEL_ALTER, - self.FIELD_KIND_ALTER, - self.ONE_OF_FIELD_ADD, - self.ONE_OF_FIELD_DROP, - self.FIELD_CONVERTED_TO_ONE_OF - ] + def is_compatible(self) -> bool: + return self not in [ + 
self.FIELD_LABEL_ALTER, self.FIELD_KIND_ALTER, self.ONE_OF_FIELD_ADD, self.ONE_OF_FIELD_DROP, + self.FIELD_CONVERTED_TO_ONE_OF + ] class ModificationRecord: def __init__(self, modification: Modification, path: str): self.modification: Modification = modification self.path: str = path + if modification.is_compatible(): + self.message: str = f"Compatible modification {self.modification} found" + else: + self.message: str = f"Incompatible modification {self.modification} found" def to_str(self): - # TODO - pass + return self.message class CompareResult: @@ -57,7 +57,7 @@ def __init__(self): self.path: list = [] def push_path(self, string: str): - self.path.append(string) + self.path.append(str(string)) def pop_path(self): self.path.pop() @@ -66,95 +66,9 @@ def add_modification(self, modification: Modification): record = ModificationRecord(modification, ".".join(self.path)) self.result.append(record) - def iscompatible(self): + def is_compatible(self): record: ModificationRecord for record in self.result: - if not record.modification.iscompatible(): + if not record.modification.is_compatible(): return False return True - - -class CompareTypes: - def __init__(self): - self.self_package_name = '' - self.other_package_name = '' - self.self_canonical_name: list = [] - self.other_canonical_name: list = [] - self.self_types = dict() - self.other_types = dict() - self.locked_messages = [] - self.environment = [] - - def add_self_type(self, name: str, type_: TypeElement): - if name: - name = name + '.' - else: - name = type_.name - self.self_types[name] = type_ - for t in type_.nested_types: - self.add_self_type(name, t) - - def add_other_type(self, name: str, type_: TypeElement): - if name: - name = name + '.' 
- else: - name = type_.name - self.other_types[name] = type_ - for t in type_.nested_types: - self.add_other_type(name, t) - - def get_self_type(self, name) -> TypeElement: - return self.self_types.get(self.self_type_name(name)) - - def get_other_type(self, name) -> TypeElement: - return self.other_types.get(self.other_type_name(name)) - - def self_type_name(self, type_: ProtoType): - string: str = type_.string - name: str - canonical_name: list = list(self.self_canonical_name) - if string[0] == '.': - name = string[1:] - return self.self_types.get(name) - else: - if self.self_package_name != '': - canonical_name.insert(0, self.self_package_name) - while canonical_name is not None: - pretender: str = ".".join(canonical_name) + '.' + string - t = self.self_types.get(pretender) - if t is not None: - return pretender - if self.self_types.get(string) is not None: - return string - return None - - def other_type_name(self, type_: ProtoType): - string: str = type_.string - name: str - canonical_name: list = list(self.other_canonical_name) - if string[0] == '.': - name = string[1:] - return self.other_types.get(name) - else: - if self.other_package_name != '': - canonical_name.insert(0, self.other_package_name) - while canonical_name is not None: - pretender: str = ".".join(canonical_name) + '.' 
+ string - t = self.other_types.get(pretender) - if t is not None: - return pretender - if self.other_types.get(string) is not None: - return string - return None - - def lock_message(self, message: object) -> bool: - if message in self.locked_messages: - return False - self.locked_messages.append(message) - return True - - def unlock_message(self, message: object) -> bool: - if message in self.locked_messages: - self.locked_messages.remove(message) - return True - return False diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py new file mode 100644 index 000000000..d7a2ece8e --- /dev/null +++ b/karapace/protobuf/compare_type_storage.py @@ -0,0 +1,157 @@ +from karapace.protobuf.exception import IllegalArgumentException +from karapace.protobuf.proto_type import ProtoType +from karapace.protobuf.type_element import TypeElement +from typing import Optional + + +class CompareTypes: + def __init__(self): + + self.self_package_name = '' + self.other_package_name = '' + self.self_canonical_name: list = [] + self.other_canonical_name: list = [] + self.self_types = dict() + self.other_types = dict() + self.locked_messages = [] + self.environment = [] + + def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, types: dict): + name: str + if prefix: + name = prefix + '.' 
+ type_element.name + else: + name = type_element.name + + from karapace.protobuf.message_element import MessageElement + if isinstance(type_element, MessageElement): # add support of MapEntry messages + if 'map_entry' in type_element.options: + from karapace.protobuf.field_element import FieldElement + key: Optional[FieldElement] = None + value: Optional[FieldElement] = None + for f in type_element.fields: + if f.name == 'key': + key = f + break + for f in type_element.fields: + if f.name == 'value': + value = f + break + types[name] = TypeRecordMap(package_name, type_element, key, value) + else: + types[name] = TypeRecord(package_name, type_element) + else: + types[name] = TypeRecord(package_name, type_element) + + for t in type_element.nested_types: + self.add_a_type(name, package_name, t, types) + + def add_self_type(self, package_name: str, type_element: TypeElement): + self.add_a_type(package_name, package_name, type_element, self.self_types) + + def add_other_type(self, package_name: str, type_element: TypeElement): + self.add_a_type(package_name, package_name, type_element, self.other_types) + + def get_self_type(self, t: ProtoType) -> Optional['TypeRecord']: + name = self.self_type_name(t) + if name is not None: + type_record = self.self_types.get(name) + return type_record + return None + + def get_other_type(self, t: ProtoType) -> Optional['TypeRecord']: + name = self.other_type_name(t) + if name is not None: + type_record = self.other_types.get(name) + return type_record + return None + + def self_type_name(self, t: ProtoType): + string: str = t.string + name: str + canonical_name: list = list(self.self_canonical_name) + if string[0] == '.': + name = string[1:] + if self.self_types.get(name): + return name + return None + if self.self_package_name != '': + canonical_name.insert(0, self.self_package_name) + while len(canonical_name) > 0: + pretender: str = ".".join(canonical_name) + '.' 
+ string + t = self.self_types.get(pretender) + if t is not None: + return pretender + if self.self_types.get(string) is not None: + return string + return None + + def other_type_name(self, t: ProtoType): + string: str = t.string + name: str + canonical_name: list = list(self.other_canonical_name) + if string[0] == '.': + name = string[1:] + if self.other_types.get(name): + return name + return None + if self.other_package_name != '': + canonical_name.insert(0, self.other_package_name) + while len(canonical_name) > 0: + pretender: str = ".".join(canonical_name) + '.' + string + t = self.other_types.get(pretender) + if t is not None: + return pretender + if self.other_types.get(string) is not None: + return string + return None + + def self_type_short_name(self, t: ProtoType): + name = self.self_type_name(t) + if name is None: + raise IllegalArgumentException(f"Cannot determine message type {t}") + type_record: TypeRecord = self.self_types.get(name) + if name.startswith(type_record.package_name): + return name[(len(type_record.package_name) + 1):] + return name + + def other_type_short_name(self, t: ProtoType): + name = self.other_type_name(t) + if name is None: + raise IllegalArgumentException(f"Cannot determine message type {t}") + type_record: TypeRecord = self.other_types.get(name) + if name.startswith(type_record.package_name): + return name[(len(type_record.package_name) + 1):] + return name + + def lock_message(self, message: object) -> bool: + if message in self.locked_messages: + return False + self.locked_messages.append(message) + return True + + def unlock_message(self, message: object) -> bool: + if message in self.locked_messages: + self.locked_messages.remove(message) + return True + return False + + +class TypeRecord: + def __init__(self, package_name: str, type_element: TypeElement): + self.package_name = package_name + self.type_element = type_element + + +class TypeRecordMap(TypeRecord): + def __init__(self, package_name: str, type_element: 
TypeElement, key, value): + super().__init__(package_name, type_element) + try: + from karapace.protobuf.field_element import FieldElement + self.key: FieldElement = key + self.value: FieldElement = value + except Exception: + raise IllegalArgumentException("TypeRecordMap") + + def map_type(self) -> ProtoType: + return ProtoType.get2(f"map<{self.key.element_type}, {self.value.element_type}>") diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index bb559427a..92a8b4e21 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -1,6 +1,7 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt -from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.location import Location from karapace.protobuf.type_element import TypeElement @@ -36,6 +37,8 @@ def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTyp self_tags: dict = dict() other_tags: dict = dict() constant: EnumConstantElement + if types: + pass for constant in self.constants: self_tags[constant.tag] = constant diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 6b8c87078..68fdda983 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -1,17 +1,18 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/FieldElement.kt -from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification -from karapace.protobuf.exception import IllegalArgumentException +from karapace.protobuf.compare_restult import CompareResult, Modification +from 
karapace.protobuf.compare_type_storage import TypeRecordMap + from karapace.protobuf.field import Field from karapace.protobuf.location import Location -from karapace.protobuf.message_element import MessageElement from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_type import ProtoType -from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_options class FieldElement: + from karapace.protobuf.compare_type_storage import CompareTypes + def __init__( self, location: Location, @@ -70,7 +71,6 @@ def options_with_special_values(self) -> list: # Only non-repeated scalar types and Enums support default values. def compare(self, other: 'FieldElement', result: CompareResult, types: CompareTypes): - # TODO: serge if self.name != other.name: result.add_modification(Modification.FIELD_NAME_ALTER) @@ -83,37 +83,48 @@ def compare_map(self, self_map: ProtoType, other_map: ProtoType, result: Compare self.compare_type(self_map.key_type, other_map.key_type, result, types) self.compare_type(self_map.value_type, other_map.value_type, result, types) - def compare_message(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): - # TODO ... 
- - self_type_element: MessageElement = types.get_self_type(self_type.__str__()) - other_type_element: MessageElement = types.get_other_type(other_type.__str__()) - - self_type_name = types.self_type_name(self_type) - other_type_name = types.other_type_name(other_type) - - if self_type_name is None: - raise IllegalArgumentException(f"Cannot determine message type {self_type}") - - if other_type_name is None: - raise IllegalArgumentException(f"Cannot determine message type {other_type}") + def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + from karapace.protobuf.enum_element import EnumElement + self_type_record = types.get_self_type(self_type) + other_type_record = types.get_other_type(other_type) + self_is_scalar: bool = False + other_is_scalar: bool = False - if self_type_name != other_type_name: - result.add_modification(Modification.FIELD_TYPE_ALTER) + if isinstance(self_type_record, TypeRecordMap): + self_type = self_type_record.map_type() - self_type_element.compare(other_type_element, result, types) + if isinstance(other_type_record, TypeRecordMap): + other_type = other_type_record.map_type() + if self_type.is_scalar or (self_type_record + and isinstance(self_type_record.type_element, EnumElement)): + self_is_scalar = True - def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + if other_type.is_scalar or (other_type_record + and isinstance(other_type_record.type_element, EnumElement)): + other_is_scalar = True - if self_type.is_scalar == other_type.is_scalar and \ + if self_is_scalar == other_is_scalar and \ self_type.is_map == other_type.is_map: if self_type.is_map: self.compare_map(self_type, other_type, result, types) - elif self_type.is_scalar: + elif self_is_scalar: if self_type.compatibility_kind() != other_type.compatibility_kind(): result.add_modification(Modification.FIELD_KIND_ALTER) else: self.compare_message(self_type, 
other_type, result, types) else: result.add_modification(Modification.FIELD_KIND_ALTER) + + @classmethod + def compare_message(cls, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + from karapace.protobuf.message_element import MessageElement + self_type_record = types.get_self_type(self_type) + other_type_record = types.get_other_type(other_type) + self_type_element: MessageElement = self_type_record.type_element + other_type_element: MessageElement = other_type_record.type_element + + if types.self_type_short_name(self_type) != types.other_type_short_name(other_type): + result.add_modification(Modification.FIELD_NAME_ALTER) + else: + self_type_element.compare(other_type_element, result, types) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index a7f70d733..aeabda80d 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -1,7 +1,9 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt # compatibility routine added -from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_type_storage import CompareTypes +from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.type_element import TypeElement @@ -74,8 +76,8 @@ def to_schema(self) -> str: def compare(self, other: 'MessageElement', result: CompareResult, types: CompareTypes): if types.lock_message(self): - field: 'FieldElement' - subfield: 'FieldElement' + field: FieldElement + subfield: FieldElement one_of: OneOfElement self_tags: dict = dict() other_tags: dict = dict() @@ -95,7 +97,7 @@ def compare(self, other: 'MessageElement', result: 
CompareResult, types: Compare other_one_ofs[one_of.name] = one_of for field in other.one_ofs: - result.push_path(tag) + result.push_path(field.tag) for subfield in field.fields: tag = subfield.tag if self_tags.get(tag): @@ -103,7 +105,7 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare result.add_modification(Modification.FIELD_CONVERTED_TO_ONE_OF) result.pop_path() - ''' Compare fields ''' + # Compare fields for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): result.push_path(tag) @@ -115,7 +117,7 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare self_tags[tag].compare(other_tags[tag], result, types) result.pop_path() - ''' Compare OneOfs ''' + # Compare OneOfs for name in list(self_one_ofs.keys()) + list(set(other_one_ofs.keys()) - set(self_one_ofs.keys())): result.push_path(name) diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index f8dd6d67c..a4367aa91 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -1,6 +1,7 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OneOfElement.kt -from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.utils import append_documentation, append_indented @@ -42,7 +43,7 @@ def compare(self, other: 'OneOfElement', result: CompareResult, types: CompareTy other_tags[field.tag] = field for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): - result.push_path(tag) + result.push_path(tag.__str__()) if self_tags.get(tag) is None: result.add_modification(Modification.ONE_OF_FIELD_ADD) diff --git a/karapace/protobuf/proto_file_element.py 
b/karapace/protobuf/proto_file_element.py index ae4239a01..2d266c46c 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -1,6 +1,7 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElement.kt -from karapace.protobuf.compare_restult import CompareResult, CompareTypes, Modification +from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.enum_element import EnumElement from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.location import Location @@ -11,16 +12,16 @@ class ProtoFileElement: def __init__( - self, - location: Location, - package_name: str = None, - syntax: Syntax = None, - imports: list = None, - public_imports: list = None, - types=None, - services: list = None, - extend_declarations: list = None, - options: list = None + self, + location: Location, + package_name: str = None, + syntax: Syntax = None, + imports: list = None, + public_imports: list = None, + types=None, + services: list = None, + extend_declarations: list = None, + options: list = None ): if types is None: types = [] @@ -168,4 +169,3 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe result.pop_path() return result - diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index b14b3524a..c145f8208 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -4,10 +4,10 @@ Names a protocol buffer message, enumerated type, service, map, or a scalar. This class models a fully-qualified name using the protocol buffer package. 
""" +from enum import auto, Enum from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.kotlin_wrapper import check, require from karapace.protobuf.option_element import OptionElement -from enum import Enum, auto from typing import Optional @@ -70,13 +70,13 @@ def static_init(cls): cls.SCALAR_TYPES[a.string] = a cls.NUMERIC_SCALAR_TYPES: tuple = ( - cls.DOUBLE, cls.FLOAT, cls.FIXED32, cls.FIXED64, cls.INT32, cls.INT64, cls.SFIXED32, cls.SFIXED64, - cls.SINT32, + cls.DOUBLE, cls.FLOAT, cls.FIXED32, cls.FIXED64, cls.INT32, cls.INT64, cls.SFIXED32, cls.SFIXED64, cls.SINT32, cls.SINT64, cls.UINT32, cls.UINT64 ) - def __init__(self, is_scalar: bool, string: str, key_type: Optional['ProtoType'] = None, - value_type: Optional['ProtoType'] = None): + def __init__( + self, is_scalar: bool, string: str, key_type: Optional['ProtoType'] = None, value_type: Optional['ProtoType'] = None + ): """ Creates a scalar or message type. """ if not key_type and not value_type: self.is_scalar = is_scalar @@ -157,7 +157,7 @@ def get(enclosing_type_or_package: str, type_name: str) -> 'ProtoType': @staticmethod def get2(name: str) -> 'ProtoType': - scalar = ProtoType.SCALAR_TYPES[name] + scalar = ProtoType.SCALAR_TYPES.get(name) if scalar: return scalar require(name and len(name) != 0 and name.rfind("#") == -1, f"unexpected name: {name}") @@ -173,8 +173,8 @@ def get2(name: str) -> 'ProtoType': def get3(key_type: 'ProtoType', value_type: 'ProtoType', name: str) -> object: return ProtoType(False, name, key_type, value_type) - """ schmea compatibility check functionality karapace addon """ - """ Based on table https://developers.google.com/protocol-buffers/docs/proto3#scalar """ + # schema compatibility check functionality karapace addon + # Based on table https://developers.google.com/protocol-buffers/docs/proto3#scalar """ class CompatibilityKind(Enum): VARIANT = auto() @@ -193,26 +193,19 @@ def compatibility_kind(self) -> 'ProtoType.CompatibilityKind': 
"uint32": ProtoType.CompatibilityKind.VARIANT, "uint64": ProtoType.CompatibilityKind.VARIANT, "bool": ProtoType.CompatibilityKind.VARIANT, - "sint32": ProtoType.CompatibilityKind.SVARIANT, "sint64": ProtoType.CompatibilityKind.SVARIANT, - "double": ProtoType.CompatibilityKind.DOUBLE, # it is compatible by size with FIXED64 - "fixed64": ProtoType.CompatibilityKind.FIXED64, "sfixed64": ProtoType.CompatibilityKind.FIXED64, - "float": ProtoType.CompatibilityKind.FLOAT, # it is compatible by size with FIXED32 - "fixed32": ProtoType.CompatibilityKind.FIXED32, "sfixed32": ProtoType.CompatibilityKind.FIXED32, - "string": ProtoType.CompatibilityKind.LENGTH_DELIMITED, "bytes": ProtoType.CompatibilityKind.LENGTH_DELIMITED, - }.get(self.simple_name) if result: return result - else: - raise IllegalArgumentException(f"undefined type: {self.simple_name}") + + raise IllegalArgumentException(f"undefined type: {self.simple_name}") diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index d3fdb1fec..e70af8d03 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -60,7 +60,6 @@ def append_indented(data: list, value: str) -> None: data.append("\n") - MIN_TAG_VALUE = 1 MAX_TAG_VALUE = ((1 << 29) & 0xffffffffffffffff) - 1 # 536,870,911 @@ -68,7 +67,6 @@ def append_indented(data: list, value: str) -> None: RESERVED_TAG_VALUE_END = 19999 """ True if the supplied value is in the valid tag range and not reserved. 
""" - # class MyInt(int): # def is_valid_tag(self) -> bool: # return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or\ diff --git a/tests/unit/test_compare_elements.py b/tests/unit/test_compare_elements.py new file mode 100644 index 000000000..f66b4674a --- /dev/null +++ b/tests/unit/test_compare_elements.py @@ -0,0 +1,76 @@ +from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_type_storage import CompareTypes +from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.location import Location +from karapace.protobuf.one_of_element import OneOfElement +from karapace.protobuf.option_element import OptionElement + +location: Location = Location.get("some/folder", "file.proto") + + +def test_compare_oneof(): + self_one_of = OneOfElement( + name="page_info", + fields=[ + FieldElement(location=location.at(4, 5), element_type="int32", name="page_number", tag=2), + FieldElement(location=location.at(5, 5), element_type="int32", name="result_per_page", tag=3) + ], + ) + + other_one_of = OneOfElement( + name="info", + fields=[ + FieldElement(location=location.at(4, 5), element_type="int32", name="page_number", tag=2), + FieldElement(location=location.at(5, 5), element_type="int32", name="result_per_page", tag=3), + FieldElement(location=location.at(6, 5), element_type="int32", name="view", tag=4) + ], + ) + + types = CompareTypes() + result = CompareResult() + self_one_of.compare(other_one_of, result, types) + assert not result.is_compatible() + assert len(result.result) == 1 + result2: list = [] + for e in result.result: + result2.append(e.modification) + assert Modification.ONE_OF_FIELD_ADD in result2 + + +def test_compare_field(): + self_field = FieldElement( + location=location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="test", + tag=3, + options=[ + OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true"), + 
OptionElement("delay", OptionElement.Kind.NUMBER, "200", True) + ] + ) + + other_field = FieldElement( + location=location.at(4, 3), + label=Field.Label.OPTIONAL, + element_type="bool", + name="best", + tag=3, + options=[ + OptionElement("old_default", OptionElement.Kind.BOOLEAN, "true"), + OptionElement("delay", OptionElement.Kind.NUMBER, "200", True) + ] + ) + + types = CompareTypes() + result = CompareResult() + self_field.compare(other_field, result, types) + + assert result.is_compatible() + assert len(result.result) == 1 + result2: list = [] + for e in result.result: + result2.append(e.modification) + + assert Modification.FIELD_NAME_ALTER in result2 diff --git a/tests/unit/test_compatibility.py b/tests/unit/test_compatibility.py index 01c2a917d..e80f5288a 100644 --- a/tests/unit/test_compatibility.py +++ b/tests/unit/test_compatibility.py @@ -12,7 +12,7 @@ def test_compatibility_package(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str = 1; | } | string test = 1; @@ -24,7 +24,7 @@ def test_compatibility_package(): |syntax = "proto3"; |package a2; |message TestMessage { - | message Value { + | message Value { | string str = 1; | } | string test = 1; @@ -38,7 +38,7 @@ def test_compatibility_package(): other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) result = CompareResult() self_parsed.compare(other_parsed, result) - assert result.iscompatible() + assert result.is_compatible() def test_compatibility_field_add(): @@ -46,7 +46,7 @@ def test_compatibility_field_add(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str = 1; | } | string test = 1; @@ -58,7 +58,7 @@ def test_compatibility_field_add(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str = 1; | string str2 = 2; | } @@ -71,8 +71,9 @@ def test_compatibility_field_add(): other_schema = 
trim_margin(other_schema) self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) - result = self_parsed.compare(other_parsed) - assert result.iscompatible() + result = CompareResult() + self_parsed.compare(other_parsed, result) + assert result.is_compatible() def test_compatibility_field_drop(): @@ -80,7 +81,7 @@ def test_compatibility_field_drop(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str = 1; | string str2 = 2; | } @@ -93,7 +94,7 @@ def test_compatibility_field_drop(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str = 1; | } | string test = 1; @@ -105,8 +106,9 @@ def test_compatibility_field_drop(): other_schema = trim_margin(other_schema) self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) - result = self_parsed.compare(other_parsed) - assert result.iscompatible() + result = CompareResult() + self_parsed.compare(other_parsed, result) + assert result.is_compatible() def test_compatibility_field_add_drop(): @@ -114,7 +116,7 @@ def test_compatibility_field_add_drop(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str2 = 1; | } | string test = 1; @@ -126,7 +128,7 @@ def test_compatibility_field_add_drop(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str = 1; | } | string test = 1; @@ -138,8 +140,9 @@ def test_compatibility_field_add_drop(): other_schema = trim_margin(other_schema) self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) - result = self_parsed.compare(other_parsed) - assert result.iscompatible() + result = CompareResult() + 
self_parsed.compare(other_parsed, result) + assert result.is_compatible() def test_compatibility_enum_add(): @@ -147,7 +150,7 @@ def test_compatibility_enum_add(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { + | message Value { | string str2 = 1; | int32 x = 2; | } @@ -160,15 +163,15 @@ def test_compatibility_enum_add(): |syntax = "proto3"; |package a1; |message TestMessage { - | message Value { - | string str = 1; - | Enu x = 2; + | message Value { + | string str2 = 1; + | Enu x = 2; | } | string test = 1; | .a1.TestMessage.Value val = 2; | enum Enu { | A = 0; - | B = 1; + | B = 1; | } |} |""" @@ -177,5 +180,7 @@ def test_compatibility_enum_add(): other_schema = trim_margin(other_schema) self_parsed: ProtoFileElement = ProtoParser.parse(location, self_schema) other_parsed: ProtoFileElement = ProtoParser.parse(location, other_schema) - result = self_parsed.compare(other_parsed) - assert result.iscompatible() + + result = CompareResult() + self_parsed.compare(other_parsed, result) + assert result.is_compatible() diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 2fce63f5b..5ff927ec7 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -1,6 +1,6 @@ +from karapace.protobuf.compare_restult import CompareResult from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location -from karapace.protobuf.proto_file_element import ProtoFileElement from karapace.schema_reader import SchemaType, TypedSchema from tests.schemas.protobuf import ( schema_protobuf_compare_one, schema_protobuf_order_after, schema_protobuf_order_before, schema_protobuf_schema_registry1 @@ -27,17 +27,19 @@ def test_protobuf_schema_sort(): def test_protobuf_schema_compare(): proto1 = trim_margin(schema_protobuf_order_after) - protobuf_schema1: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto1) + protobuf_schema1: TypedSchema = 
TypedSchema.parse(SchemaType.PROTOBUF, proto1) proto2 = trim_margin(schema_protobuf_compare_one) - protobuf_schema2: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto2) - result = protobuf_schema1.compatible(protobuf_schema2) - assert result is True + protobuf_schema2: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2) + result = CompareResult() + protobuf_schema1.schema.schema.compare(protobuf_schema2.schema.schema, result) + assert result.is_compatible() -def test_protobuf_schema_compare(): +def test_protobuf_schema_compare2(): proto1 = trim_margin(schema_protobuf_order_after) - protobuf_schema1: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto1) + protobuf_schema1: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1) proto2 = trim_margin(schema_protobuf_compare_one) - protobuf_schema2: ProtoFileElement = TypedSchema.parse(SchemaType.PROTOBUF, proto2) - result = protobuf_schema2.compatible(protobuf_schema1) - assert result is False + protobuf_schema2: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2) + result = CompareResult() + protobuf_schema2.schema.schema.compare(protobuf_schema1.schema.schema, result) + assert result.is_compatible() From 810b54dba130c9e581bf4e4accb3678c76a3e1dd Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 12 Oct 2021 15:03:58 +0300 Subject: [PATCH 042/168] debugging workaround --- karapace/protobuf/compare_type_storage.py | 19 +++++++++------- karapace/protobuf/field_element.py | 27 ++++++++++------------- karapace/protobuf/proto_file_element.py | 2 +- tests/unit/test_compare_elements.py | 4 ++-- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index d7a2ece8e..bb366a7f8 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -1,20 +1,21 @@ from karapace.protobuf.exception import IllegalArgumentException from 
karapace.protobuf.proto_type import ProtoType from karapace.protobuf.type_element import TypeElement +from karapace.protobuf.compare_restult import CompareResult from typing import Optional - class CompareTypes: - def __init__(self): + def __init__(self, self_package_name: str, other_package_name: str, result: CompareResult): - self.self_package_name = '' - self.other_package_name = '' + self.self_package_name = self_package_name + self.other_package_name = other_package_name self.self_canonical_name: list = [] self.other_canonical_name: list = [] - self.self_types = dict() - self.other_types = dict() - self.locked_messages = [] - self.environment = [] + self.self_types: dict = dict() + self.other_types: dict = dict() + self.locked_messages: list = [] + self.environment: list = [] + self.result = result def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, types: dict): name: str @@ -82,6 +83,7 @@ def self_type_name(self, t: ProtoType): t = self.self_types.get(pretender) if t is not None: return pretender + canonical_name.pop() if self.self_types.get(string) is not None: return string return None @@ -102,6 +104,7 @@ def other_type_name(self, t: ProtoType): t = self.other_types.get(pretender) if t is not None: return pretender + canonical_name.pop() if self.other_types.get(string) is not None: return string return None diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 68fdda983..10168000b 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -2,7 +2,6 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/FieldElement.kt from karapace.protobuf.compare_restult import CompareResult, Modification from karapace.protobuf.compare_type_storage import TypeRecordMap - from karapace.protobuf.field import Field from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement @@ -14,16 +13,16 @@ 
class FieldElement: from karapace.protobuf.compare_type_storage import CompareTypes def __init__( - self, - location: Location, - label: Field.Label = None, - element_type: str = "", - name: str = None, - default_value: str = None, - json_name: str = None, - tag: int = None, - documentation: str = "", - options: list = None + self, + location: Location, + label: Field.Label = None, + element_type: str = "", + name: str = None, + default_value: str = None, + json_name: str = None, + tag: int = None, + documentation: str = "", + options: list = None ): self.location = location self.label = label @@ -96,12 +95,10 @@ def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: Comp if isinstance(other_type_record, TypeRecordMap): other_type = other_type_record.map_type() - if self_type.is_scalar or (self_type_record - and isinstance(self_type_record.type_element, EnumElement)): + if self_type.is_scalar or (self_type_record and isinstance(self_type_record.type_element, EnumElement)): self_is_scalar = True - if other_type.is_scalar or (other_type_record - and isinstance(other_type_record.type_element, EnumElement)): + if other_type.is_scalar or (other_type_record and isinstance(other_type_record.type_element, EnumElement)): other_is_scalar = True if self_is_scalar == other_is_scalar and \ diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 2d266c46c..b90c6522b 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -109,7 +109,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe other_indexes: dict = dict() i = 0 - compare_types = CompareTypes() + compare_types = CompareTypes(self.package_name, other.package_name) type_: TypeElement for type_ in self.types: self_types[type_.name] = type_ diff --git a/tests/unit/test_compare_elements.py b/tests/unit/test_compare_elements.py index f66b4674a..827674476 100644 --- 
a/tests/unit/test_compare_elements.py +++ b/tests/unit/test_compare_elements.py @@ -27,7 +27,7 @@ def test_compare_oneof(): ], ) - types = CompareTypes() + types = CompareTypes('', '') result = CompareResult() self_one_of.compare(other_one_of, result, types) assert not result.is_compatible() @@ -63,7 +63,7 @@ def test_compare_field(): ] ) - types = CompareTypes() + types = CompareTypes('', '') result = CompareResult() self_field.compare(other_field, result, types) From 1fc7f39cdfcd5356dd32d2009f784a0863bda28b Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 12 Oct 2021 15:06:51 +0300 Subject: [PATCH 043/168] fixup file name --- karapace/protobuf/{compare_restult.py => compare_result.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename karapace/protobuf/{compare_restult.py => compare_result.py} (100%) diff --git a/karapace/protobuf/compare_restult.py b/karapace/protobuf/compare_result.py similarity index 100% rename from karapace/protobuf/compare_restult.py rename to karapace/protobuf/compare_result.py From cc1266d59889b9c56b19ce0be86dfb49ec5a40eb Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 12 Oct 2021 17:34:44 +0300 Subject: [PATCH 044/168] fixup bugs with tests --- karapace/compatibility/protobuf/checks.py | 2 +- karapace/protobuf/compare_result.py | 9 ++++++-- karapace/protobuf/compare_type_storage.py | 13 +++++------ karapace/protobuf/enum_element.py | 5 ++-- karapace/protobuf/field_element.py | 17 ++++++++++---- karapace/protobuf/message_element.py | 2 +- karapace/protobuf/one_of_element.py | 2 +- karapace/protobuf/proto_file_element.py | 28 +++++++++-------------- karapace/protobuf/proto_type.py | 4 +++- tests/unit/test_compare_elements.py | 6 ++--- tests/unit/test_compatibility.py | 2 +- tests/unit/test_protobuf_schema.py | 2 +- 12 files changed, 51 insertions(+), 41 deletions(-) diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index d9be4eac7..ca1112048 100644 --- 
a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -1,6 +1,6 @@ # TODO: PROTOBUF* this functionality must be implemented from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaCompatibilityType -from karapace.protobuf.compare_restult import CompareResult, ModificationRecord +from karapace.protobuf.compare_result import CompareResult, ModificationRecord from karapace.schema_reader import SchemaType, TypedSchema diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index 5ec76d302..878ad8f2d 100644 --- a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -55,11 +55,16 @@ class CompareResult: def __init__(self): self.result: list = [] self.path: list = [] + self.canonical_name: list = [] - def push_path(self, string: str): + def push_path(self, string: str, canonical: bool = False): + if canonical: + self.canonical_name.append(str(string)) self.path.append(str(string)) - def pop_path(self): + def pop_path(self, canonical: bool = False): + if canonical: + self.canonical_name.pop() self.path.pop() def add_modification(self, modification: Modification): diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index bb366a7f8..03667116d 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -1,16 +1,15 @@ +from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.type_element import TypeElement -from karapace.protobuf.compare_restult import CompareResult from typing import Optional + class CompareTypes: def __init__(self, self_package_name: str, other_package_name: str, result: CompareResult): self.self_package_name = self_package_name self.other_package_name = other_package_name - self.self_canonical_name: list = [] - 
self.other_canonical_name: list = [] self.self_types: dict = dict() self.other_types: dict = dict() self.locked_messages: list = [] @@ -70,13 +69,13 @@ def get_other_type(self, t: ProtoType) -> Optional['TypeRecord']: def self_type_name(self, t: ProtoType): string: str = t.string name: str - canonical_name: list = list(self.self_canonical_name) + canonical_name: list = list(self.result.path) if string[0] == '.': name = string[1:] if self.self_types.get(name): return name return None - if self.self_package_name != '': + if self.self_package_name: canonical_name.insert(0, self.self_package_name) while len(canonical_name) > 0: pretender: str = ".".join(canonical_name) + '.' + string @@ -91,13 +90,13 @@ def self_type_name(self, t: ProtoType): def other_type_name(self, t: ProtoType): string: str = t.string name: str - canonical_name: list = list(self.other_canonical_name) + canonical_name: list = list(self.result.path) if string[0] == '.': name = string[1:] if self.other_types.get(name): return name return None - if self.other_package_name != '': + if self.other_package_name: canonical_name.insert(0, self.other_package_name) while len(canonical_name) > 0: pretender: str = ".".join(canonical_name) + '.' 
+ string diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 92a8b4e21..bdccf476f 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -1,6 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt -from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.location import Location @@ -47,7 +47,7 @@ def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTyp other_tags[constant.tag] = constant for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): - + result.push_path(tag.__str__()) if self_tags.get(tag) is None: result.add_modification(Modification.ENUM_CONSTANT_ADD) elif other_tags.get(tag) is None: @@ -55,3 +55,4 @@ def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTyp else: if self_tags.get(tag).name == other_tags.get(tag).name: result.add_modification(Modification.ENUM_CONSTANT_ALTER) + result.pop_path() diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 10168000b..c683508ff 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -1,6 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/FieldElement.kt -from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import TypeRecordMap from karapace.protobuf.field import Field from karapace.protobuf.location import Location @@ -95,10 +95,19 @@ def 
compare_type(self, self_type: ProtoType, other_type: ProtoType, result: Comp if isinstance(other_type_record, TypeRecordMap): other_type = other_type_record.map_type() - if self_type.is_scalar or (self_type_record and isinstance(self_type_record.type_element, EnumElement)): + self_is_enum: bool = False + other_is_enum: bool = False + + if self_type_record and isinstance(self_type_record.type_element, EnumElement): + self_is_enum = True + + if other_type_record and isinstance(other_type_record.type_element, EnumElement): + other_is_enum = True + + if self_type.is_scalar or self_is_enum: self_is_scalar = True - if other_type.is_scalar or (other_type_record and isinstance(other_type_record.type_element, EnumElement)): + if other_type.is_scalar or other_is_enum: other_is_scalar = True if self_is_scalar == other_is_scalar and \ @@ -106,7 +115,7 @@ def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: Comp if self_type.is_map: self.compare_map(self_type, other_type, result, types) elif self_is_scalar: - if self_type.compatibility_kind() != other_type.compatibility_kind(): + if self_type.compatibility_kind(self_is_enum) != other_type.compatibility_kind(other_is_enum): result.add_modification(Modification.FIELD_KIND_ALTER) else: self.compare_message(self_type, other_type, result, types) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index aeabda80d..62ad829b2 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -1,7 +1,7 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt # compatibility routine added -from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.field_element import FieldElement from 
karapace.protobuf.location import Location diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index a4367aa91..7be1ce371 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -1,6 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OneOfElement.kt -from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.utils import append_documentation, append_indented diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index b90c6522b..7d621803c 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -1,9 +1,9 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElement.kt -from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.enum_element import EnumElement -from karapace.protobuf.exception import IllegalArgumentException +from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement from karapace.protobuf.syntax import Syntax @@ -107,16 +107,16 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe other_types: dict = dict() self_indexes: dict = dict() other_indexes: dict = dict() - i = 0 - - compare_types = CompareTypes(self.package_name, other.package_name) + compare_types = CompareTypes(self.package_name, other.package_name, result) type_: TypeElement + i = 0 for type_ in 
self.types: self_types[type_.name] = type_ self_indexes[type_.name] = i package_name = self.package_name if self.package_name else '' compare_types.add_self_type(package_name, type_) i += 1 + i = 0 for type_ in other.types: other_types[type_.name] = type_ @@ -127,7 +127,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe for name in list(self_types.keys()) + list(set(other_types.keys()) - set(self_types.keys())): - result.push_path(name) + result.push_path(name, True) if self_types.get(name) is None and other_types.get(name) is not None: if isinstance(other_types[name], MessageElement): @@ -135,26 +135,21 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe elif isinstance(other_types[name], EnumElement): result.add_modification(Modification.ENUM_ADD) else: - # TODO: write message - raise IllegalArgumentException() + raise IllegalStateException("Instance of element is not applicable") elif self_types.get(name) is not None and other_types.get(name) is None: if isinstance(self_types[name], MessageElement): result.add_modification(Modification.MESSAGE_DROP) elif isinstance(self_types[name], EnumElement): result.add_modification(Modification.ENUM_DROP) else: - # TODO: write message - raise IllegalArgumentException() + raise IllegalStateException("Instance of element is not applicable") else: if other_indexes[name] != self_indexes[name]: if isinstance(self_types[name], MessageElement): - # is it still compatible? 
+ # incompatible type result.add_modification(Modification.MESSAGE_MOVE) - # elif isinstance(self_types[name], EnumElement): - # result.add_modification(Modifications.ENUM_MOVE) else: - # TODO: write message - raise IllegalArgumentException() + raise IllegalStateException("Instance of element is not applicable") else: if isinstance(self_types[name], MessageElement) \ and isinstance(other_types[name], MessageElement): @@ -165,7 +160,6 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe else: # incompatible type result.add_modification(Modification.TYPE_ALTER) - - result.pop_path() + result.pop_path(True) return result diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index c145f8208..22b346919 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -185,7 +185,9 @@ class CompatibilityKind(Enum): DOUBLE = auto() FLOAT = auto() - def compatibility_kind(self) -> 'ProtoType.CompatibilityKind': + def compatibility_kind(self, is_enum: bool) -> 'ProtoType.CompatibilityKind': + if is_enum: + return ProtoType.CompatibilityKind.VARIANT result = { "int32": ProtoType.CompatibilityKind.VARIANT, diff --git a/tests/unit/test_compare_elements.py b/tests/unit/test_compare_elements.py index 827674476..bb3fe8e09 100644 --- a/tests/unit/test_compare_elements.py +++ b/tests/unit/test_compare_elements.py @@ -1,4 +1,4 @@ -from karapace.protobuf.compare_restult import CompareResult, Modification +from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement @@ -27,8 +27,8 @@ def test_compare_oneof(): ], ) - types = CompareTypes('', '') result = CompareResult() + types = CompareTypes('', '', result) self_one_of.compare(other_one_of, result, types) assert not result.is_compatible() assert len(result.result) == 1 @@ -63,8 +63,8 @@ 
def test_compare_field(): ] ) - types = CompareTypes('', '') result = CompareResult() + types = CompareTypes('', '', result) self_field.compare(other_field, result, types) assert result.is_compatible() diff --git a/tests/unit/test_compatibility.py b/tests/unit/test_compatibility.py index e80f5288a..8348a1b51 100644 --- a/tests/unit/test_compatibility.py +++ b/tests/unit/test_compatibility.py @@ -1,4 +1,4 @@ -from karapace.protobuf.compare_restult import CompareResult +from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location from karapace.protobuf.proto_file_element import ProtoFileElement diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 5ff927ec7..7b0578d0b 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -1,4 +1,4 @@ -from karapace.protobuf.compare_restult import CompareResult +from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location from karapace.schema_reader import SchemaType, TypedSchema From 008fb07157f223e6682bc98e6c67ab8a85fe75ec Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 15 Oct 2021 15:21:46 +0300 Subject: [PATCH 045/168] integration test workaround backup --- karapace/compatibility/protobuf/checks.py | 8 +- karapace/protobuf/schema.py | 11 ++- karapace/schema_reader.py | 3 +- tests/integration/test_schema_protobuf.py | 112 ++++++++++++++++++++++ 4 files changed, 127 insertions(+), 7 deletions(-) create mode 100644 tests/integration/test_schema_protobuf.py diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index ca1112048..1548acb71 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -1,14 +1,14 @@ # TODO: PROTOBUF* this functionality must be 
implemented from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaCompatibilityType from karapace.protobuf.compare_result import CompareResult, ModificationRecord +from karapace.protobuf.schema import ProtobufSchema from karapace.schema_reader import SchemaType, TypedSchema -def check_protobuf_schema_compatibility(reader: str, writer: str) -> SchemaCompatibilityResult: - reader_proto_file_element: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, reader).schema - writer_proto_file_element: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, writer).schema +def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: + result: CompareResult = CompareResult() - writer_proto_file_element.schema.schema.compare(reader_proto_file_element.schema.schema, result) + reader.compare(writer,result) if result.is_compatible(): return SchemaCompatibilityResult.compatible() # TODO: maybe move incompatibility level raising to ProtoFileElement.compatible() ?? diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 541622a89..72d1ea3ea 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -1,7 +1,9 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Schema.kt # Ported partially for required functionality. 
+from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.enum_element import EnumElement +from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement from karapace.protobuf.option_element import OptionElement @@ -108,9 +110,11 @@ class ProtobufSchema: DEFAULT_LOCATION = Location.get("") def __init__(self, schema: str): + if type(schema).__name__ != 'str': + raise IllegalArgumentException("Non str type of schema string") self.dirty = schema self.cache_string = "" - self.schema = ProtoParser.parse(self.DEFAULT_LOCATION, schema) + self.proto_file_element = ProtoParser.parse(self.DEFAULT_LOCATION, schema) def __str__(self) -> str: if not self.cache_string: @@ -123,7 +127,7 @@ def to_json(self) -> str: def to_schema(self): strings: list = [] - shm: ProtoFileElement = self.schema + shm: ProtoFileElement = self.proto_file_element if shm.syntax: strings.append("syntax = \"") strings.append(str(shm.syntax)) @@ -166,3 +170,6 @@ def to_schema(self): for service in shm.services: strings.append(str(service.to_schema())) return "".join(strings) + + def compare(self, other: 'ProtobufSchema', result: CompareResult): + self.proto_file_element.compare(other.proto_file_element, result) diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 81c907073..ac858cada 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -90,7 +90,8 @@ def parse_protobuf(schema_str: str): return ts # TypeError - Raised when the user forgets to encode the schema as a string. 
except Exception as e: # FIXME: bare exception - log.exception("Unexpected error:") + log.exception(f"Unexpected error: {e} \n schema:[{schema_str}]") + raise InvalidSchema from e @staticmethod diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py new file mode 100644 index 000000000..e5a3993e2 --- /dev/null +++ b/tests/integration/test_schema_protobuf.py @@ -0,0 +1,112 @@ +""" +karapace - schema tests + +Copyright (c) 2019 Aiven Ltd +See LICENSE for details +""" +from http import HTTPStatus +from kafka import KafkaProducer +from karapace import config +from karapace.protobuf.kotlin_wrapper import trim_margin +from karapace.rapu import is_success +from karapace.schema_registry_apis import KarapaceSchemaRegistry, SchemaErrorMessages +from karapace.utils import Client +from tests.utils import ( + create_field_name_factory, create_schema_name_factory, create_subject_name_factory, KafkaServers, + repeat_until_successful_request +) +from typing import List, Tuple + +import json as jsonlib +import os +import pytest +import requests + +baseurl = "http://localhost:8081" + + +def add_slashes(text: str) -> str: + escape_dict = { + '\a': '\\a', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', + '\v': '\\v', + '\'': "\\'", + '\"': '\\"', + '\\': '\\\\' + } + trans_table = str.maketrans(escape_dict) + return text.translate(trans_table) + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_protobuf_schema_compatibility(registry_async_client: Client, trail: str) -> None: + subject = create_subject_name_factory(f"test_protobuf_schema_compatibility-{trail}")() + + res = await registry_async_client.put(f"config/{subject}{trail}", json={"compatibility": "BACKWARD"}) + assert res.status == 200 + + original_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str2 = 1; + | int32 x = 2; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} 
+ |""" + + original_schema = trim_margin(original_schema) + + res = await registry_async_client.post( + f"subjects/{subject}/versions{trail}", json={"schemaType": "PROTOBUF", "schema": original_schema} + ) + assert res.status == 200 + assert "id" in res.json() + + + evolved_schema = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str2 = 1; + | Enu x = 2; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + | enum Enu { + | A = 0; + | B = 1; + | } + |} + |""" + evolved_schema = trim_margin(evolved_schema) + + res = await registry_async_client.post( + f"compatibility/subjects/{subject}/versions/latest{trail}", + json={"schemaType": "PROTOBUF", "schema": evolved_schema}, + ) + assert res.status == 200 + res = await registry_async_client.post( + f"subjects/{subject}/versions{trail}", json={"schemaType": "PROTOBUF", "schema": evolved_schema} + ) + assert res.status == 200 + assert "id" in res.json() + +# res = await registry_async_client.post( +# f"compatibility/subjects/{subject}/versions/latest{trail}", +# json={"schemaType": "PROTOBUF", "schema": jsonlib.dumps(original_schema)}, +# ) +# assert res.status == 200 +# res = await registry_async_client.post( +# f"subjects/{subject}/versions{trail}", json={"schemaType": "PROTOBUF", "schema": original_schema} +# ) +# assert res.status == 200 +# assert "id" in res.json() From 1cead31e21607c28a91ad48029dd98b79c6cf57a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 17 Oct 2021 11:24:49 +0300 Subject: [PATCH 046/168] add more integration tests/ tests workarond --- karapace/compatibility/protobuf/checks.py | 3 +- karapace/schema_reader.py | 2 +- karapace/schema_registry_apis.py | 15 ++- requirements.txt | 21 ++++ tests/integration/test_schema_protobuf.py | 130 +++++++++++++++++----- tests/unit/test_protobuf_schema.py | 51 ++++++++- 6 files changed, 187 insertions(+), 35 deletions(-) diff --git a/karapace/compatibility/protobuf/checks.py 
b/karapace/compatibility/protobuf/checks.py index 1548acb71..3e71ffc5a 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -2,13 +2,12 @@ from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaCompatibilityType from karapace.protobuf.compare_result import CompareResult, ModificationRecord from karapace.protobuf.schema import ProtobufSchema -from karapace.schema_reader import SchemaType, TypedSchema def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: result: CompareResult = CompareResult() - reader.compare(writer,result) + reader.compare(writer, result) if result.is_compatible(): return SchemaCompatibilityResult.compatible() # TODO: maybe move incompatibility level raising to ProtoFileElement.compatible() ?? diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index ac858cada..96533afd7 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -90,7 +90,7 @@ def parse_protobuf(schema_str: str): return ts # TypeError - Raised when the user forgets to encode the schema as a string. 
except Exception as e: # FIXME: bare exception - log.exception(f"Unexpected error: {e} \n schema:[{schema_str}]") + log.exception("Unexpected error: %s \n schema:[%s]", e, schema_str) raise InvalidSchema from e diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 13cb2369f..22b94248e 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -840,10 +840,17 @@ def write_new_schema_local(self, subject, body, content_type): # We didn't find an existing schema and the schema is compatible so go and create one schema_id = self.ksr.get_schema_id(new_schema) version = max(self.ksr.subjects[subject]["schemas"]) + 1 - self.log.info( - "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r", subject, schema_id, version, - new_schema.to_json(), schema_id - ) + if new_schema.schema_type is SchemaType.PROTOBUF: + self.log.info( + "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r", subject, schema_id, + version, new_schema.__str__(), schema_id + ) + else: + self.log.info( + "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r", subject, schema_id, + version, new_schema.to_json(), schema_id + ) + self.send_schema_message( subject=subject, schema=new_schema, diff --git a/requirements.txt b/requirements.txt index d4791526c..d2d52955d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,24 @@ networkx==2.5 # git+https://github.com/aiven/avro.git@513b153bac5040af6bba5847aef202adb680b67b#subdirectory=lang/py3/ git+git://github.com/aiven/kafka-python.git@b9f2f78377d56392f61cba8856dc6c02ae841b79 + +pytest~=6.2.2 +pip~=21.0.1 +argparse~=1.4.0 +toml~=0.10.1 +zlib~=1.2.11 +wheel~=0.36.2 +openssl~=1.1.1j +py~=1.10.0 +attrs~=20.3.0 +avro-python3~=1.11.0+snapshot +idna~=2.10 +certifi~=2020.12.5 +chardet~=3.0.4 +urllib3~=1.25.11 +decorator~=4.4.2 +pyrsistent~=0.17.3 +six~=1.15.0 +setuptools~=50.3.0 +zipp~=3.4.0 +filelock~=3.0.12 diff --git 
a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index e5a3993e2..c2839e94b 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -4,26 +4,20 @@ Copyright (c) 2019 Aiven Ltd See LICENSE for details """ -from http import HTTPStatus -from kafka import KafkaProducer -from karapace import config from karapace.protobuf.kotlin_wrapper import trim_margin -from karapace.rapu import is_success -from karapace.schema_registry_apis import KarapaceSchemaRegistry, SchemaErrorMessages from karapace.utils import Client -from tests.utils import ( - create_field_name_factory, create_schema_name_factory, create_subject_name_factory, KafkaServers, - repeat_until_successful_request -) -from typing import List, Tuple - -import json as jsonlib -import os +from tests.utils import create_subject_name_factory + +import json +import logging import pytest import requests baseurl = "http://localhost:8081" +compatibility_test_url = "https://raw.githubusercontent.com/confluentinc/schema-registry/master/protobuf-provider/" + \ + "src/test/resources/diff-schema-examples.json" + def add_slashes(text: str) -> str: escape_dict = { @@ -42,6 +36,9 @@ def add_slashes(text: str) -> str: return text.translate(trans_table) +log = logging.getLogger(__name__) + + @pytest.mark.parametrize("trail", ["", "/"]) async def test_protobuf_schema_compatibility(registry_async_client: Client, trail: str) -> None: subject = create_subject_name_factory(f"test_protobuf_schema_compatibility-{trail}")() @@ -65,12 +62,14 @@ async def test_protobuf_schema_compatibility(registry_async_client: Client, trai original_schema = trim_margin(original_schema) res = await registry_async_client.post( - f"subjects/{subject}/versions{trail}", json={"schemaType": "PROTOBUF", "schema": original_schema} + f"subjects/{subject}/versions{trail}", json={ + "schemaType": "PROTOBUF", + "schema": original_schema + } ) assert res.status == 200 assert "id" in 
res.json() - evolved_schema = """ |syntax = "proto3"; |package a1; @@ -91,22 +90,99 @@ async def test_protobuf_schema_compatibility(registry_async_client: Client, trai res = await registry_async_client.post( f"compatibility/subjects/{subject}/versions/latest{trail}", - json={"schemaType": "PROTOBUF", "schema": evolved_schema}, + json={ + "schemaType": "PROTOBUF", + "schema": evolved_schema + }, ) assert res.status == 200 + assert res.json() == {"is_compatible": True} + res = await registry_async_client.post( - f"subjects/{subject}/versions{trail}", json={"schemaType": "PROTOBUF", "schema": evolved_schema} + f"subjects/{subject}/versions{trail}", json={ + "schemaType": "PROTOBUF", + "schema": evolved_schema + } ) assert res.status == 200 assert "id" in res.json() -# res = await registry_async_client.post( -# f"compatibility/subjects/{subject}/versions/latest{trail}", -# json={"schemaType": "PROTOBUF", "schema": jsonlib.dumps(original_schema)}, -# ) -# assert res.status == 200 -# res = await registry_async_client.post( -# f"subjects/{subject}/versions{trail}", json={"schemaType": "PROTOBUF", "schema": original_schema} -# ) -# assert res.status == 200 -# assert "id" in res.json() + res = await registry_async_client.post( + f"compatibility/subjects/{subject}/versions/latest{trail}", + json={ + "schemaType": "PROTOBUF", + "schema": original_schema + }, + ) + assert res.json() == {"is_compatible": True} + assert res.status == 200 + res = await registry_async_client.post( + f"subjects/{subject}/versions{trail}", json={ + "schemaType": "PROTOBUF", + "schema": original_schema + } + ) + assert res.status == 200 + assert "id" in res.json() + + +class Schemas: + url = requests.get(compatibility_test_url) + sch = json.loads(url.text) + schemas = dict() + descriptions = [] + max_count = 120 + count = 0 + for a in sch: + descriptions.append(a["description"]) + schemas[a["description"]] = dict(a) + count +=1 + if a["description"] == 'Detect incompatible message index change': + 
break + if count == max_count: + break + + + +@pytest.mark.parametrize("trail", ["", "/"]) +@pytest.mark.parametrize("desc", Schemas.descriptions) +async def test_schema_registry_examples(registry_async_client: Client, trail: str, desc) -> None: + subject = create_subject_name_factory(f"test_protobuf_schema_compatibility-{trail}")() + + res = await registry_async_client.put(f"config/{subject}{trail}", json={"compatibility": "BACKWARD"}) + assert res.status == 200 + + description = desc + + schema = Schemas.schemas[description] + original_schema = schema["original_schema"] + evolved_schema = schema["update_schema"] + compatible = schema["compatible"] + + res = await registry_async_client.post( + f"subjects/{subject}/versions{trail}", json={ + "schemaType": "PROTOBUF", + "schema": original_schema + } + ) + assert res.status == 200 + assert "id" in res.json() + + res = await registry_async_client.post( + f"compatibility/subjects/{subject}/versions/latest{trail}", + json={ + "schemaType": "PROTOBUF", + "schema": evolved_schema + }, + ) + assert res.status == 200 + assert res.json() == {"is_compatible": compatible} + + res = await registry_async_client.post( + f"subjects/{subject}/versions{trail}", json={ + "schemaType": "PROTOBUF", + "schema": evolved_schema + } + ) + assert res.status == 200 + assert "id" in res.json() diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 7b0578d0b..0b020be1f 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -1,9 +1,11 @@ from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location +from karapace.protobuf.schema import ProtobufSchema from karapace.schema_reader import SchemaType, TypedSchema from tests.schemas.protobuf import ( - schema_protobuf_compare_one, schema_protobuf_order_after, schema_protobuf_order_before, schema_protobuf_schema_registry1 + 
schema_protobuf_compare_one, schema_protobuf_order_after, schema_protobuf_order_before, + schema_protobuf_schema_registry1 ) location: Location = Location.get("file.proto") @@ -43,3 +45,50 @@ def test_protobuf_schema_compare2(): result = CompareResult() protobuf_schema2.schema.schema.compare(protobuf_schema1.schema.schema, result) assert result.is_compatible() + + +def test_protobuf_schema_compare3(): + proto1 = """ + |syntax = "proto3"; + |package a1; + |message TestMessage { + | message Value { + | string str2 = 1; + | int32 x = 2; + | } + | string test = 1; + | .a1.TestMessage.Value val = 2; + |} + |""" + + proto1 = trim_margin(proto1) + + proto2 = """ + |syntax = "proto3"; + |package a1; + | + |message TestMessage { + | string test = 1; + | .a1.TestMessage.Value val = 2; + | + | message Value { + | string str2 = 1; + | Enu x = 2; + | } + | enum Enu { + | A = 0; + | B = 1; + | } + |} + |""" + + proto2 = trim_margin(proto2) + protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + protobuf_schema1.compare(protobuf_schema2, result) + + assert result.is_compatible() + + From c638b5a61caa1846539564ae2c408624477b2918 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 17 Oct 2021 19:51:58 +0300 Subject: [PATCH 047/168] add more tests/test driven bugfixes --- karapace/compatibility/protobuf/checks.py | 17 +- karapace/protobuf/compare_result.py | 6 +- karapace/protobuf/field_element.py | 28 ++- karapace/protobuf/message_element.py | 7 +- karapace/protobuf/proto_type.py | 4 +- karapace/protobuf/schema.py | 5 - requirements.txt | 21 -- tests/integration/test_schema_protobuf.py | 12 +- tests/unit/test_compare_elements.py | 2 +- tests/unit/test_protobuf_schema.py | 236 +++++++++++++++++++++- 10 files changed, 283 insertions(+), 55 deletions(-) diff --git a/karapace/compatibility/protobuf/checks.py 
b/karapace/compatibility/protobuf/checks.py index 3e71ffc5a..58c25cfcc 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -3,11 +3,17 @@ from karapace.protobuf.compare_result import CompareResult, ModificationRecord from karapace.protobuf.schema import ProtobufSchema +import logging + +log = logging.getLogger(__name__) -def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: +def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: result: CompareResult = CompareResult() - reader.compare(writer, result) + log.debug("READER: %s", reader.to_schema()) + log.debug("WRITER: %s", writer.to_schema()) + writer.compare(reader, result) + log.debug("IS_COMPATIBLE %s", result.is_compatible()) if result.is_compatible(): return SchemaCompatibilityResult.compatible() # TODO: maybe move incompatibility level raising to ProtoFileElement.compatible() ?? 
@@ -17,9 +23,10 @@ def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: Protobuf locations: set = set() messages: set = set() for record in result.result: - incompatibilities.append(record.modification.__str__()) - locations.add(record.path) - messages.add(record.message) + if not record.modification.is_compatible(): + incompatibilities.append(record.modification.__str__()) + locations.add(record.path) + messages.add(record.message) return SchemaCompatibilityResult( compatibility=SchemaCompatibilityType.incompatible, diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index 878ad8f2d..a9c7f26af 100644 --- a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -27,14 +27,14 @@ class Modification(Enum): ONE_OF_FIELD_ADD = auto() ONE_OF_FIELD_DROP = auto() ONE_OF_FIELD_MOVE = auto() - FIELD_CONVERTED_TO_ONE_OF = auto() + FEW_FIELDS_CONVERTED_TO_ONE_OF = auto() # protobuf compatibility issues is described in at # https://yokota.blog/2021/08/26/understanding-protobuf-compatibility/ def is_compatible(self) -> bool: return self not in [ - self.FIELD_LABEL_ALTER, self.FIELD_KIND_ALTER, self.ONE_OF_FIELD_ADD, self.ONE_OF_FIELD_DROP, - self.FIELD_CONVERTED_TO_ONE_OF + self.MESSAGE_MOVE, self.MESSAGE_DROP, self.FIELD_LABEL_ALTER, self.FIELD_KIND_ALTER, self.FIELD_TYPE_ALTER, + self.ONE_OF_FIELD_DROP, self.FEW_FIELDS_CONVERTED_TO_ONE_OF ] diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index c683508ff..31043e19c 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -73,16 +73,16 @@ def compare(self, other: 'FieldElement', result: CompareResult, types: CompareTy if self.name != other.name: result.add_modification(Modification.FIELD_NAME_ALTER) - if self.label != other.label: - result.add_modification(Modification.FIELD_LABEL_ALTER) - self.compare_type(ProtoType.get2(self.element_type), ProtoType.get2(other.element_type), 
result, types) + self.compare_type(ProtoType.get2(self.element_type), ProtoType.get2(other.element_type), other.label, result, types) def compare_map(self, self_map: ProtoType, other_map: ProtoType, result: CompareResult, types: CompareTypes): - self.compare_type(self_map.key_type, other_map.key_type, result, types) - self.compare_type(self_map.value_type, other_map.value_type, result, types) + self.compare_type(self_map.key_type, other_map.key_type, "", result, types) + self.compare_type(self_map.value_type, other_map.value_type, "", result, types) - def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + def compare_type( + self, self_type: ProtoType, other_type: ProtoType, other_label: str, result: CompareResult, types: CompareTypes + ): from karapace.protobuf.enum_element import EnumElement self_type_record = types.get_self_type(self_type) other_type_record = types.get_other_type(other_type) @@ -109,13 +109,25 @@ def compare_type(self, self_type: ProtoType, other_type: ProtoType, result: Comp if other_type.is_scalar or other_is_enum: other_is_scalar = True - if self_is_scalar == other_is_scalar and \ self_type.is_map == other_type.is_map: if self_type.is_map: self.compare_map(self_type, other_type, result, types) elif self_is_scalar: - if self_type.compatibility_kind(self_is_enum) != other_type.compatibility_kind(other_is_enum): + self_compatibility_kind = self_type.compatibility_kind(self_is_enum) + other_compatibility_kind = other_type.compatibility_kind(other_is_enum) + if other_label == '': + other_label = None + if self.label != other_label \ + and self_compatibility_kind in \ + [ProtoType.CompatibilityKind.VARIANT, + ProtoType.CompatibilityKind.DOUBLE, + ProtoType.CompatibilityKind.FLOAT, + ProtoType.CompatibilityKind.FIXED64, + ProtoType.CompatibilityKind.FIXED32, + ProtoType.CompatibilityKind.SVARIANT]: + result.add_modification(Modification.FIELD_LABEL_ALTER) + if self_compatibility_kind != 
other_compatibility_kind: result.add_modification(Modification.FIELD_KIND_ALTER) else: self.compare_message(self_type, other_type, result, types) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index 62ad829b2..cec8299fa 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -97,12 +97,15 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare other_one_ofs[one_of.name] = one_of for field in other.one_ofs: - result.push_path(field.tag) + result.push_path(field.name) + convert_count = 0 for subfield in field.fields: tag = subfield.tag if self_tags.get(tag): self_tags.pop(tag) - result.add_modification(Modification.FIELD_CONVERTED_TO_ONE_OF) + convert_count += 1 + if convert_count > 1: + result.add_modification(Modification.FEW_FIELDS_CONVERTED_TO_ONE_OF) result.pop_path() # Compare fields diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 22b346919..3a1fa4996 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -87,7 +87,7 @@ def __init__( """ The type of the map's values. Only present when [is_map] is True. 
""" self.value_type = None else: - if key_type.is_scalar() and key_type != self.BYTES and key_type != self.DOUBLE and key_type != self.FLOAT: + if key_type.is_scalar and key_type != self.BYTES and key_type != self.DOUBLE and key_type != self.FLOAT: self.is_scalar = False self.string = string self.is_map = True @@ -165,7 +165,7 @@ def get2(name: str) -> 'ProtoType': comma = name.rfind(",") require(comma != -1, f"expected ',' in map type: {name}") key = ProtoType.get2(name[4:comma].strip()) - value = ProtoType.get2(name[comma + 1:len(name)].strip()) + value = ProtoType.get2(name[comma + 1:len(name) - 1].strip()) return ProtoType(False, name, key, value) return ProtoType(False, name) diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 72d1ea3ea..0e312c369 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -11,10 +11,6 @@ from karapace.protobuf.proto_parser import ProtoParser from karapace.protobuf.utils import append_documentation, append_indented -import logging - -log = logging.getLogger(__name__) - def add_slashes(text: str) -> str: escape_dict = { @@ -119,7 +115,6 @@ def __init__(self, schema: str): def __str__(self) -> str: if not self.cache_string: self.cache_string = self.to_schema() - log.warning("CACHE_STRING:%s", self.cache_string) return self.cache_string def to_json(self) -> str: diff --git a/requirements.txt b/requirements.txt index d2d52955d..d4791526c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,24 +17,3 @@ networkx==2.5 # git+https://github.com/aiven/avro.git@513b153bac5040af6bba5847aef202adb680b67b#subdirectory=lang/py3/ git+git://github.com/aiven/kafka-python.git@b9f2f78377d56392f61cba8856dc6c02ae841b79 - -pytest~=6.2.2 -pip~=21.0.1 -argparse~=1.4.0 -toml~=0.10.1 -zlib~=1.2.11 -wheel~=0.36.2 -openssl~=1.1.1j -py~=1.10.0 -attrs~=20.3.0 -avro-python3~=1.11.0+snapshot -idna~=2.10 -certifi~=2020.12.5 -chardet~=3.0.4 -urllib3~=1.25.11 -decorator~=4.4.2 -pyrsistent~=0.17.3 -six~=1.15.0 
-setuptools~=50.3.0 -zipp~=3.4.0 -filelock~=3.0.12 diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index c2839e94b..9fbef76fe 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -134,16 +134,16 @@ class Schemas: max_count = 120 count = 0 for a in sch: + # if a["description"] == "Detect compatible add field to oneof": descriptions.append(a["description"]) schemas[a["description"]] = dict(a) - count +=1 + count += 1 if a["description"] == 'Detect incompatible message index change': break if count == max_count: break - @pytest.mark.parametrize("trail", ["", "/"]) @pytest.mark.parametrize("desc", Schemas.descriptions) async def test_schema_registry_examples(registry_async_client: Client, trail: str, desc) -> None: @@ -184,5 +184,9 @@ async def test_schema_registry_examples(registry_async_client: Client, trail: st "schema": evolved_schema } ) - assert res.status == 200 - assert "id" in res.json() + + if compatible: + assert res.status == 200 + assert "id" in res.json() + else: + assert res.status == 409 diff --git a/tests/unit/test_compare_elements.py b/tests/unit/test_compare_elements.py index bb3fe8e09..6c236229a 100644 --- a/tests/unit/test_compare_elements.py +++ b/tests/unit/test_compare_elements.py @@ -30,7 +30,7 @@ def test_compare_oneof(): result = CompareResult() types = CompareTypes('', '', result) self_one_of.compare(other_one_of, result, types) - assert not result.is_compatible() + assert result.is_compatible() assert len(result.result) == 1 result2: list = [] for e in result.result: diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 0b020be1f..b15768173 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -4,8 +4,7 @@ from karapace.protobuf.schema import ProtobufSchema from karapace.schema_reader import SchemaType, TypedSchema from tests.schemas.protobuf import ( - 
schema_protobuf_compare_one, schema_protobuf_order_after, schema_protobuf_order_before, - schema_protobuf_schema_registry1 + schema_protobuf_compare_one, schema_protobuf_order_after, schema_protobuf_order_before, schema_protobuf_schema_registry1 ) location: Location = Location.get("file.proto") @@ -33,7 +32,7 @@ def test_protobuf_schema_compare(): proto2 = trim_margin(schema_protobuf_compare_one) protobuf_schema2: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2) result = CompareResult() - protobuf_schema1.schema.schema.compare(protobuf_schema2.schema.schema, result) + protobuf_schema1.schema.compare(protobuf_schema2.schema, result) assert result.is_compatible() @@ -43,7 +42,7 @@ def test_protobuf_schema_compare2(): proto2 = trim_margin(schema_protobuf_compare_one) protobuf_schema2: TypedSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2) result = CompareResult() - protobuf_schema2.schema.schema.compare(protobuf_schema1.schema.schema, result) + protobuf_schema2.schema.compare(protobuf_schema1.schema, result) assert result.is_compatible() @@ -92,3 +91,232 @@ def test_protobuf_schema_compare3(): assert result.is_compatible() +def test_protobuf_message_compatible_label_alter(): + proto1 = """ + |syntax = "proto3"; + |message Goods { + | optional Packet record = 1; + | string driver = 2; + | message Packet { + | bytes order = 1; + | } + |} + |""" + proto1 = trim_margin(proto1) + + proto2 = """ + |syntax = "proto3"; + |message Goods { + | repeated Packet record = 1; + | string driver = 2; + | message Packet { + | bytes order = 1; + | } + |} + |""" + + proto2 = trim_margin(proto2) + + protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + protobuf_schema1.compare(protobuf_schema2, result) + + assert result.is_compatible() + + +def test_protobuf_field_type_incompatible_alter(): + proto1 = """ + |syntax = 
"proto3"; + |message Goods { + | string order = 1; + | map items_int32 = 2; + |} + |""" + proto1 = trim_margin(proto1) + + proto2 = """ + |syntax = "proto3"; + |message Goods { + | string order = 1; + | map items_string = 2; + |} + |""" + + proto2 = trim_margin(proto2) + + protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + protobuf_schema1.compare(protobuf_schema2, result) + + assert not result.is_compatible() + + +def test_protobuf_field_label_compatible_alter(): + proto1 = """ + |syntax = "proto3"; + |message Goods { + | optional string driver = 1; + | Order order = 2; + | message Order { + | string item = 1; + | } + |} + |""" + + proto1 = trim_margin(proto1) + proto2 = """ + |syntax = "proto3"; + |message Goods { + | repeated string driver = 1; + | Order order = 2; + | message Order { + | string item = 1; + | } + |} + |""" + + proto2 = trim_margin(proto2) + + protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + protobuf_schema1.compare(protobuf_schema2, result) + + assert result.is_compatible() + + +def test_protobuf_field_incompatible_drop_from_oneof(): + proto1 = """ + |syntax = "proto3"; + |message Goods { + | oneof item { + | string name_a = 1; + | string name_b = 2; + | int32 id = 3; + | } + |} + |""" + + proto1 = trim_margin(proto1) + proto2 = """ + |syntax = "proto3"; + |message Goods { + | oneof item { + | string name_a = 1; + | string name_b = 2; + | } + |} + |""" + + proto2 = trim_margin(proto2) + + protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + 
protobuf_schema1.compare(protobuf_schema2, result) + + assert not result.is_compatible() + + +def test_protobuf_field_incompatible_alter_to_oneof(): + proto1 = """ + |syntax = "proto3"; + |message Goods { + | string name = 1; + | string reg_name = 2; + |} + |""" + + proto1 = trim_margin(proto1) + proto2 = """ + |syntax = "proto3"; + |message Goods { + | oneof reg_data { + | string name = 1; + | string reg_name = 2; + | int32 id = 3; + | } + |} + |""" + + proto2 = trim_margin(proto2) + + protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + protobuf_schema1.compare(protobuf_schema2, result) + + assert not result.is_compatible() + + +def test_protobuf_field_compatible_alter_to_oneof(): + proto1 = """ + |syntax = "proto3"; + |message Goods { + | string name = 1; + | string foo = 2; + |} + |""" + + proto1 = trim_margin(proto1) + proto2 = """ + |syntax = "proto3"; + |message Goods { + | string name = 1; + | oneof new_oneof { + | string foo = 2; + | int32 bar = 3; + | } + |} + |""" + + proto2 = trim_margin(proto2) + + protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + protobuf_schema1.compare(protobuf_schema2, result) + + assert result.is_compatible() + + +def test_protobuf_add_compatible_field_to_oneof(): + proto1 = """ + |syntax = "proto3"; + |message TestMessage { + | oneof new_oneof { + | string test_string = 1; + | string test_string2 = 2; + | } + |} + |""" + + proto1 = trim_margin(proto1) + proto2 = """ + |syntax = "proto3"; + |message TestMessage { + | oneof new_oneof { + | string test_string = 1; + | string test_string2 = 2; + | int32 other_id = 3; + | } + |} + |""" + + proto2 = trim_margin(proto2) + + protobuf_schema1: ProtobufSchema = 
TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema + protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema + result = CompareResult() + + protobuf_schema1.compare(protobuf_schema2, result) + + assert result.is_compatible() From c77833b3b2386b53f1d3cd3b0483f30e6f80147c Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 17 Oct 2021 20:38:40 +0300 Subject: [PATCH 048/168] remove debug workaround --- tests/unit/test_protobuf_schema.py | 32 ------------------------------ 1 file changed, 32 deletions(-) diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index b15768173..9feb1f4b4 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -288,35 +288,3 @@ def test_protobuf_field_compatible_alter_to_oneof(): assert result.is_compatible() -def test_protobuf_add_compatible_field_to_oneof(): - proto1 = """ - |syntax = "proto3"; - |message TestMessage { - | oneof new_oneof { - | string test_string = 1; - | string test_string2 = 2; - | } - |} - |""" - - proto1 = trim_margin(proto1) - proto2 = """ - |syntax = "proto3"; - |message TestMessage { - | oneof new_oneof { - | string test_string = 1; - | string test_string2 = 2; - | int32 other_id = 3; - | } - |} - |""" - - proto2 = trim_margin(proto2) - - protobuf_schema1: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto1).schema - protobuf_schema2: ProtobufSchema = TypedSchema.parse(SchemaType.PROTOBUF, proto2).schema - result = CompareResult() - - protobuf_schema1.compare(protobuf_schema2, result) - - assert result.is_compatible() From 048964c2c1ce316edf5056672e17295e2c2ecd24 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 17 Oct 2021 21:33:00 +0300 Subject: [PATCH 049/168] pylint fixup --- tests/unit/test_protobuf_schema.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 9feb1f4b4..bece9b241 100644 --- 
a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -286,5 +286,3 @@ def test_protobuf_field_compatible_alter_to_oneof(): protobuf_schema1.compare(protobuf_schema2, result) assert result.is_compatible() - - From c88847a010494d7b67ba4736dfc3328ceb673c3a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 18 Oct 2021 01:04:08 +0300 Subject: [PATCH 050/168] Sync to aiven/karapace master (#14) * docs: add note about HA * schemas: better error messages for invalid schemas * schema_reader: added typing * schema_reader: isolated CONFIG message handler * schema_reader: isolated DELETE_SUBJECT message handler * schema_reader: isolated SUBJECT message handler * schema_reader: holding lock while manipulating schemas * schema_reader: separated logic for schema hard delete * schema_reader: simplified message handling The previuos version of the code was not adding a soft deleted schema to `self.subjects[subject]["schemas"]`, this meant that `KafkaSchemaReader.get_schemas(include_deleted=True)` was not complete * schema_reader: fix memory leak KafkaSchemaReader.schemas was keeping an schema alive while the process was running, this was a memory leak that could only be resolved with a restart of the process. fix: #283 * schema_reader: simplified subject delete * schema_reader: fix refcount issue The TypedSchema object was not being used, as soon as the schema was deleted from the last subject it was cleared from the container. The correct behavior is to reuse the object, so that each subject will add to the refcount. Co-authored-by: Augusto F. 
Hack Co-authored-by: Tommi Vainikainen Co-authored-by: Andrey Pleskach --- README.rst | 1 + karapace/schema_reader.py | 210 ++++++++++++++++--------------- karapace/schema_registry_apis.py | 10 +- tests/integration/test_schema.py | 155 +++++++++++++++++++++-- 4 files changed, 266 insertions(+), 110 deletions(-) diff --git a/README.rst b/README.rst index 6b8e3a721..1c40b3995 100644 --- a/README.rst +++ b/README.rst @@ -33,6 +33,7 @@ Features * Moderate memory consumption * Asynchronous architecture based on aiohttp * Supports Avro and JSON Schema. Protobuf development is tracked with `Issue 67`_. +* Leader/Replica architecture for HA and load balancing. .. _Issue 67: https://github.com/aiven/karapace/issues/67 diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 81c907073..b23a20f98 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -19,7 +19,8 @@ from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue from threading import Lock, Thread -from typing import Dict +from typing import Dict, Optional +from weakref import WeakValueDictionary import json import logging @@ -80,7 +81,7 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen try: ts = TypedSchema(parse_avro_schema_definition(schema_str), SchemaType.AVRO, schema_str) return ts - except SchemaParseException as e: + except (SchemaParseException, JSONDecodeError, TypeError) as e: raise InvalidSchema from e @staticmethod @@ -134,7 +135,6 @@ def __init__(self, config, master_coordinator=None): self.timeout_ms = 200 self.config = config self.subjects = {} - self.schemas: Dict[int, TypedSchema] = {} self.global_schema_id = 0 self.offset = 0 self.admin_client = None @@ -150,7 +150,12 @@ def __init__(self, config, master_coordinator=None): sentry_config["tags"] = {} self.stats = StatsClient(sentry_config=sentry_config) - def init_consumer(self): + # A schema has the same `id` even if registered in two different subjects. 
This container + # has a weak reference to every schema in use, used to retrieve its `id`. Weak references + # are used to allow for free memory when a schema is cleared from all subjects. + self.schemas: Dict[int, TypedSchema] = WeakValueDictionary() + + def init_consumer(self) -> None: # Group not set on purpose, all consumers read the same data session_timeout_ms = self.config["session_timeout_ms"] request_timeout_ms = max(session_timeout_ms, KafkaConsumer.DEFAULT_CONFIG["request_timeout_ms"]) @@ -174,7 +179,7 @@ def init_consumer(self): metadata_max_age_ms=self.config["metadata_max_age_ms"], ) - def init_admin_client(self): + def init_admin_client(self) -> bool: try: self.admin_client = KafkaAdminClient( api_version_auto_timeout_ms=constants.API_VERSION_AUTO_TIMEOUT_MS, @@ -198,7 +203,7 @@ def init_admin_client(self): return False @staticmethod - def get_new_schema_topic(config): + def get_new_schema_topic(config: dict) -> NewTopic: return NewTopic( name=config["topic_name"], num_partitions=constants.SCHEMA_TOPIC_NUM_PARTITIONS, @@ -206,7 +211,7 @@ def get_new_schema_topic(config): topic_configs={"cleanup.policy": "compact"} ) - def create_schema_topic(self): + def create_schema_topic(self) -> bool: schema_topic = self.get_new_schema_topic(self.config) try: self.log.info("Creating topic: %r", schema_topic) @@ -223,21 +228,19 @@ def create_schema_topic(self): time.sleep(5) return False - def get_schema_id(self, new_schema): - with self.id_lock: - schemas = self.schemas.items() - for schema_id, schema in schemas: - if schema == new_schema: - return schema_id + def get_schema_id(self, new_schema: TypedSchema) -> int: with self.id_lock: + for schema_id, schema in self.schemas.items(): + if schema == new_schema: + return schema_id self.global_schema_id += 1 return self.global_schema_id - def close(self): + def close(self) -> None: self.log.info("Closing schema_reader") self.running = False - def run(self): + def run(self) -> None: while self.running: try: if not 
self.admin_client: @@ -263,7 +266,7 @@ def run(self): self.stats.unexpected_exception(ex=e, where="schema_reader_exit") self.log.exception("Unexpected exception closing schema reader") - def handle_messages(self): + def handle_messages(self) -> None: raw_msgs = self.consumer.poll(timeout_ms=self.timeout_ms) if self.ready is False and raw_msgs == {}: self.ready = True @@ -300,104 +303,111 @@ def handle_messages(self): if self.ready and add_offsets: self.queue.put(self.offset) - def handle_msg(self, key: dict, value: dict): - if key["keytype"] == "CONFIG": - if "subject" in key and key["subject"] is not None: - if not value: - self.log.info("Deleting compatibility config completely for subject: %r", key["subject"]) - self.subjects[key["subject"]].pop("compatibility", None) - return - self.log.info( - "Setting subject: %r config to: %r, value: %r", key["subject"], value["compatibilityLevel"], value - ) - if not key["subject"] in self.subjects: - self.log.info("Adding first version of subject: %r with no schemas", key["subject"]) - self.subjects[key["subject"]] = {"schemas": {}} - subject_data = self.subjects.get(key["subject"]) - subject_data["compatibility"] = value["compatibilityLevel"] - else: - self.log.info("Setting global config to: %r, value: %r", value["compatibilityLevel"], value) - self.config["compatibility"] = value["compatibilityLevel"] - elif key["keytype"] == "SCHEMA": + def _handle_msg_config(self, key: dict, value: Optional[dict]) -> None: + subject = key.get("subject") + if subject is not None: + if subject not in self.subjects: + self.log.info("Adding first version of subject: %r with no schemas", subject) + self.subjects[subject] = {"schemas": {}} + if not value: - subject, version = key["subject"], key["version"] - self.log.info("Deleting subject: %r version: %r completely", subject, version) - if subject not in self.subjects: - self.log.error("Subject %s did not exist, should have", subject) - elif version not in 
self.subjects[subject]["schemas"]: - self.log.error("Version %d for subject %s did not exist, should have", version, subject) - else: - self.subjects[subject]["schemas"].pop(version, None) - return - schema_type = value.get("schemaType", "AVRO") - schema_str = value["schema"] + self.log.info("Deleting compatibility config completely for subject: %r", subject) + self.subjects[subject].pop("compatibility", None) + else: + self.log.info("Setting subject: %r config to: %r, value: %r", subject, value["compatibilityLevel"], value) + self.subjects[subject]["compatibility"] = value["compatibilityLevel"] + elif value is not None: + self.log.info("Setting global config to: %r, value: %r", value["compatibilityLevel"], value) + self.config["compatibility"] = value["compatibilityLevel"] + + def _handle_msg_delete_subject(self, key: dict, value: Optional[dict]) -> None: # pylint: disable=unused-argument + if value is None: + self.log.error("DELETE_SUBJECT record doesnt have a value, should have") + return + + subject = value["subject"] + if subject not in self.subjects: + self.log.error("Subject: %r did not exist, should have", subject) + else: + self.log.info("Deleting subject: %r, value: %r", subject, value) + version = value["version"] + for schema in self.subjects[subject]["schemas"].values(): + if schema["version"] <= version: + schema["deleted"] = True + + def _handle_msg_schema_hard_delete(self, key: dict) -> None: + subject, version = key["subject"], key["version"] + + if subject not in self.subjects: + self.log.error("Hard delete: Subject %s did not exist, should have", subject) + elif version not in self.subjects[subject]["schemas"]: + self.log.error("Hard delete: Version %d for subject %s did not exist, should have", version, subject) + else: + self.log.info("Hard delete: subject: %r version: %r", subject, version) + self.subjects[subject]["schemas"].pop(version, None) + + def _handle_msg_schema(self, key: dict, value: Optional[dict]) -> None: + if not value: + 
self._handle_msg_schema_hard_delete(key) + return + + schema_type = value.get("schemaType", "AVRO") + schema_str = value["schema"] + schema_subject = value["subject"] + schema_id = value["id"] + schema_version = value["version"] + schema_deleted = value.get("deleted", False) + + # The TypedSchema object must be re-used, otherwise the refcount will be incorrect and the + # schema will be freed prematurely + typed_schema = self.schemas.get(schema_id) + + if typed_schema is None: try: typed_schema = TypedSchema.parse(schema_type=SchemaType(schema_type), schema_str=schema_str) except InvalidSchema: try: schema_json = json.loads(schema_str) typed_schema = TypedSchema( - schema_type=SchemaType(schema_type), schema=schema_json, schema_str=schema_str + schema_type=SchemaType(schema_type), + schema=schema_json, + schema_str=schema_str, ) except JSONDecodeError: - self.log.error("Invalid json: %s", value["schema"]) + self.log.exception("Invalid schema: %s", schema_str) return - self.log.debug("Got typed schema %r", typed_schema) - subject = value["subject"] - if subject not in self.subjects: - self.log.info("Adding first version of subject: %r, value: %r", subject, value) - self.subjects[subject] = { - "schemas": { - value["version"]: { - "schema": typed_schema, - "version": value["version"], - "id": value["id"], - "deleted": value.get("deleted", False), - } - } - } - self.log.info("Setting schema_id: %r with schema: %r", value["id"], typed_schema) - self.schemas[value["id"]] = typed_schema - if value["id"] > self.global_schema_id: # Not an existing schema - self.global_schema_id = value["id"] - elif value.get("deleted", False) is True: - self.log.info("Deleting subject: %r, version: %r", subject, value["version"]) - if not value["version"] in self.subjects[subject]["schemas"]: - self.schemas[value["id"]] = typed_schema - else: - self.subjects[subject]["schemas"][value["version"]]["deleted"] = True - elif value.get("deleted", False) is False: - self.log.info("Adding new 
version of subject: %r, value: %r", subject, value) - self.subjects[subject]["schemas"][value["version"]] = { - "schema": typed_schema, - "version": value["version"], - "id": value["id"], - "deleted": value.get("deleted", False), - } - self.log.info("Setting schema_id: %r with schema: %r", value["id"], value["schema"]) - with self.id_lock: - self.schemas[value["id"]] = typed_schema - if value["id"] > self.global_schema_id: # Not an existing schema - self.global_schema_id = value["id"] + + if schema_subject not in self.subjects: + self.log.info("Adding first version of subject: %r with no schemas", schema_subject) + self.subjects[schema_subject] = {"schemas": {}} + + subjects_schemas = self.subjects[schema_subject]["schemas"] + + if schema_version in subjects_schemas: + self.log.info("Updating entry for subject: %r, value: %r", schema_subject, value) + else: + self.log.info("Adding new version of subject: %r, value: %r", schema_subject, value) + + subjects_schemas[schema_version] = { + "schema": typed_schema, + "version": schema_version, + "id": schema_id, + "deleted": schema_deleted, + } + with self.id_lock: + self.schemas[schema_id] = typed_schema + self.global_schema_id = max(self.global_schema_id, schema_id) + + def handle_msg(self, key: dict, value: Optional[dict]) -> None: + if key["keytype"] == "CONFIG": + self._handle_msg_config(key, value) + elif key["keytype"] == "SCHEMA": + self._handle_msg_schema(key, value) elif key["keytype"] == "DELETE_SUBJECT": - self.log.info("Deleting subject: %r, value: %r", value["subject"], value) - if not value["subject"] in self.subjects: - self.log.error("Subject: %r did not exist, should have", value["subject"]) - else: - updated_schemas = { - key: self._delete_schema_below_version(schema, value["version"]) - for key, schema in self.subjects[value["subject"]]["schemas"].items() - } - self.subjects[value["subject"]]["schemas"] = updated_schemas + self._handle_msg_delete_subject(key, value) elif key["keytype"] == "NOOP": # for 
spec completeness pass - @staticmethod - def _delete_schema_below_version(schema, version): - if schema["version"] <= version: - schema["deleted"] = True - return schema - def get_schemas(self, subject, *, include_deleted=False): if include_deleted: return self.subjects[subject]["schemas"] diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 13cb2369f..b9d630964 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -1,6 +1,8 @@ +from avro.schema import SchemaParseException from contextlib import closing from enum import Enum, unique from http import HTTPStatus +from json import JSONDecodeError from karapace import version as karapace_version from karapace.avro_compatibility import is_incompatible from karapace.compatibility import check_compatibility, CompatibilityModes @@ -762,12 +764,16 @@ def write_new_schema_local(self, subject, body, content_type): schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO)) try: new_schema = TypedSchema.parse(schema_type=schema_type, schema_str=body["schema"]) - except (InvalidSchema, InvalidSchemaType): + except (InvalidSchema, InvalidSchemaType) as e: self.log.warning("Invalid schema: %r", body["schema"], exc_info=True) + if isinstance(e.__cause__, (SchemaParseException, JSONDecodeError)): + human_error = f"{e.__cause__.args[0]}" # pylint: disable=no-member + else: + human_error = "Provided schema is not valid" self.r( body={ "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value, - "message": f"Invalid {schema_type} schema", + "message": f"Invalid {schema_type} schema. 
Error: {human_error}", }, content_type=content_type, status=HTTPStatus.UNPROCESSABLE_ENTITY, diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index 4320c6ee9..2ce5bec97 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -2375,9 +2375,9 @@ async def test_schema_hard_delete_whole_schema(registry_async_client: Client) -> assert res.json()["message"] == f"Subject '{subject}' not found." -async def test_schema_hard_delete_and_recreate(registry_async_client: Client) -> None: - subject = create_subject_name_factory("test_schema_hard_delete_and_recreate")() - schema_name = create_schema_name_factory("test_schema_hard_delete_and_recreate")() +async def test_schema_soft_delete_and_recreate(registry_async_client: Client) -> None: + subject = create_subject_name_factory("test_schema_soft_delete_and_recreate")() + schema_name = create_schema_name_factory("test_schema_soft_delete_and_recreate")() res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) assert res.status == 200 @@ -2414,10 +2414,35 @@ async def test_schema_hard_delete_and_recreate(registry_async_client: Client) -> assert "id" in res.json() assert schema_id == res.json()["id"], "after soft delete the same schema registered, the same identifier" - # Soft delete whole schema + +async def test_schema_hard_delete_and_recreate(registry_async_client: Client) -> None: + subject_factory = create_subject_name_factory("test_schema_hard_delete_and_recreate") + subject = subject_factory() + schema_name = create_schema_name_factory("test_schema_hard_delete_and_recreate")() + + res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) + assert res.status == 200 + schema = { + "type": "record", + "name": schema_name, + "fields": [{ + "type": { + "type": "enum", + "name": "enumtest", + "symbols": ["first", "second"], + }, + "name": "faa", + }] + } + res = await registry_async_client.post( + 
f"subjects/{subject}/versions", + json={"schema": jsonlib.dumps(schema)}, + ) + assert res.status == 200 + first_schema_id = res.json()["id"] + res = await registry_async_client.delete(f"subjects/{subject}") assert res.status_code == 200 - # Hard delete whole schema res = await registry_async_client.delete(f"subjects/{subject}?permanent=true") assert res.status_code == 200 @@ -2426,11 +2451,125 @@ async def test_schema_hard_delete_and_recreate(registry_async_client: Client) -> assert res.json()["error_code"] == 40401 assert res.json()["message"] == f"Subject '{subject}' not found." - # Recreate with same subject after hard delete + # Recreate after hard delete on all subjects frees the schema, and a new id is used res = await registry_async_client.post( f"subjects/{subject}/versions", json={"schema": jsonlib.dumps(schema)}, ) assert res.status == 200 - assert "id" in res.json() - assert schema_id == res.json()["id"], "after permanent deleted the same schema registered, the same identifier" + msg = "permanent deleted of the schema on all subjects causes a new identifier to be used" + second_schema_id = res.json()["id"] + assert first_schema_id != second_schema_id, msg + + # Register the same schema in another subject, this time the schema should not be freed + subject_keepalive = subject_factory() + res = await registry_async_client.post( + f"subjects/{subject_keepalive}/versions", + json={"schema": jsonlib.dumps(schema)}, + ) + assert res.status == 200 + assert second_schema_id == res.json()["id"] + + res = await registry_async_client.delete(f"subjects/{subject}") + assert res.status_code == 200 + + res = await registry_async_client.delete(f"subjects/{subject}?permanent=true") + assert res.status_code == 200 + + res = await registry_async_client.post( + f"subjects/{subject}/versions", + json={"schema": jsonlib.dumps(schema)}, + ) + assert res.status == 200 + msg = "the identifier does not change when the schema is permanent deleted in only one of the subjects" + 
assert second_schema_id == res.json()["id"], msg + + +async def test_regression_schema_hard_delete_order_must_not_matter(registry_async_client: Client) -> None: + """Regression: A hard delete on the last registered subject would free the schema. + + The correct behavior is to only free the schema after a hard delete on *all* subjects. + """ + subject_factory = create_subject_name_factory("test_schema_hard_delete_regression") + first_subject = subject_factory() + second_subject = subject_factory() + schema_name = create_schema_name_factory("test_schema_hard_delete_regression")() + + res = await registry_async_client.put("config", json={"compatibility": "BACKWARD"}) + assert res.status == 200 + schema = { + "type": "record", + "name": schema_name, + "fields": [{ + "type": { + "type": "enum", + "name": "enumtest", + "symbols": ["first", "second"], + }, + "name": "faa", + }] + } + res = await registry_async_client.post( + f"subjects/{first_subject}/versions", + json={"schema": jsonlib.dumps(schema)}, + ) + assert res.status == 200 + schema_id = res.json()["id"] + + res = await registry_async_client.post( + f"subjects/{second_subject}/versions", + json={"schema": jsonlib.dumps(schema)}, + ) + assert res.status == 200 + assert schema_id == res.json()["id"] + + # Regression: The hard delete is performed on the last subject the schema was registered + res = await registry_async_client.delete(f"subjects/{second_subject}") + assert res.status_code == 200 + res = await registry_async_client.delete(f"subjects/{second_subject}?permanent=true") + assert res.status_code == 200 + + res = await registry_async_client.post( + f"subjects/{second_subject}/versions", + json={"schema": jsonlib.dumps(schema)}, + ) + assert res.status == 200 + msg = "the identifier does not change when the schema is permanent deleted in only one of the subjects" + assert schema_id == res.json()["id"], msg + + +async def test_invalid_schema_should_provide_good_error_messages(registry_async_client: Client) 
-> None: + """The user should receive an informative error message when the format is invalid""" + subject_name_factory = create_subject_name_factory("test_schema_subject_post_invalid_data") + test_subject = subject_name_factory() + + schema_str = jsonlib.dumps({"type": "string"}) + res = await registry_async_client.post( + f"subjects/{test_subject}/versions", + json={"schema": schema_str[:-1]}, + ) + assert res.json()["message"] == "Invalid AVRO schema. Error: Expecting ',' delimiter: line 1 column 18 (char 17)" + + # Unfortunately the AVRO library doesn't provide a good error message, it just raises an TypeError + schema_str = jsonlib.dumps({"type": "enum", "name": "error"}) + res = await registry_async_client.post( + f"subjects/{test_subject}/versions", + json={"schema": schema_str}, + ) + assert res.json()["message"] == "Invalid AVRO schema. Error: Provided schema is not valid" + + # This is an upstream bug in the python AVRO library, until the bug is fixed we should at least have a nice error message + schema_str = jsonlib.dumps({"type": "enum", "name": "error", "symbols": {}}) + res = await registry_async_client.post( + f"subjects/{test_subject}/versions", + json={"schema": schema_str}, + ) + assert res.json()["message"] == "Invalid AVRO schema. Error: error is a reserved type name." + + # This is an upstream bug in the python AVRO library, until the bug is fixed we should at least have a nice error message + schema_str = jsonlib.dumps({"type": "enum", "name": "error", "symbols": ["A", "B"]}) + res = await registry_async_client.post( + f"subjects/{test_subject}/versions", + json={"schema": schema_str}, + ) + assert res.json()["message"] == "Invalid AVRO schema. Error: error is a reserved type name." 
From c7063a9e1bd310a322d26d0e08e191e7d7d9c5c1 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 8 Nov 2021 16:23:24 +0200 Subject: [PATCH 051/168] message verification workaround --- karapace/protobuf/io.py | 162 ++++++++++++++++++++++ karapace/serialization.py | 6 +- tests/integration/test_schema_protobuf.py | 2 +- 3 files changed, 166 insertions(+), 4 deletions(-) create mode 100644 karapace/protobuf/io.py diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py new file mode 100644 index 000000000..570b34d7e --- /dev/null +++ b/karapace/protobuf/io.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python3 +# -*- mode: python -*- +# -*- coding: utf-8 -*- + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from io import BytesIO +from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.schema import ProtobufSchema + +import importlib.util +import logging +import os + +logger = logging.getLogger(__name__) + + +class ProtobufDatumReader(): + """Deserialize Avro-encoded data into a Python data structure.""" + + @staticmethod + def check_props(schema_one, schema_two, prop_list): + for prop in prop_list: + if getattr(schema_one, prop) != getattr(schema_two, prop): + return False + return True + + @staticmethod + def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) -> bool: + # TODO (serge): schema comparison by fields required + + if str(writer_schema) == str(reader_schema): + return True + return False + + def __init__(self, writer_schema=None, reader_schema=None): + """ + As defined in the Avro specification, we call the schema encoded + in the data the "writer's schema", and the schema expected by the + reader the "reader's schema". + """ + self._writer_schema = writer_schema + self._reader_schema = reader_schema + + # read/write properties + def set_writer_schema(self, writer_schema): + self._writer_schema = writer_schema + + writer_schema = property(lambda self: self._writer_schema, set_writer_schema) + + def set_reader_schema(self, reader_schema): + self._reader_schema = reader_schema + + reader_schema = property(lambda self: self._reader_schema, set_reader_schema) + + @staticmethod + def read_varint(bio: BytesIO): + """Read a variable-length integer. 
+ + :returns: Integer + """ + varint = 0 + read_bytes = 0 + + while True: + char = bio.read(1) + if len(char) == 0: + if read_bytes == 0: + return 0 + # raise EOFError('EOF while reading varint, value is %i so far' % + # varint) + + byte = ord(char) + varint += (byte & 0x7F) << (7 * read_bytes) + + read_bytes += 1 + + if not byte & 0x80: + return varint + + def read_indexes(self, bio: BytesIO): + size: int = self.read_varint(bio) + result = [] + if size == 0: + result.append(0) + return result + i = 0 + while i < size: + result.append(self.read_varint(bio)) + i += 1 + + def read(self, bio: BytesIO): + if self.reader_schema is None: + self.reader_schema = self.writer_schema + return self.read_data(self.writer_schema, self.reader_schema, bio) + + @staticmethod + def find_message_name(schema: ProtobufSchema, indexes: list) -> str: + result: list = [] + dot: bool = False + types = schema.schema.types + for index in indexes: + if dot: + result.append(".") + else: + dot = True + + try: + message = types[index] + except Exception: + raise IllegalArgumentException(f"Invalid message indexes: {indexes}") + + if message and isinstance(message, MessageElement): + result.append(message.name) + types = message.nested_types + else: + raise IllegalArgumentException(f"Invalid message indexes: {indexes}") + + # for java we also need package name. But in case we will use protoc + # for compiling to python we can ignore it at all + + return "".join(result) + + def read_data(self, writer_schema, reader_schema, bio: BytesIO): + # TODO (serge): check and polish it + if not ProtobufDatumReader.match_schemas(writer_schema, reader_schema): + fail_msg = 'Schemas do not match.' 
+ raise ProtobufSchemaResolutionException(fail_msg, writer_schema, reader_schema) + + indexes = self.read_indexes(bio) + name = self.find_message_name(writer_schema, indexes) + + with open("tmp.proto", "w") as proto_text: + proto_text.write(str(writer_schema)) + proto_text.close() + + os.system("protoc --python_out=./ tmp.proto") + + spec = importlib.util.spec_from_file_location("tmp_pb2", "./tmp_pb2.py") + tmp_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(tmp_module) + class_to_call = getattr(tmp_module, name) + class_instance = class_to_call() + class_instance.ParseFromString(bio.read()) + + # + return class_instance diff --git a/karapace/serialization.py b/karapace/serialization.py index 9d57786e7..d560faee5 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -1,6 +1,7 @@ from avro.io import BinaryDecoder, BinaryEncoder, DatumReader, DatumWriter from json import load from jsonschema import ValidationError +from karapace.protobuf.io import ProtobufDatumReader from karapace.schema_reader import InvalidSchema, SchemaType, TypedSchema from karapace.utils import Client, json_encode from typing import Dict, Optional @@ -191,9 +192,8 @@ def read_value(schema: TypedSchema, bio: io.BytesIO): raise InvalidPayload from e return value if schema.schema_type is SchemaType.PROTOBUF: - # TODO: PROTOBUF* we need use protobuf validator there - value = bio.read() - return value + reader = ProtobufDatumReader(schema.schema) + return reader.read(bio) raise ValueError("Unknown schema type") diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 9fbef76fe..0e026ae49 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -30,7 +30,7 @@ def add_slashes(text: str) -> str: '\v': '\\v', '\'': "\\'", '\"': '\\"', - '\\': '\\\\' + '\\': '\\\\ } trans_table = str.maketrans(escape_dict) return text.translate(trans_table) From 
1570b4cbc473fb90c2881106a84c3fe7ca3bbabe Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 9 Nov 2021 22:27:28 +0200 Subject: [PATCH 052/168] style improving workaround --- karapace/protobuf/compare_result.py | 18 +++++----- karapace/protobuf/compare_type_storage.py | 44 +++++++++++------------ karapace/protobuf/enum_element.py | 7 ++-- karapace/protobuf/exception.py | 2 +- karapace/protobuf/field_element.py | 10 +++--- karapace/protobuf/kotlin_wrapper.py | 2 +- karapace/protobuf/location.py | 9 ++--- karapace/protobuf/message_element.py | 26 ++++++++------ karapace/protobuf/one_of_element.py | 7 ++-- karapace/protobuf/option_element.py | 2 +- karapace/protobuf/proto_file_element.py | 11 +++--- karapace/protobuf/proto_parser.py | 34 +++++++++--------- karapace/protobuf/proto_type.py | 4 +-- karapace/protobuf/schema.py | 6 ++-- karapace/protobuf/syntax.py | 6 ++-- karapace/protobuf/syntax_reader.py | 20 +++++------ karapace/protobuf/type_element.py | 11 +++--- tests/integration/test_schema_protobuf.py | 4 +-- 18 files changed, 118 insertions(+), 105 deletions(-) diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index a9c7f26af..6e9697195 100644 --- a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -40,14 +40,14 @@ def is_compatible(self) -> bool: class ModificationRecord: def __init__(self, modification: Modification, path: str): - self.modification: Modification = modification - self.path: str = path + self.modification = modification + self.path = path if modification.is_compatible(): - self.message: str = f"Compatible modification {self.modification} found" + self.message = f"Compatible modification {self.modification} found" else: - self.message: str = f"Incompatible modification {self.modification} found" + self.message = f"Incompatible modification {self.modification} found" - def to_str(self): + def to_str(self) -> str: return self.message @@ -57,21 +57,21 @@ def __init__(self): 
self.path: list = [] self.canonical_name: list = [] - def push_path(self, string: str, canonical: bool = False): + def push_path(self, string: str, canonical: bool = False) -> None: if canonical: self.canonical_name.append(str(string)) self.path.append(str(string)) - def pop_path(self, canonical: bool = False): + def pop_path(self, canonical: bool = False) -> None: if canonical: self.canonical_name.pop() self.path.pop() - def add_modification(self, modification: Modification): + def add_modification(self, modification: Modification) -> None: record = ModificationRecord(modification, ".".join(self.path)) self.result.append(record) - def is_compatible(self): + def is_compatible(self) -> bool: record: ModificationRecord for record in self.result: if not record.modification.is_compatible(): diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 03667116d..09f07c0bd 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -2,7 +2,7 @@ from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.type_element import TypeElement -from typing import Optional +from typing import Dict, List, Optional, Union class CompareTypes: @@ -10,13 +10,13 @@ def __init__(self, self_package_name: str, other_package_name: str, result: Comp self.self_package_name = self_package_name self.other_package_name = other_package_name - self.self_types: dict = dict() - self.other_types: dict = dict() - self.locked_messages: list = [] - self.environment: list = [] + self.self_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = dict() + self.other_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = dict() + self.locked_messages: List[object] = [] + self.environment: List[object] = [] self.result = result - def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, types: dict): + def add_a_type(self, 
prefix: str, package_name: str, type_element: TypeElement, types: dict) -> None: name: str if prefix: name = prefix + '.' + type_element.name @@ -24,9 +24,9 @@ def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, name = type_element.name from karapace.protobuf.message_element import MessageElement + from karapace.protobuf.field_element import FieldElement if isinstance(type_element, MessageElement): # add support of MapEntry messages if 'map_entry' in type_element.options: - from karapace.protobuf.field_element import FieldElement key: Optional[FieldElement] = None value: Optional[FieldElement] = None for f in type_element.fields: @@ -46,27 +46,27 @@ def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, for t in type_element.nested_types: self.add_a_type(name, package_name, t, types) - def add_self_type(self, package_name: str, type_element: TypeElement): + def add_self_type(self, package_name: str, type_element: TypeElement) -> None: self.add_a_type(package_name, package_name, type_element, self.self_types) - def add_other_type(self, package_name: str, type_element: TypeElement): + def add_other_type(self, package_name: str, type_element: TypeElement) -> None: self.add_a_type(package_name, package_name, type_element, self.other_types) - def get_self_type(self, t: ProtoType) -> Optional['TypeRecord']: + def get_self_type(self, t: ProtoType) -> Union[None, 'TypeRecord', 'TypeRecordMap']: name = self.self_type_name(t) if name is not None: type_record = self.self_types.get(name) return type_record return None - def get_other_type(self, t: ProtoType) -> Optional['TypeRecord']: + def get_other_type(self, t: ProtoType) -> Union[None, 'TypeRecord', 'TypeRecordMap']: name = self.other_type_name(t) if name is not None: type_record = self.other_types.get(name) return type_record return None - def self_type_name(self, t: ProtoType): + def self_type_name(self, t: ProtoType) -> Optional[str]: string: str = t.string name: str 
canonical_name: list = list(self.result.path) @@ -79,15 +79,15 @@ def self_type_name(self, t: ProtoType): canonical_name.insert(0, self.self_package_name) while len(canonical_name) > 0: pretender: str = ".".join(canonical_name) + '.' + string - t = self.self_types.get(pretender) - if t is not None: + pt = self.self_types.get(pretender) + if pt is not None: return pretender canonical_name.pop() if self.self_types.get(string) is not None: return string return None - def other_type_name(self, t: ProtoType): + def other_type_name(self, t: ProtoType) -> Optional[str]: string: str = t.string name: str canonical_name: list = list(self.result.path) @@ -100,15 +100,15 @@ def other_type_name(self, t: ProtoType): canonical_name.insert(0, self.other_package_name) while len(canonical_name) > 0: pretender: str = ".".join(canonical_name) + '.' + string - t = self.other_types.get(pretender) - if t is not None: + pt = self.other_types.get(pretender) + if pt is not None: return pretender canonical_name.pop() if self.other_types.get(string) is not None: return string return None - def self_type_short_name(self, t: ProtoType): + def self_type_short_name(self, t: ProtoType) -> Optional[str]: name = self.self_type_name(t) if name is None: raise IllegalArgumentException(f"Cannot determine message type {t}") @@ -117,7 +117,7 @@ def self_type_short_name(self, t: ProtoType): return name[(len(type_record.package_name) + 1):] return name - def other_type_short_name(self, t: ProtoType): + def other_type_short_name(self, t: ProtoType) -> Optional[str]: name = self.other_type_name(t) if name is None: raise IllegalArgumentException(f"Cannot determine message type {t}") @@ -146,12 +146,12 @@ def __init__(self, package_name: str, type_element: TypeElement): class TypeRecordMap(TypeRecord): - def __init__(self, package_name: str, type_element: TypeElement, key, value): + def __init__(self, package_name: str, type_element: TypeElement, key: object, value: object): super().__init__(package_name, 
type_element) try: from karapace.protobuf.field_element import FieldElement - self.key: FieldElement = key - self.value: FieldElement = value + self.key = key + self.value = value except Exception: raise IllegalArgumentException("TypeRecordMap") diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index bdccf476f..151e3ae7f 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -1,5 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumElement.kt +from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.enum_constant_element import EnumConstantElement @@ -33,7 +34,7 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) - def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTypes): + def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTypes) -> None: self_tags: dict = dict() other_tags: dict = dict() constant: EnumConstantElement @@ -46,8 +47,8 @@ def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTyp for constant in other.constants: other_tags[constant.tag] = constant - for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): - result.push_path(tag.__str__()) + for tag in chain(self_tags.keys(), other_tags.keys() - self_tags.keys()): + result.push_path(str(tag)) if self_tags.get(tag) is None: result.add_modification(Modification.ENUM_CONSTANT_ADD) elif other_tags.get(tag) is None: diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index 4a0a337fa..89f6978a6 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -1,4 +1,4 @@ -def error(message: str): +def error(message: str) -> None: raise Exception(message) diff --git 
a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 31043e19c..a4acf9a72 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -69,20 +69,20 @@ def options_with_special_values(self) -> list: # Only non-repeated scalar types and Enums support default values. - def compare(self, other: 'FieldElement', result: CompareResult, types: CompareTypes): + def compare(self, other: 'FieldElement', result: CompareResult, types: CompareTypes) -> None: if self.name != other.name: result.add_modification(Modification.FIELD_NAME_ALTER) self.compare_type(ProtoType.get2(self.element_type), ProtoType.get2(other.element_type), other.label, result, types) - def compare_map(self, self_map: ProtoType, other_map: ProtoType, result: CompareResult, types: CompareTypes): + def compare_map(self, self_map: ProtoType, other_map: ProtoType, result: CompareResult, types: CompareTypes) -> None: self.compare_type(self_map.key_type, other_map.key_type, "", result, types) self.compare_type(self_map.value_type, other_map.value_type, "", result, types) def compare_type( self, self_type: ProtoType, other_type: ProtoType, other_label: str, result: CompareResult, types: CompareTypes - ): + ) -> None: from karapace.protobuf.enum_element import EnumElement self_type_record = types.get_self_type(self_type) other_type_record = types.get_other_type(other_type) @@ -135,7 +135,9 @@ def compare_type( result.add_modification(Modification.FIELD_KIND_ALTER) @classmethod - def compare_message(cls, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes): + def compare_message( + cls, self_type: ProtoType, other_type: ProtoType, result: CompareResult, types: CompareTypes + ) -> None: from karapace.protobuf.message_element import MessageElement self_type_record = types.get_self_type(self_type) other_type_record = types.get_other_type(other_type) diff --git a/karapace/protobuf/kotlin_wrapper.py 
b/karapace/protobuf/kotlin_wrapper.py index 7d38d7e09..0946c0343 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -3,7 +3,7 @@ import textwrap -def check(q: bool, message: str): +def check(q: bool, message: str) -> None: if not q: raise IllegalStateException(message) diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index cb3e206db..b59b886ae 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -1,5 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Location.kt +from typing import Optional class Location: @@ -16,14 +17,14 @@ def __init__(self, base: str, path: str, line: int = -1, column: int = -1): self.line = line self.column = column - def at(self, line: int, column: int): + def at(self, line: int, column: int) -> 'Location': return Location(self.base, self.path, line, column) - def without_base(self): + def without_base(self) -> 'Location': """ Returns a copy of this location with an empty base. """ return Location("", self.path, self.line, self.column) - def with_path_only(self): + def with_path_only(self) -> 'Location': """ Returns a copy of this location including only its path. 
""" return Location("", self.path, -1, -1) @@ -44,7 +45,7 @@ def __str__(self) -> str: return result @staticmethod - def get(*args): + def get(*args) -> Optional['Location']: result = None if len(args) == 1: # (path) path = args[0] diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index cec8299fa..d7ad7794c 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -1,13 +1,19 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/MessageElement.kt # compatibility routine added +from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes +from karapace.protobuf.extensions_element import ExtensionsElement from karapace.protobuf.field_element import FieldElement +from karapace.protobuf.group_element import GroupElement from karapace.protobuf.location import Location from karapace.protobuf.one_of_element import OneOfElement +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.reserved_document import ReservedElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented +from typing import List class MessageElement(TypeElement): @@ -16,13 +22,13 @@ def __init__( location: Location, name: str, documentation: str = "", - nested_types: list = None, - options: list = None, - reserveds: list = None, - fields: list = None, - one_ofs: list = None, - extensions: list = None, - groups: list = None, + nested_types: List[str] = None, + options: List[OptionElement] = None, + reserveds: List[ReservedElement] = None, + fields: List[FieldElement] = None, + one_ofs: List[OneOfElement] = None, + extensions: List[ExtensionsElement] = None, + groups: List[GroupElement] = None, ): super().__init__(location, name, documentation, options or [], 
nested_types or []) self.reserveds = reserveds or [] @@ -73,7 +79,7 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) - def compare(self, other: 'MessageElement', result: CompareResult, types: CompareTypes): + def compare(self, other: 'MessageElement', result: CompareResult, types: CompareTypes) -> None: if types.lock_message(self): field: FieldElement @@ -109,7 +115,7 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare result.pop_path() # Compare fields - for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): + for tag in chain(self_tags.keys(), other_tags.keys() - self_tags.keys()): result.push_path(tag) if self_tags.get(tag) is None: @@ -121,7 +127,7 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare result.pop_path() # Compare OneOfs - for name in list(self_one_ofs.keys()) + list(set(other_one_ofs.keys()) - set(self_one_ofs.keys())): + for name in chain(self_one_ofs.keys(), other_one_ofs.keys() - self_one_ofs.keys()): result.push_path(name) if self_one_ofs.get(name) is None: diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index 7be1ce371..fd5b714ce 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -1,5 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OneOfElement.kt +from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.utils import append_documentation, append_indented @@ -33,7 +34,7 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) - def compare(self, other: 'OneOfElement', result: CompareResult, types: CompareTypes): + def compare(self, other: 'OneOfElement', result: CompareResult, types: CompareTypes) -> None: self_tags: 
dict = dict() other_tags: dict = dict() @@ -42,8 +43,8 @@ def compare(self, other: 'OneOfElement', result: CompareResult, types: CompareTy for field in other.fields: other_tags[field.tag] = field - for tag in list(self_tags.keys()) + list(set(other_tags.keys()) - set(self_tags.keys())): - result.push_path(tag.__str__()) + for tag in chain(self_tags.keys(), other_tags.keys() - self_tags.keys()): + result.push_path(str(tag)) if self_tags.get(tag) is None: result.add_modification(Modification.ONE_OF_FIELD_ADD) diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 54c5f3e02..0981f1315 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -91,7 +91,7 @@ def format_list_map_value(self, value) -> str: def __repr__(self) -> str: return self.to_schema() - def __eq__(self, other): + def __eq__(self, other) -> bool: return str(self) == str(other) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 7d621803c..c37980287 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -1,5 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ProtoFileElement.kt +from itertools import chain from karapace.protobuf.compare_result import CompareResult, Modification from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.enum_element import EnumElement @@ -35,7 +36,7 @@ def __init__( self.public_imports = public_imports or [] self.imports = imports or [] - def to_schema(self): + def to_schema(self) -> str: strings: list = [ "// Proto schema formatted by Wire, do not edit.\n", "// Source: ", str(self.location.with_path_only()), "\n" @@ -82,17 +83,17 @@ def to_schema(self): return "".join(strings) @staticmethod - def empty(path): + def empty(path) -> 'ProtoFileElement': return ProtoFileElement(Location.get(path)) # TODO: 
there maybe be faster comparison workaround - def __eq__(self, other: 'ProtoFileElement'): # type: ignore + def __eq__(self, other: 'ProtoFileElement') -> bool: # type: ignore a = self.to_schema() b = other.to_schema() return a == b - def __repr__(self): + def __repr__(self) -> str: return self.to_schema() def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareResult: @@ -125,7 +126,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe compare_types.add_other_type(package_name, type_) i += 1 - for name in list(self_types.keys()) + list(set(other_types.keys()) - set(self_types.keys())): + for name in chain(self_types.keys(), other_types.keys() - self_types.keys()): result.push_path(name, True) diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 42be44a38..3b75ba77a 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -25,7 +25,7 @@ from karapace.protobuf.syntax_reader import SyntaxReader from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import MAX_TAG_VALUE -from typing import Union +from typing import List, Union class Context(Enum): @@ -70,17 +70,17 @@ def permits_extend(self) -> bool: class ProtoParser: def __init__(self, location: Location, data: str): self.location = location - self.imports: list = [] - self.nested_types: list = [] - self.services: list = [] - self.extends_list: list = [] - self.options: list = [] + self.imports: List[str] = [] + self.nested_types: List[str] = [] + self.services: List[str] = [] + self.extends_list: List[str] = [] + self.options: List[str] = [] self.declaration_count = 0 self.syntax: Union[Syntax, None] = None self.package_name: Union[str, None] = None self.prefix = "" self.data = data - self.public_imports: list = [] + self.public_imports: List[str] = [] self.reader = SyntaxReader(data, location) def read_proto_file(self) -> ProtoFileElement: @@ -111,8 +111,10 @@ def 
read_proto_file(self) -> ProtoFileElement: elif isinstance(declaration, ExtendElement): self.extends_list.append(declaration) - def read_declaration(self, documentation: str, context: Context): - + def read_declaration( + self, documentation: str, context: Context + ) -> Union[None, OptionElement, ReservedElement, RpcElement, MessageElement, EnumElement, EnumConstantElement, + ServiceElement, ExtendElement, ExtensionsElement, OneOfElement, GroupElement, FieldElement]: index = self.declaration_count self.declaration_count += 1 @@ -185,13 +187,13 @@ def read_declaration(self, documentation: str, context: Context): def read_message(self, location: Location, documentation: str) -> MessageElement: """ Reads a message declaration. """ name: str = self.reader.read_name() - fields: list = [] - one_ofs: list = [] - nested_types: list = [] - extensions: list = [] - options: list = [] - reserveds: list = [] - groups: list = [] + fields: List[FieldElement] = [] + one_ofs: List[OneOfElement] = [] + nested_types: List[TypeElement] = [] + extensions: List[ExtensionsElement] = [] + options: List[OptionElement] = [] + reserveds: List[ReservedElement] = [] + groups: List[GroupElement] = [] previous_prefix = self.prefix self.prefix = f"{self.prefix}{name}." 
diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 3a1fa4996..8d822936e 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -138,10 +138,10 @@ def nested_type(self, name: str) -> object: # ProtoType return ProtoType(False, f"{self.string}.{name}") - def __eq__(self, other): + def __eq__(self, other) -> bool: return isinstance(other, ProtoType) and self.string == other.string - def __ne__(self, other): + def __ne__(self, other) -> bool: return not isinstance(other, ProtoType) or self.string != other.string def __str__(self) -> str: diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index ca02022a2..6130a5a8d 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -81,7 +81,7 @@ def enum_element_string(element: EnumElement) -> str: return element.to_schema() -def option_element_string(option: OptionElement): +def option_element_string(option: OptionElement) -> str: result: str if option.kind == OptionElement.Kind.STRING: name: str @@ -112,7 +112,7 @@ def __str__(self) -> str: self.cache_string = self.to_schema() return self.cache_string - def to_schema(self): + def to_schema(self) -> str: strings: list = [] shm: ProtoFileElement = self.proto_file_element if shm.syntax: @@ -158,5 +158,5 @@ def to_schema(self): strings.append(str(service.to_schema())) return "".join(strings) - def compare(self, other: 'ProtobufSchema', result: CompareResult): + def compare(self, other: 'ProtobufSchema', result: CompareResult) -> CompareResult: self.proto_file_element.compare(other.proto_file_element, result) diff --git a/karapace/protobuf/syntax.py b/karapace/protobuf/syntax.py index 85f54e52d..a7d80e045 100644 --- a/karapace/protobuf/syntax.py +++ b/karapace/protobuf/syntax.py @@ -10,11 +10,11 @@ class Syntax(Enum): PROTO_3 = "proto3" @classmethod - def _missing_(cls, string): + def _missing_(cls, string) -> None: raise IllegalArgumentException(f"unexpected syntax: {string}") 
- def __str__(self): + def __str__(self) -> str: return self.value - def __repr__(self): + def __repr__(self) -> str: return self.value diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 36952d233..8c3bf256c 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -33,18 +33,18 @@ def __init__(self, data: str, location: Location): def exhausted(self) -> bool: return self.pos == len(self.data) - def read_char(self): + def read_char(self) -> str: """ Reads a non-whitespace character """ char = self.peek_char() self.pos += 1 return char - def require(self, c: str): + def require(self, c: str) -> None: """ Reads a non-whitespace character 'c' """ self.expect(self.read_char() == c, f"expected '{c}'") - def peek_char(self, ch: str = None): + def peek_char(self, ch: str = None) -> Union[bool, str]: """ Peeks a non-whitespace character and returns it. The only difference between this and [read_char] is that this doesn't consume the char. """ @@ -58,7 +58,7 @@ def peek_char(self, ch: str = None): self.expect(self.pos < len(self.data), "unexpected end of file") return self.data[self.pos] - def push_back(self, ch: str): + def push_back(self, ch: str) -> None: """ Push back the most recently read character. """ if self.data[self.pos - 1] == ch: self.pos -= 1 @@ -336,7 +336,7 @@ def try_append_trailing_documentation(self, documentation: str) -> str: return trailing_documentation return f"{documentation}\n{trailing_documentation}" - def skip_whitespace(self, skip_comments: bool): + def skip_whitespace(self, skip_comments: bool) -> None: """ Skips whitespace characters and optionally comments. When this returns, either self.pos == self.data.length or a non-whitespace character. 
""" @@ -349,9 +349,9 @@ def skip_whitespace(self, skip_comments: bool): elif skip_comments and c == "/": self.read_comment() else: - return + return None - def newline(self): + def newline(self) -> None: """ Call this every time a '\n' is encountered. """ self.line += 1 self.line_start = self.pos @@ -359,16 +359,16 @@ def newline(self): def location(self) -> Location: return self._location.at(self.line + 1, self.pos - self.line_start + 1) - def expect(self, condition: bool, message: str): + def expect(self, condition: bool, message: str) -> None: location = self.location() if not condition: self.unexpected(message, location) - def expect_with_location(self, condition: bool, location: Location, message: str): + def expect_with_location(self, condition: bool, location: Location, message: str) -> None: if not condition: self.unexpected(message, location) - def unexpected(self, message: str, location: Location = None): + def unexpected(self, message: str, location: Location = None) -> None: if not location: location = self.location() w = f"Syntax error in {str(location)}: {message}" diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index 865575d14..d94872139 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -6,12 +6,11 @@ class TypeElement: def __init__(self, location: Location, name: str, documentation: str, options: list, nested_types: list): - - self.location: Location = location - self.name: str = name - self.documentation: str = documentation - self.options: list = options - self.nested_types: list = nested_types + self.location = location + self.name = name + self.documentation = documentation + self.options = options + self.nested_types = nested_types def to_schema(self) -> str: pass diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 9fbef76fe..351e8d156 100644 --- a/tests/integration/test_schema_protobuf.py +++ 
b/tests/integration/test_schema_protobuf.py @@ -15,8 +15,8 @@ baseurl = "http://localhost:8081" -compatibility_test_url = "https://raw.githubusercontent.com/confluentinc/schema-registry/master/protobuf-provider/" + \ - "src/test/resources/diff-schema-examples.json" +compatibility_test_url = "https://raw.githubusercontent.com/confluentinc/schema-registry/0530b0107749512b997f49cc79fe423f21b43b87/" + \ + "protobuf-provider/src/test/resources/diff-schema-examples.json" def add_slashes(text: str) -> str: From 93fc461e84fff34684cc82928338cbe62c1313ff Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 10 Nov 2021 16:35:30 +0200 Subject: [PATCH 053/168] PR/styles fixup workaround --- karapace/protobuf/compare_type_storage.py | 27 +++++++---------------- karapace/protobuf/service_element.py | 15 ++++++++++--- karapace/protobuf/type_element.py | 7 ++++-- karapace/protobuf/utils.py | 8 ++++--- tests/integration/test_schema_protobuf.py | 3 ++- 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 09f07c0bd..9ef3618f1 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -22,21 +22,13 @@ def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, name = prefix + '.' 
+ type_element.name else: name = type_element.name - from karapace.protobuf.message_element import MessageElement from karapace.protobuf.field_element import FieldElement + if isinstance(type_element, MessageElement): # add support of MapEntry messages if 'map_entry' in type_element.options: - key: Optional[FieldElement] = None - value: Optional[FieldElement] = None - for f in type_element.fields: - if f.name == 'key': - key = f - break - for f in type_element.fields: - if f.name == 'value': - value = f - break + key: Optional[FieldElement] = next((f for f in type_element.fields if f.name == 'key'), None) + value: Optional[FieldElement] = next((f for f in type_element.fields if f.name == 'value'), None) types[name] = TypeRecordMap(package_name, type_element, key, value) else: types[name] = TypeRecord(package_name, type_element) @@ -67,9 +59,8 @@ def get_other_type(self, t: ProtoType) -> Union[None, 'TypeRecord', 'TypeRecordM return None def self_type_name(self, t: ProtoType) -> Optional[str]: - string: str = t.string - name: str - canonical_name: list = list(self.result.path) + string = t.string + canonical_name = list(self.result.path) if string[0] == '.': name = string[1:] if self.self_types.get(name): @@ -83,13 +74,12 @@ def self_type_name(self, t: ProtoType) -> Optional[str]: if pt is not None: return pretender canonical_name.pop() - if self.self_types.get(string) is not None: + if self.self_types.get(string): return string return None def other_type_name(self, t: ProtoType) -> Optional[str]: - string: str = t.string - name: str + string = t.string canonical_name: list = list(self.result.path) if string[0] == '.': name = string[1:] @@ -104,7 +94,7 @@ def other_type_name(self, t: ProtoType) -> Optional[str]: if pt is not None: return pretender canonical_name.pop() - if self.other_types.get(string) is not None: + if self.other_types.get(string): return string return None @@ -149,7 +139,6 @@ class TypeRecordMap(TypeRecord): def __init__(self, package_name: str, 
type_element: TypeElement, key: object, value: object): super().__init__(package_name, type_element) try: - from karapace.protobuf.field_element import FieldElement self.key = key self.value = value except Exception: diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index 42c41bbe0..0728e925c 100644 --- a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -1,12 +1,21 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ServiceElement.kt - from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement +from karapace.protobuf.rpc_element import RpcElement from karapace.protobuf.utils import append_documentation, append_indented +from typing import List class ServiceElement: - def __init__(self, location: Location, name: str, documentation: str = "", rpcs: list = None, options: list = None): + def __init__( + self, + location: Location, + name: str, + documentation: str = "", + rpcs: List[RpcElement] = None, + options: List[OptionElement] = None + ): self.location = location self.name = name self.documentation = documentation @@ -14,7 +23,7 @@ def __init__(self, location: Location, name: str, documentation: str = "", rpcs: self.options = options or [] def to_schema(self) -> str: - result: list = list() + result: List[str] = [] append_documentation(result, self.documentation) result.append(f"service {self.name} {{") if self.options: diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index d94872139..2c1824fda 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -1,11 +1,14 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/TypeElement.kt - from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement +from 
typing import List class TypeElement: - def __init__(self, location: Location, name: str, documentation: str, options: list, nested_types: list): + def __init__( + self, location: Location, name: str, documentation: str, options: List[OptionElement], nested_types: List[object] + ): self.location = location self.name = name self.documentation = documentation diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index e70af8d03..eba32f738 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -1,13 +1,15 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/Util.kt +from typing import List + def protobuf_encode(a: str) -> str: # TODO: PROTOBUF return a -def append_documentation(data: list, documentation: str) -> None: +def append_documentation(data: List[str], documentation: str) -> None: if not documentation: return @@ -22,7 +24,7 @@ def append_documentation(data: list, documentation: str) -> None: data.append("\n") -def append_options(data: list, options: list) -> None: +def append_options(data: List[str], options: List[object]) -> None: count = len(options) if count == 1: data.append('[') @@ -49,7 +51,7 @@ def try_to_schema(obj: object) -> str: raise AttributeError -def append_indented(data: list, value: str) -> None: +def append_indented(data: List[str], value: str) -> None: lines = value.split("\n") if len(lines) > 1 and not lines[-1]: del lines[-1] diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 351e8d156..91a893b82 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -15,7 +15,8 @@ baseurl = "http://localhost:8081" -compatibility_test_url = "https://raw.githubusercontent.com/confluentinc/schema-registry/0530b0107749512b997f49cc79fe423f21b43b87/" + \ +compatibility_test_url = "https://raw.githubusercontent.com/confluentinc/schema-registry/" + \ + 
"0530b0107749512b997f49cc79fe423f21b43b87/" + \ "protobuf-provider/src/test/resources/diff-schema-examples.json" From 0b14b5bbb141a0be5855146c1035d60f7641bafc Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 10 Nov 2021 18:54:14 +0200 Subject: [PATCH 054/168] fixup --- karapace/protobuf/compare_type_storage.py | 70 +++++++++-------------- 1 file changed, 26 insertions(+), 44 deletions(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 9ef3618f1..9309d8098 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -5,6 +5,28 @@ from typing import Dict, List, Optional, Union +def compute_name(t: ProtoType, result_path: List[str], package_name: str, types: dict) -> Optional[str]: + string = t.string + + if string.startswith('.'): + name = string[1:] + if types.get(name): + return name + return None + canonical_name = list(result_path) + if package_name: + canonical_name.insert(0, package_name) + while len(canonical_name) > 0: + pretender: str = ".".join(canonical_name) + '.' 
+ string + pt = types.get(pretender) + if pt is not None: + return pretender + canonical_name.pop() + if types.get(string): + return string + return None + + class CompareTypes: def __init__(self, self_package_name: str, other_package_name: str, result: CompareResult): @@ -45,61 +67,21 @@ def add_other_type(self, package_name: str, type_element: TypeElement) -> None: self.add_a_type(package_name, package_name, type_element, self.other_types) def get_self_type(self, t: ProtoType) -> Union[None, 'TypeRecord', 'TypeRecordMap']: - name = self.self_type_name(t) + name = compute_name(t, self.result.path, self.self_package_name, self.self_types) if name is not None: type_record = self.self_types.get(name) return type_record return None def get_other_type(self, t: ProtoType) -> Union[None, 'TypeRecord', 'TypeRecordMap']: - name = self.other_type_name(t) + name = compute_name(t, self.result.path, self.other_package_name, self.other_types) if name is not None: type_record = self.other_types.get(name) return type_record return None - def self_type_name(self, t: ProtoType) -> Optional[str]: - string = t.string - canonical_name = list(self.result.path) - if string[0] == '.': - name = string[1:] - if self.self_types.get(name): - return name - return None - if self.self_package_name: - canonical_name.insert(0, self.self_package_name) - while len(canonical_name) > 0: - pretender: str = ".".join(canonical_name) + '.' + string - pt = self.self_types.get(pretender) - if pt is not None: - return pretender - canonical_name.pop() - if self.self_types.get(string): - return string - return None - - def other_type_name(self, t: ProtoType) -> Optional[str]: - string = t.string - canonical_name: list = list(self.result.path) - if string[0] == '.': - name = string[1:] - if self.other_types.get(name): - return name - return None - if self.other_package_name: - canonical_name.insert(0, self.other_package_name) - while len(canonical_name) > 0: - pretender: str = ".".join(canonical_name) + '.' 
+ string - pt = self.other_types.get(pretender) - if pt is not None: - return pretender - canonical_name.pop() - if self.other_types.get(string): - return string - return None - def self_type_short_name(self, t: ProtoType) -> Optional[str]: - name = self.self_type_name(t) + name = compute_name(t, self.result.path, self.self_package_name, self.self_types) if name is None: raise IllegalArgumentException(f"Cannot determine message type {t}") type_record: TypeRecord = self.self_types.get(name) @@ -108,7 +90,7 @@ def self_type_short_name(self, t: ProtoType) -> Optional[str]: return name def other_type_short_name(self, t: ProtoType) -> Optional[str]: - name = self.other_type_name(t) + name = compute_name(t, self.result.path, self.other_package_name, self.other_types) if name is None: raise IllegalArgumentException(f"Cannot determine message type {t}") type_record: TypeRecord = self.other_types.get(name) From cb9b72ac4a77972a2a63795b70b35ab5823d47d7 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 10 Nov 2021 23:19:23 +0200 Subject: [PATCH 055/168] style workarounds --- karapace/protobuf/syntax_reader.py | 30 +++++++++++------------------- karapace/protobuf/type_element.py | 15 +++++++-------- karapace/protobuf/utils.py | 7 ------- 3 files changed, 18 insertions(+), 34 deletions(-) diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 8c3bf256c..6bfe27eee 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -5,20 +5,6 @@ from typing import Union -def hex_digit(c: str) -> int: - if ord(c) in range(ord('0'), ord('9') + 1): - return ord(c) - ord('0') - if ord(c) in range(ord('a'), ord('f') + 1): - return ord(c) - ord('a') + 10 - if ord(c) in range(ord('A'), ord('F') + 1): - return ord(c) - ord('A') + 10 - return -1 - - -def min_of(a: int, b: int) -> int: - return a if a < b else b - - class SyntaxReader: def __init__(self, data: str, location: Location): """ Next character to be read """ @@ 
-113,12 +99,18 @@ def read_quoted_string(self) -> str: self.newline() self.unexpected("unterminated string") + return "" def read_numeric_escape(self, radix: int, length: int) -> str: value = -1 - end_pos = min_of(self.pos + length, len(self.data)) + end_pos = min(self.pos + length, len(self.data)) + while self.pos < end_pos: - digit = hex_digit(self.data[self.pos]) + try: + digit = int(self.data[self.pos], radix) + except ValueError: + digit = -1 + if digit == -1 or digit >= radix: break @@ -187,11 +179,11 @@ def read_int(self) -> int: try: radix = 10 if tag.startswith("0x") or tag.startswith("0X"): - tag = tag[len("0x"):] radix = 16 return int(tag, radix) - except OSError as err: - print("OS error: {0}".format(err)) + + # except OSError as err: + # print("OS error: {0}".format(err)) except ValueError: self.unexpected(f"expected an integer but was {tag}") return -22 # this return never be called but mypy think we need it diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index 2c1824fda..20b1659be 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -1,19 +1,18 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/TypeElement.kt +from dataclasses import dataclass from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement from typing import List +@dataclass(frozen=True) class TypeElement: - def __init__( - self, location: Location, name: str, documentation: str, options: List[OptionElement], nested_types: List[object] - ): - self.location = location - self.name = name - self.documentation = documentation - self.options = options - self.nested_types = nested_types + location: Location + name: str + documentation: str + options: List[OptionElement] + nested_types: List[object] def to_schema(self) -> str: pass diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 
eba32f738..57b0bcef6 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -68,10 +68,3 @@ def append_indented(data: List[str], value: str) -> None: RESERVED_TAG_VALUE_START = 19000 RESERVED_TAG_VALUE_END = 19999 """ True if the supplied value is in the valid tag range and not reserved. """ - -# class MyInt(int): -# def is_valid_tag(self) -> bool: -# return (MIN_TAG_VALUE <= self <= RESERVED_TAG_VALUE_START) or\ -# (RESERVED_TAG_VALUE_END + 1 <= self <= MAX_TAG_VALUE + 1) - -# builtins.int = MyInt From 09b788fe7906ddd860bf9e56c21de02f363d7ced Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 20 Nov 2021 23:40:21 +0200 Subject: [PATCH 056/168] backup of debug workaround --- karapace/protobuf/protobuf_to_dict.py | 0 tests/unit/test_protobuf_serialization.py | 75 +++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 karapace/protobuf/protobuf_to_dict.py create mode 100644 tests/unit/test_protobuf_serialization.py diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/test_protobuf_serialization.py b/tests/unit/test_protobuf_serialization.py new file mode 100644 index 000000000..029bbbd16 --- /dev/null +++ b/tests/unit/test_protobuf_serialization.py @@ -0,0 +1,75 @@ +import logging + +from karapace.config import read_config +from karapace.serialization import ( + HEADER_FORMAT, InvalidMessageHeader, InvalidMessageSchema, InvalidPayload, SchemaRegistryDeserializer, + SchemaRegistrySerializer, START_BYTE +) +from tests.utils import test_objects_avro + +import avro +import copy +import io +import json +import pytest +import struct + +log = logging.getLogger(__name__) + + +async def make_ser_deser(config_path, mock_client): + with open(config_path) as handler: + config = read_config(handler) + serializer = SchemaRegistrySerializer(config_path=config_path, config=config) + deserializer = 
SchemaRegistryDeserializer(config_path=config_path, config=config) + await serializer.registry_client.close() + await deserializer.registry_client.close() + serializer.registry_client = mock_client + deserializer.registry_client = mock_client + return serializer, deserializer + + +async def test_happy_flow(default_config_path, mock_registry_client): + serializer, deserializer = await make_ser_deser(default_config_path, mock_registry_client) + for o in serializer, deserializer: + assert len(o.ids_to_schemas) == 0 + schema = await serializer.get_schema_for_subject("top") + for o in test_objects_avro: + assert o == await deserializer.deserialize(await serializer.serialize(schema, o)) + for o in serializer, deserializer: + assert len(o.ids_to_schemas) == 1 + assert 1 in o.ids_to_schemas + + +async def test_serialization_fails(default_config_path, mock_registry_client): + serializer, _ = await make_ser_deser(default_config_path, mock_registry_client) + with pytest.raises(InvalidMessageSchema): + schema = await serializer.get_schema_for_subject("topic") + await serializer.serialize(schema, {"foo": "bar"}) + + +async def test_deserialization_fails(default_config_path, mock_registry_client): + _, deserializer = await make_ser_deser(default_config_path, mock_registry_client) + invalid_header_payload = struct.pack(">bII", 1, 500, 500) + with pytest.raises(InvalidMessageHeader): + await deserializer.deserialize(invalid_header_payload) + + # for now we ignore the packed in schema id + invalid_data_payload = struct.pack(">bII", START_BYTE, 1, 500) + with pytest.raises(InvalidPayload): + await deserializer.deserialize(invalid_data_payload) + + # but we can pass in a perfectly fine doc belonging to a diff schema + schema = await mock_registry_client.get_schema_for_id(1) + schema = copy.deepcopy(schema.to_json()) + schema["name"] = "BadUser" + schema["fields"][0]["type"] = "int" + obj = {"name": 100, "favorite_number": 2, "favorite_color": "bar"} + writer = 
avro.io.DatumWriter(avro.io.schema.parse(json.dumps(schema))) + with io.BytesIO() as bio: + enc = avro.io.BinaryEncoder(bio) + bio.write(struct.pack(HEADER_FORMAT, START_BYTE, 1)) + writer.write(obj, enc) + enc_bytes = bio.getvalue() + with pytest.raises(InvalidPayload): + await deserializer.deserialize(enc_bytes) From 4409b5938d43197b4245763eca2874ad8577b3a4 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 20 Nov 2021 23:41:28 +0200 Subject: [PATCH 057/168] backup of debug workaround --- karapace/protobuf/exception.py | 18 ++ karapace/protobuf/io.py | 110 +++++++++-- karapace/protobuf/protobuf_to_dict.py | 212 ++++++++++++++++++++++ karapace/serialization.py | 27 ++- mypy.ini | 15 ++ tests/integration/test_rest_consumer.py | 36 ++++ tests/integration/test_schema_protobuf.py | 2 +- tests/unit/conftest.py | 23 ++- tests/unit/test_protobuf_schema.py | 7 + tests/unit/test_protobuf_serialization.py | 91 +++++----- tests/unit/test_serialization.py | 3 + tests/utils.py | 40 +++- 12 files changed, 512 insertions(+), 72 deletions(-) create mode 100644 mypy.ini diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index 89f6978a6..a603a3cbe 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -1,3 +1,6 @@ +import json + + def error(message: str) -> None: raise Exception(message) @@ -26,5 +29,20 @@ class ProtobufException(Error): """Generic Protobuf schema error.""" +class ProtobufTypeException(Error): + """Generic Protobuf type error.""" + + class SchemaParseException(ProtobufException): """Error while parsing a Protobuf schema descriptor.""" + + +class ProtobufSchemaResolutionException(ProtobufException): + def __init__(self, fail_msg, writer_schema=None, reader_schema=None): + writer_dump = json.dumps(json.loads(str(writer_schema)), indent=2) + reader_dump = json.dumps(json.loads(str(reader_schema)), indent=2) + if writer_schema: + fail_msg += "\nWriter's Schema: %s" % writer_dump + if reader_schema: + 
fail_msg += "\nReader's Schema: %s" % reader_dump + ProtobufException.__init__(self, fail_msg) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 570b34d7e..2b7870906 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -19,19 +19,29 @@ # limitations under the License. from io import BytesIO -from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException +from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException, ProtobufTypeException from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.protobuf_to_dict import dict_to_protobuf, protobuf_to_dict from karapace.protobuf.schema import ProtobufSchema +from karapace.protobuf.type_element import TypeElement +import hashlib +import importlib import importlib.util import logging import os +ZERO_BYTE = b'\x00' + logger = logging.getLogger(__name__) -class ProtobufDatumReader(): - """Deserialize Avro-encoded data into a Python data structure.""" +def calculate_class_name(name: str) -> str: + return "c_" + hashlib.md5(name.encode('utf-8')).hexdigest() + + +class ProtobufDatumReader: + """Deserialize Protobuf-encoded data into a Python data structure.""" @staticmethod def check_props(schema_one, schema_two, prop_list): @@ -50,7 +60,7 @@ def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) def __init__(self, writer_schema=None, reader_schema=None): """ - As defined in the Avro specification, we call the schema encoded + As defined in the Protobuf specification, we call the schema encoded in the data the "writer's schema", and the schema expected by the reader the "reader's schema". """ @@ -69,7 +79,7 @@ def set_reader_schema(self, reader_schema): reader_schema = property(lambda self: self._reader_schema, set_reader_schema) @staticmethod - def read_varint(bio: BytesIO): + def read_varint(bio: BytesIO) -> int: """Read a variable-length integer. 
:returns: Integer @@ -107,13 +117,13 @@ def read_indexes(self, bio: BytesIO): def read(self, bio: BytesIO): if self.reader_schema is None: self.reader_schema = self.writer_schema - return self.read_data(self.writer_schema, self.reader_schema, bio) + return protobuf_to_dict(self.read_data(self.writer_schema, self.reader_schema, bio)) @staticmethod def find_message_name(schema: ProtobufSchema, indexes: list) -> str: result: list = [] dot: bool = False - types = schema.schema.types + types = schema.proto_file_element.types for index in indexes: if dot: result.append(".") @@ -144,19 +154,95 @@ def read_data(self, writer_schema, reader_schema, bio: BytesIO): indexes = self.read_indexes(bio) name = self.find_message_name(writer_schema, indexes) - - with open("tmp.proto", "w") as proto_text: + proto_name = calculate_class_name(str(writer_schema)) + with open(f"{proto_name}.proto", "w") as proto_text: proto_text.write(str(writer_schema)) proto_text.close() - os.system("protoc --python_out=./ tmp.proto") + os.system(f"protoc --python_out=./ {proto_name}.proto") - spec = importlib.util.spec_from_file_location("tmp_pb2", "./tmp_pb2.py") + spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") tmp_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(tmp_module) class_to_call = getattr(tmp_module, name) class_instance = class_to_call() class_instance.ParseFromString(bio.read()) - # return class_instance + + +class ProtobufDatumWriter: + """ProtobufDatumWriter for generic python objects.""" + + def __init__(self, writer_schema=None): + self._writer_schema = writer_schema + a: ProtobufSchema = writer_schema + el: TypeElement + self._message_name = '' + for idx, el in enumerate(a.proto_file_element.types): + if isinstance(el, MessageElement): + self._message_name = el.name + self._message_index = idx + break + + if self._message_name == '': + raise ProtobufTypeException("No message in protobuf schema") + + # read/write 
properties + def set_writer_schema(self, writer_schema): + self._writer_schema = writer_schema + + writer_schema = property(lambda self: self._writer_schema, set_writer_schema) + + @staticmethod + def write_varint(bio: BytesIO, value): + + if value == 0: + bio.write(ZERO_BYTE) + return 1 + + written_bytes = 0 + while value > 0: + to_write = value & 0x7f + value = value >> 7 + + if value > 0: + to_write |= 0x80 + + bio.write(bytearray(to_write)[0]) + written_bytes += 1 + + return written_bytes + + def write_indexes(self, bio: BytesIO, value): + self.write_varint(bio, value) + + def write_index(self, writer: BytesIO): + self.write_indexes(writer, self._message_index) + + def write(self, datum: dict, writer: BytesIO): + # validate datum + + proto_name = calculate_class_name(str(self.writer_schema)) + with open(f"{proto_name}.proto", "w") as proto_text: + proto_text.write(str(self.writer_schema)) + proto_text.close() + + os.system(f"protoc --python_out=./ {proto_name}.proto") + name = self._message_name + spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") + tmp_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(tmp_module) + class_to_call = getattr(tmp_module, name) + class_instance = class_to_call() + + try: + dict_to_protobuf(class_instance, datum) + except Exception: + raise ProtobufTypeException(self.writer_schema, datum) + + writer.write(class_instance.SerializeToString()) + + +if __name__ == '__main__': + raise Exception('Not a standalone module') diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index e69de29bb..e367dc836 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -0,0 +1,212 @@ +""" +This module provide a small Python library for creating dicts from protocol buffers +Module based on code : +https://github.com/advboxes/perceptron-benchmark/blob/master/perceptron/utils/protobuf/protobuf_to_dict.py 
+License: (Apache 2.0) https://github.com/advboxes/perceptron-benchmark/blob/master/LICENSE +""" + +from google.protobuf.descriptor import FieldDescriptor +from google.protobuf.message import Message + +__all__ = ["protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf", "REVERSE_TYPE_CALLABLE_MAP"] + +EXTENSION_CONTAINER = '___X' + +TYPE_CALLABLE_MAP = { + FieldDescriptor.TYPE_DOUBLE: float, + FieldDescriptor.TYPE_FLOAT: float, + FieldDescriptor.TYPE_INT32: int, + FieldDescriptor.TYPE_INT64: int, + FieldDescriptor.TYPE_UINT32: int, + FieldDescriptor.TYPE_UINT64: int, + FieldDescriptor.TYPE_SINT32: int, + FieldDescriptor.TYPE_SINT64: int, + FieldDescriptor.TYPE_FIXED32: int, + FieldDescriptor.TYPE_FIXED64: int, + FieldDescriptor.TYPE_SFIXED32: int, + FieldDescriptor.TYPE_SFIXED64: int, + FieldDescriptor.TYPE_BOOL: bool, + FieldDescriptor.TYPE_STRING: str, + FieldDescriptor.TYPE_BYTES: lambda b: b.encode("base64"), + FieldDescriptor.TYPE_ENUM: int, +} + + +def repeated(type_callable): + """ + + Returns: + type_callable: + """ + + return lambda value_list: [type_callable(value) for value in value_list] + + +def enum_label_name(field, value): + """ enum label name + + Returns: + : label name + """ + return field.enum_type.values_by_number[int(value)].name + + +def protobuf_to_dict(pb, use_enum_labels=False): + """ protobuf to dict + + Args: + pb: data in proto format + use_enum_labels: ?? 
+ Returns: + result_dict: data in dict format + + """ + type_callable_map = TYPE_CALLABLE_MAP + result_dict = {} + extensions = {} + for field, value in pb.ListFields(): + type_callable = _get_field_value_adaptor(pb, field, type_callable_map, use_enum_labels) + if field.label == FieldDescriptor.LABEL_REPEATED: + type_callable = repeated(type_callable) + + if field.is_extension: + extensions[str(field.number)] = type_callable(value) + continue + + result_dict[field.name] = type_callable(value) + + if extensions: + result_dict[EXTENSION_CONTAINER] = extensions + return result_dict + + +def _get_field_value_adaptor(pb, field, type_callable_map, use_enum_labels=False): + + if field.type == FieldDescriptor.TYPE_MESSAGE: + # recursively encode protobuf sub-message + return lambda pb: protobuf_to_dict(pb, use_enum_labels) + + if use_enum_labels and field.type == FieldDescriptor.TYPE_ENUM: + return lambda value: enum_label_name(field, value) + + if field.type in type_callable_map: + return type_callable_map[field.type] + + raise TypeError("Field %s.%s has unrecognised type id %d" % (pb.__class__.__name__, field.name, field.type)) + + +def get_bytes(value): + """ base64 decode + """ + + return value.decode('base64') + + +REVERSE_TYPE_CALLABLE_MAP = { + FieldDescriptor.TYPE_BYTES: get_bytes, +} + + +def dict_to_protobuf(pb_klass_or_instance, values, strict=True): + """Populates a protobuf model from a dictionary. + + :param pb_klass_or_instance: a protobuf message class, or an protobuf instance + :type pb_klass_or_instance: a type or instance of a subclass of google.protobuf.message.Message + :param dict values: a dictionary of values. Repeated and nested values are + fully supported. + :param bool strict: complain if keys in the map are not fields on the message. + """ + + # dict type_callable_map: a mapping of protobuf types to callables for setting + # values on the target instance. 
+ type_callable_map = REVERSE_TYPE_CALLABLE_MAP + if isinstance(pb_klass_or_instance, Message): + instance = pb_klass_or_instance + else: + instance = pb_klass_or_instance() + return _dict_to_protobuf(instance, values, type_callable_map, strict) + + +def _get_field_mapping(pb, dict_value, strict): + field_mapping = [] + key = "" + for key, value in dict_value.items(): + if key == EXTENSION_CONTAINER: + continue + if key not in pb.DESCRIPTOR.fields_by_name: + if strict: + raise KeyError("%s does not have a field called %s" % (pb, key)) + continue + field_mapping.append((pb.DESCRIPTOR.fields_by_name[key], value, getattr(pb, key, None))) + + for ext_num, ext_val in dict_value.get(EXTENSION_CONTAINER, {}).items(): + try: + ext_num = int(ext_num) + except ValueError: + raise ValueError("Extension keys must be integers.") + # pylint: disable=protected-access + if ext_num not in pb._extensions_by_number: + if strict: + raise KeyError( + "%s does not have a extension with number %s. Perhaps you forgot \ + to import it?" 
% (pb, key) + ) + continue + # pylint: disable=protected-access + ext_field = pb._extensions_by_number[ext_num] + pb_val = None + pb_val = pb.Extensions[ext_field] + field_mapping.append((ext_field, ext_val, pb_val)) + + return field_mapping + + +def _dict_to_protobuf(pb, value, type_callable_map, strict): + """ dict to protobuf + + Args: + pb: data in dict format + + Returns: + pb: data in proto format + """ + + fields = _get_field_mapping(pb, value, strict) + + for field, input_value, pb_value in fields: + if field.label == FieldDescriptor.LABEL_REPEATED: + for item in input_value: + if field.type == FieldDescriptor.TYPE_MESSAGE: + m = pb_value.add() + _dict_to_protobuf(m, item, type_callable_map, strict) + elif field.type == FieldDescriptor.TYPE_ENUM and isinstance(item, (str, bytes)): + pb_value.append(_string_to_enum(field, item)) + else: + pb_value.append(item) + continue + if field.type == FieldDescriptor.TYPE_MESSAGE: + _dict_to_protobuf(pb_value, input_value, type_callable_map, strict) + continue + + if field.type in type_callable_map: + input_value = type_callable_map[field.type](input_value) + + if field.is_extension: + pb.Extensions[field] = input_value + continue + + if field.type == FieldDescriptor.TYPE_ENUM and isinstance(input_value, (str, bytes)): + input_value = _string_to_enum(field, input_value) + + setattr(pb, field.name, input_value) + + return pb + + +def _string_to_enum(field, input_value): + enum_dict = field.enum_type.values_by_name + try: + input_value = enum_dict[input_value].number + except KeyError: + raise KeyError("`%s` is not a valid value for field `%s`" % (input_value, field.name)) + return input_value diff --git a/karapace/serialization.py b/karapace/serialization.py index d560faee5..8522465ab 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -1,7 +1,8 @@ from avro.io import BinaryDecoder, BinaryEncoder, DatumReader, DatumWriter from json import load from jsonschema import ValidationError -from 
karapace.protobuf.io import ProtobufDatumReader +from karapace.protobuf.exception import ProtobufTypeException +from karapace.protobuf.io import ProtobufDatumReader, ProtobufDatumWriter from karapace.schema_reader import InvalidSchema, SchemaType, TypedSchema from karapace.utils import Client, json_encode from typing import Dict, Optional @@ -184,13 +185,13 @@ def read_value(schema: TypedSchema, bio: io.BytesIO): reader = DatumReader(schema.schema) return reader.read(BinaryDecoder(bio)) if schema.schema_type is SchemaType.JSONSCHEMA: - value = load(bio) try: schema.schema.validate(value) except ValidationError as e: raise InvalidPayload from e return value + if schema.schema_type is SchemaType.PROTOBUF: reader = ProtobufDatumReader(schema.schema) return reader.read(bio) @@ -207,9 +208,12 @@ def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict): except ValidationError as e: raise InvalidPayload from e bio.write(json_encode(value, binary=True)) + elif schema.schema_type is SchemaType.PROTOBUF: # TODO: PROTOBUF* we need use protobuf validator there - bio.write(value) + writer = ProtobufDatumWriter(schema.schema) + writer.write_index(bio) + writer.write(value, bio) else: raise ValueError("Unknown schema type") @@ -220,11 +224,18 @@ async def serialize(self, schema: TypedSchema, value: dict) -> bytes: schema_id = self.schemas_to_ids[schema.__str__()] with io.BytesIO() as bio: bio.write(struct.pack(HEADER_FORMAT, START_BYTE, schema_id)) - try: - write_value(schema, bio, value) - return bio.getvalue() - except avro.io.AvroTypeException as e: - raise InvalidMessageSchema("Object does not fit to stored schema") from e + if schema.schema_type is SchemaType.PROTOBUF: + try: + write_value(schema, bio, value) + return bio.getvalue() + except ProtobufTypeException as e: + raise InvalidMessageSchema("Object does not fit to stored schema") from e + else: + try: + write_value(schema, bio, value) + return bio.getvalue() + except avro.io.AvroTypeException as e: + raise 
InvalidMessageSchema("Object does not fit to stored schema") from e class SchemaRegistryDeserializer(SchemaRegistrySerializerDeserializer): diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..566cd7a32 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,15 @@ +[mypy] +python_version = 3.7 +warn_redundant_casts = True + +[mypy-tests.unit.almond.flink.*] +ignore_errors = False +disallow_untyped_defs = True +disallow_incomplete_defs = True +check_untyped_defs = True +no_implicit_optional = True +warn_unused_ignores = True +warn_no_return = True +warn_unreachable = True +strict_equality = True + diff --git a/tests/integration/test_rest_consumer.py b/tests/integration/test_rest_consumer.py index b564ac754..533a4b824 100644 --- a/tests/integration/test_rest_consumer.py +++ b/tests/integration/test_rest_consumer.py @@ -311,3 +311,39 @@ async def test_publish_consume_avro(rest_async_client, admin_client, trail, sche data_values = [x["value"] for x in data] for expected, actual in zip(publish_payload, data_values): assert expected == actual, f"Expecting {actual} to be {expected}" + + +@pytest.mark.parametrize("schema_type", ["protobuf"]) +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_publish_consume_protobuf(rest_async_client, admin_client, trail, schema_type): + header = REST_HEADERS[schema_type] + group_name = "e2e_protobuf_group" + instance_id = await new_consumer(rest_async_client, group_name, fmt=schema_type, trail=trail) + assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" + consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" + tn = new_topic(admin_client) + assign_payload = {"partitions": [{"topic": tn, "partition": 0}]} + res = await rest_async_client.post(assign_path, json=assign_payload, headers=header) + assert res.ok + publish_payload = schema_data[schema_type][1] + await repeat_until_successful_request( + rest_async_client.post, + f"topics/{tn}{trail}", + 
json_data={ + "value_schema": schema_data[schema_type][0], + "records": [{ + "value": o + } for o in publish_payload] + }, + headers=header, + error_msg="Unexpected response status for offset commit", + timeout=10, + sleep=1, + ) + resp = await rest_async_client.get(consume_path, headers=header) + assert resp.ok, f"Expected a successful response: {resp}" + data = resp.json() + assert len(data) == len(publish_payload), f"Expected to read test_objects from fetch request but got {data}" + data_values = [x["value"] for x in data] + for expected, actual in zip(publish_payload, data_values): + assert expected == actual, f"Expecting {actual} to be {expected}" diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 7076d31dc..91a893b82 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -31,7 +31,7 @@ def add_slashes(text: str) -> str: '\v': '\\v', '\'': "\\'", '\"': '\\"', - '\\': '\\\\ + '\\': '\\\\' } trans_table = str.maketrans(escape_dict) return text.translate(trans_table) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 70c8034de..5e0d45697 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,5 +1,6 @@ +from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.schema_reader import SchemaType, TypedSchema -from tests.utils import schema_avro_json +from tests.utils import schema_avro_json, schema_protobuf import pytest @@ -19,6 +20,26 @@ async def post_new_schema(self, *args, **kwargs): return 1 +class MockProtobufClient: + # pylint: disable=W0613 + def __init__(self, *args, **kwargs): + pass + + async def get_schema_for_id(self, *args, **kwargs): + return TypedSchema.parse(SchemaType.PROTOBUF, trim_margin(schema_protobuf)) + + async def get_latest_schema(self, *args, **kwargs): + return 1, TypedSchema.parse(SchemaType.PROTOBUF, trim_margin(schema_protobuf)) + + async def post_new_schema(self, *args, **kwargs): + 
return 1 + + @pytest.fixture(name="mock_registry_client") def create_basic_registry_client() -> MockClient: return MockClient() + + +@pytest.fixture(name="mock_protobuf_registry_client") +def create_basic_protobuf_registry_client() -> MockProtobufClient: + return MockProtobufClient() diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index bece9b241..d290a279f 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -1,4 +1,5 @@ from karapace.protobuf.compare_result import CompareResult +from karapace.protobuf.io import ProtobufDatumReader from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location from karapace.protobuf.schema import ProtobufSchema @@ -286,3 +287,9 @@ def test_protobuf_field_compatible_alter_to_oneof(): protobuf_schema1.compare(protobuf_schema2, result) assert result.is_compatible() + +def test_protobuf_deserializer(): + raw = b'\x00\x00\x00\x00\x01\x00\x08\x05\x10\x02' + reader = ProtobufDatumReader(schema.schema) + + return reader.read(bio) \ No newline at end of file diff --git a/tests/unit/test_protobuf_serialization.py b/tests/unit/test_protobuf_serialization.py index 029bbbd16..4d2379a91 100644 --- a/tests/unit/test_protobuf_serialization.py +++ b/tests/unit/test_protobuf_serialization.py @@ -1,18 +1,8 @@ -import logging - from karapace.config import read_config -from karapace.serialization import ( - HEADER_FORMAT, InvalidMessageHeader, InvalidMessageSchema, InvalidPayload, SchemaRegistryDeserializer, - SchemaRegistrySerializer, START_BYTE -) -from tests.utils import test_objects_avro +from karapace.serialization import SchemaRegistryDeserializer, SchemaRegistrySerializer +from tests.utils import test_objects_protobuf -import avro -import copy -import io -import json -import pytest -import struct +import logging log = logging.getLogger(__name__) @@ -29,47 +19,50 @@ async def make_ser_deser(config_path, mock_client): return 
serializer, deserializer -async def test_happy_flow(default_config_path, mock_registry_client): - serializer, deserializer = await make_ser_deser(default_config_path, mock_registry_client) +async def test_happy_flow(default_config_path, mock_protobuf_registry_client): + serializer, deserializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) for o in serializer, deserializer: assert len(o.ids_to_schemas) == 0 schema = await serializer.get_schema_for_subject("top") - for o in test_objects_avro: - assert o == await deserializer.deserialize(await serializer.serialize(schema, o)) + for o in test_objects_protobuf: + a = await serializer.serialize(schema, o) + u = await deserializer.deserialize(a) + assert o == u for o in serializer, deserializer: assert len(o.ids_to_schemas) == 1 assert 1 in o.ids_to_schemas -async def test_serialization_fails(default_config_path, mock_registry_client): - serializer, _ = await make_ser_deser(default_config_path, mock_registry_client) - with pytest.raises(InvalidMessageSchema): - schema = await serializer.get_schema_for_subject("topic") - await serializer.serialize(schema, {"foo": "bar"}) - - -async def test_deserialization_fails(default_config_path, mock_registry_client): - _, deserializer = await make_ser_deser(default_config_path, mock_registry_client) - invalid_header_payload = struct.pack(">bII", 1, 500, 500) - with pytest.raises(InvalidMessageHeader): - await deserializer.deserialize(invalid_header_payload) - - # for now we ignore the packed in schema id - invalid_data_payload = struct.pack(">bII", START_BYTE, 1, 500) - with pytest.raises(InvalidPayload): - await deserializer.deserialize(invalid_data_payload) - - # but we can pass in a perfectly fine doc belonging to a diff schema - schema = await mock_registry_client.get_schema_for_id(1) - schema = copy.deepcopy(schema.to_json()) - schema["name"] = "BadUser" - schema["fields"][0]["type"] = "int" - obj = {"name": 100, "favorite_number": 2, 
"favorite_color": "bar"} - writer = avro.io.DatumWriter(avro.io.schema.parse(json.dumps(schema))) - with io.BytesIO() as bio: - enc = avro.io.BinaryEncoder(bio) - bio.write(struct.pack(HEADER_FORMAT, START_BYTE, 1)) - writer.write(obj, enc) - enc_bytes = bio.getvalue() - with pytest.raises(InvalidPayload): - await deserializer.deserialize(enc_bytes) +# async def test_serialization_fails(default_config_path, mock_protobuf_registry_client): +# serializer, _ = await make_ser_deser(default_config_path, mock_protobuf_registry_client) +# with pytest.raises(InvalidMessageSchema): +# schema = await serializer.get_schema_for_subject("topic") +# await serializer.serialize(schema, {"foo": "bar"}) +# +# +# async def test_deserialization_fails(default_config_path, mock_protobuf_registry_client): +# _, deserializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) +# invalid_header_payload = struct.pack(">bII", 1, 500, 500) +# with pytest.raises(InvalidMessageHeader): +# await deserializer.deserialize(invalid_header_payload) +# +# # for now we ignore the packed in schema id +# invalid_data_payload = struct.pack(">bII", START_BYTE, 1, 500) +# with pytest.raises(InvalidPayload): +# await deserializer.deserialize(invalid_data_payload) +# +# # but we can pass in a perfectly fine doc belonging to a diff schema +# schema = await mock_protobuf_registry_client.get_schema_for_id(1) +# schema = copy.deepcopy(schema.to_json()) +# schema["name"] = "BadUser" +# schema["fields"][0]["type"] = "int" +# obj = {"name": 100, "favorite_number": 2, "favorite_color": "bar"} +# writer = avro.io.DatumWriter(avro.io.schema.parse(json.dumps(schema))) +# with io.BytesIO() as bio: +# enc = avro.io.BinaryEncoder(bio) +# bio.write(struct.pack(HEADER_FORMAT, START_BYTE, 1)) +# writer.write(obj, enc) +# enc_bytes = bio.getvalue() +# with pytest.raises(InvalidPayload): +# await deserializer.deserialize(enc_bytes) +# diff --git a/tests/unit/test_serialization.py 
b/tests/unit/test_serialization.py index 146a3c745..3ce4a19c0 100644 --- a/tests/unit/test_serialization.py +++ b/tests/unit/test_serialization.py @@ -9,9 +9,12 @@ import copy import io import json +import logging import pytest import struct +log = logging.getLogger(__name__) + async def make_ser_deser(config_path, mock_client): with open(config_path) as handler: diff --git a/tests/utils.py b/tests/utils.py index c504dfc66..840689bc1 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 +1,7 @@ from aiohttp.client_exceptions import ClientOSError, ServerDisconnectedError from dataclasses import dataclass from kafka.errors import TopicAlreadyExistsError +from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.utils import Client from typing import Callable, List from urllib.parse import quote @@ -64,9 +65,42 @@ }, ] +# protobuf schemas in tests must be filtered by trim_margin() from kotlin_wrapper module + +schema_protobuf = """ +|syntax = "proto3"; +| +|option java_package = "com.codingharbour.protobuf"; +|option java_outer_classname = "TestEnumOrder"; +| +|message Message { +| int32 query = 1; +| Enum speed = 2; +|} +|enum Enum { +| HIGH = 0; +| MIDDLE = 1; +| LOW = 2; +|} +| +""" +schema_protobuf = trim_margin(schema_protobuf) + +test_objects_protobuf = [ + { + 'query': 5, + 'speed': 'LOW' + }, + { + 'query': 10, + 'speed': 'MIDDLE' + }, +] + schema_data = { "avro": (schema_avro_json, test_objects_avro), - "jsonschema": (schema_jsonschema_json, test_objects_jsonschema) + "jsonschema": (schema_jsonschema_json, test_objects_jsonschema), + "protobuf": (schema_protobuf, test_objects_protobuf) } second_schema_json = json.dumps({ @@ -98,6 +132,10 @@ "Content-Type": "application/vnd.kafka.avro.v2+json", "Accept": "application/vnd.kafka.avro.v2+json, application/vnd.kafka.v2+json, application/json, */*" }, + "protobuf": { + "Content-Type": "application/vnd.kafka.protobuf.v2+json", + "Accept": "application/vnd.kafka.protobuf.v2+json, 
application/vnd.kafka.v2+json, application/json, */*" + } } From 74b615a539757885196a817e886be8ad477107b1 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 23 Nov 2021 12:54:49 +0200 Subject: [PATCH 058/168] workarounds --- karapace/protobuf/io.py | 2 +- karapace/protobuf/protobuf_to_dict.py | 266 ++++++++++++++++++-------- pytest.ini | 2 +- requirements.txt | 3 + tests/unit/test_protobuf_schema.py | 5 - tests/utils.py | 2 +- 6 files changed, 190 insertions(+), 90 deletions(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 2b7870906..3e4c22425 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -117,7 +117,7 @@ def read_indexes(self, bio: BytesIO): def read(self, bio: BytesIO): if self.reader_schema is None: self.reader_schema = self.writer_schema - return protobuf_to_dict(self.read_data(self.writer_schema, self.reader_schema, bio)) + return protobuf_to_dict(self.read_data(self.writer_schema, self.reader_schema, bio), True) @staticmethod def find_message_name(schema: ProtobufSchema, indexes: list) -> str: diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index e367dc836..9e566506a 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -1,71 +1,93 @@ """ This module provide a small Python library for creating dicts from protocol buffers Module based on code : -https://github.com/advboxes/perceptron-benchmark/blob/master/perceptron/utils/protobuf/protobuf_to_dict.py -License: (Apache 2.0) https://github.com/advboxes/perceptron-benchmark/blob/master/LICENSE +https://github.com/wearefair/protobuf-to-dict +LICENSE: https://github.com/wearefair/protobuf-to-dict/blob/master/LICENSE """ -from google.protobuf.descriptor import FieldDescriptor +# -*- coding:utf-8 -*- +import six +import datetime +from dateutil.parser import parse as date_parser + from google.protobuf.message import Message +from google.protobuf.descriptor import FieldDescriptor 
+from google.protobuf.timestamp_pb2 import Timestamp + +__all__ = ["protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf", + "REVERSE_TYPE_CALLABLE_MAP"] + +Timestamp_type_name = 'Timestamp' + + +def datetime_to_timestamp(dt): + ts = Timestamp() + ts.FromDatetime(dt) + return ts + + +def timestamp_to_datetime(ts): + dt = ts.ToDatetime() + return dt -__all__ = ["protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf", "REVERSE_TYPE_CALLABLE_MAP"] EXTENSION_CONTAINER = '___X' + TYPE_CALLABLE_MAP = { FieldDescriptor.TYPE_DOUBLE: float, FieldDescriptor.TYPE_FLOAT: float, FieldDescriptor.TYPE_INT32: int, - FieldDescriptor.TYPE_INT64: int, + FieldDescriptor.TYPE_INT64: int if six.PY3 else six.integer_types[1], FieldDescriptor.TYPE_UINT32: int, - FieldDescriptor.TYPE_UINT64: int, + FieldDescriptor.TYPE_UINT64: int if six.PY3 else six.integer_types[1], FieldDescriptor.TYPE_SINT32: int, - FieldDescriptor.TYPE_SINT64: int, + FieldDescriptor.TYPE_SINT64: int if six.PY3 else six.integer_types[1], FieldDescriptor.TYPE_FIXED32: int, - FieldDescriptor.TYPE_FIXED64: int, + FieldDescriptor.TYPE_FIXED64: int if six.PY3 else six.integer_types[1], FieldDescriptor.TYPE_SFIXED32: int, - FieldDescriptor.TYPE_SFIXED64: int, + FieldDescriptor.TYPE_SFIXED64: int if six.PY3 else six.integer_types[1], FieldDescriptor.TYPE_BOOL: bool, - FieldDescriptor.TYPE_STRING: str, - FieldDescriptor.TYPE_BYTES: lambda b: b.encode("base64"), + FieldDescriptor.TYPE_STRING: six.text_type, + FieldDescriptor.TYPE_BYTES: six.binary_type, FieldDescriptor.TYPE_ENUM: int, } def repeated(type_callable): - """ - - Returns: - type_callable: - """ - return lambda value_list: [type_callable(value) for value in value_list] -def enum_label_name(field, value): - """ enum label name - - Returns: - : label name - """ - return field.enum_type.values_by_number[int(value)].name +def enum_label_name(field, value, lowercase_enum_lables=False): + label = field.enum_type.values_by_number[int(value)].name + label = 
label.lower() if lowercase_enum_lables else label + return label -def protobuf_to_dict(pb, use_enum_labels=False): - """ protobuf to dict +def _is_map_entry(field): + return (field.type == FieldDescriptor.TYPE_MESSAGE and + field.message_type.has_options and + field.message_type.GetOptions().map_entry) - Args: - pb: data in proto format - use_enum_labels: ?? - Returns: - result_dict: data in dict format - """ - type_callable_map = TYPE_CALLABLE_MAP +def protobuf_to_dict(pb, type_callable_map=TYPE_CALLABLE_MAP, use_enum_labels=False, + including_default_value_fields=False, lowercase_enum_lables=False): result_dict = {} extensions = {} for field, value in pb.ListFields(): - type_callable = _get_field_value_adaptor(pb, field, type_callable_map, use_enum_labels) + if field.message_type and field.message_type.has_options and field.message_type.GetOptions().map_entry: + result_dict[field.name] = dict() + value_field = field.message_type.fields_by_name['value'] + type_callable = _get_field_value_adaptor( + pb, value_field, type_callable_map, + use_enum_labels, including_default_value_fields, + lowercase_enum_lables) + for k, v in value.items(): + result_dict[field.name][k] = type_callable(v) + continue + type_callable = _get_field_value_adaptor(pb, field, type_callable_map, + use_enum_labels, including_default_value_fields, + lowercase_enum_lables) if field.label == FieldDescriptor.LABEL_REPEATED: type_callable = repeated(type_callable) @@ -75,67 +97,90 @@ def protobuf_to_dict(pb, use_enum_labels=False): result_dict[field.name] = type_callable(value) + # Serialize default value if including_default_value_fields is True. + if including_default_value_fields: + for field in pb.DESCRIPTOR.fields: + # Singular message fields and oneof fields will not be affected. 
+ if (( + field.label != FieldDescriptor.LABEL_REPEATED and + field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE) or + field.containing_oneof): + continue + if field.name in result_dict: + # Skip the field which has been serailized already. + continue + if _is_map_entry(field): + result_dict[field.name] = {} + elif field.label == FieldDescriptor.LABEL_REPEATED: + result_dict[field.name] = [] + elif field.type == FieldDescriptor.TYPE_ENUM and use_enum_labels: + result_dict[field.name] = enum_label_name(field, field.default_value, lowercase_enum_lables) + else: + result_dict[field.name] = field.default_value + if extensions: result_dict[EXTENSION_CONTAINER] = extensions return result_dict -def _get_field_value_adaptor(pb, field, type_callable_map, use_enum_labels=False): +def _get_field_value_adaptor(pb, field, type_callable_map=TYPE_CALLABLE_MAP, use_enum_labels=False, + including_default_value_fields=False, lowercase_enum_lables=False): + if field.message_type and field.message_type.name == Timestamp_type_name: + return timestamp_to_datetime if field.type == FieldDescriptor.TYPE_MESSAGE: # recursively encode protobuf sub-message - return lambda pb: protobuf_to_dict(pb, use_enum_labels) + return lambda pb: protobuf_to_dict( + pb, type_callable_map=type_callable_map, + use_enum_labels=use_enum_labels, + including_default_value_fields=including_default_value_fields, + lowercase_enum_lables=lowercase_enum_lables, + ) if use_enum_labels and field.type == FieldDescriptor.TYPE_ENUM: - return lambda value: enum_label_name(field, value) + return lambda value: enum_label_name(field, value, lowercase_enum_lables) if field.type in type_callable_map: return type_callable_map[field.type] - raise TypeError("Field %s.%s has unrecognised type id %d" % (pb.__class__.__name__, field.name, field.type)) - - -def get_bytes(value): - """ base64 decode - """ - - return value.decode('base64') + raise TypeError("Field %s.%s has unrecognised type id %d" % ( + pb.__class__.__name__, 
field.name, field.type)) REVERSE_TYPE_CALLABLE_MAP = { - FieldDescriptor.TYPE_BYTES: get_bytes, } -def dict_to_protobuf(pb_klass_or_instance, values, strict=True): +def dict_to_protobuf(pb_klass_or_instance, values, type_callable_map=REVERSE_TYPE_CALLABLE_MAP, + strict=True, ignore_none=False, use_date_parser_for_fields=None): """Populates a protobuf model from a dictionary. :param pb_klass_or_instance: a protobuf message class, or an protobuf instance :type pb_klass_or_instance: a type or instance of a subclass of google.protobuf.message.Message :param dict values: a dictionary of values. Repeated and nested values are fully supported. + :param dict type_callable_map: a mapping of protobuf types to callables for setting + values on the target instance. :param bool strict: complain if keys in the map are not fields on the message. + :param bool strict: ignore None-values of fields, treat them as empty field + :param bool strict: when false: accept enums both in lowercase and uppercase + :param list use_date_parser_for_fields: a list of fields that need to use date_parser """ - - # dict type_callable_map: a mapping of protobuf types to callables for setting - # values on the target instance. 
- type_callable_map = REVERSE_TYPE_CALLABLE_MAP if isinstance(pb_klass_or_instance, Message): instance = pb_klass_or_instance else: instance = pb_klass_or_instance() - return _dict_to_protobuf(instance, values, type_callable_map, strict) + return _dict_to_protobuf(instance, values, type_callable_map, strict, ignore_none, use_date_parser_for_fields) def _get_field_mapping(pb, dict_value, strict): field_mapping = [] - key = "" for key, value in dict_value.items(): if key == EXTENSION_CONTAINER: continue if key not in pb.DESCRIPTOR.fields_by_name: if strict: - raise KeyError("%s does not have a field called %s" % (pb, key)) + raise KeyError("%s does not have a field called %s" % (type(pb), key)) continue field_mapping.append((pb.DESCRIPTOR.fields_by_name[key], value, getattr(pb, key, None))) @@ -144,15 +189,10 @@ def _get_field_mapping(pb, dict_value, strict): ext_num = int(ext_num) except ValueError: raise ValueError("Extension keys must be integers.") - # pylint: disable=protected-access if ext_num not in pb._extensions_by_number: if strict: - raise KeyError( - "%s does not have a extension with number %s. Perhaps you forgot \ - to import it?" % (pb, key) - ) + raise KeyError("%s does not have a extension with number %s. Perhaps you forgot to import it?" 
% (pb, key)) continue - # pylint: disable=protected-access ext_field = pb._extensions_by_number[ext_num] pb_val = None pb_val = pb.Extensions[ext_field] @@ -161,31 +201,53 @@ def _get_field_mapping(pb, dict_value, strict): return field_mapping -def _dict_to_protobuf(pb, value, type_callable_map, strict): - """ dict to protobuf - - Args: - pb: data in dict format - - Returns: - pb: data in proto format - """ - +def _dict_to_protobuf(pb, value, type_callable_map, strict, ignore_none, use_date_parser_for_fields): fields = _get_field_mapping(pb, value, strict) for field, input_value, pb_value in fields: + if ignore_none and input_value is None: + continue if field.label == FieldDescriptor.LABEL_REPEATED: + if field.message_type and field.message_type.has_options and field.message_type.GetOptions().map_entry: + key_field = field.message_type.fields_by_name['key'] + value_field = field.message_type.fields_by_name['value'] + for key, value in input_value.items(): + if value_field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE: + _dict_to_protobuf(getattr(pb, field.name)[key], value, type_callable_map, strict, ignore_none, use_date_parser_for_fields) + else: + if ignore_none and value is None: + continue + try: + if key_field.type in type_callable_map: + key = type_callable_map[key_field.type](key) + if value_field.type in type_callable_map: + value = type_callable_map[value_field.type](value) + getattr(pb, field.name)[key] = value + except Exception as exc: + raise RuntimeError(f"type: {type(pb)}, field: {field.name}, value: {value}") from exc + continue for item in input_value: if field.type == FieldDescriptor.TYPE_MESSAGE: m = pb_value.add() - _dict_to_protobuf(m, item, type_callable_map, strict) - elif field.type == FieldDescriptor.TYPE_ENUM and isinstance(item, (str, bytes)): - pb_value.append(_string_to_enum(field, item)) + _dict_to_protobuf(m, item, type_callable_map, strict, ignore_none, use_date_parser_for_fields) + elif field.type == FieldDescriptor.TYPE_ENUM and 
isinstance(item, six.string_types): + pb_value.append(_string_to_enum(field, item, strict)) else: pb_value.append(item) continue - if field.type == FieldDescriptor.TYPE_MESSAGE: - _dict_to_protobuf(pb_value, input_value, type_callable_map, strict) + if isinstance(input_value, datetime.datetime): + input_value = datetime_to_timestamp(input_value) + # Instead of setattr we need to use CopyFrom for composite fields + # Otherwise we will get AttributeError: + # Assignment not allowed to composite field “field name” in protocol message object + getattr(pb, field.name).CopyFrom(input_value) + continue + elif use_date_parser_for_fields and field.name in use_date_parser_for_fields: + input_value = datetime_to_timestamp(date_parser(input_value)) + getattr(pb, field.name).CopyFrom(input_value) + continue + elif field.type == FieldDescriptor.TYPE_MESSAGE: + _dict_to_protobuf(pb_value, input_value, type_callable_map, strict, ignore_none, use_date_parser_for_fields) continue if field.type in type_callable_map: @@ -195,18 +257,58 @@ def _dict_to_protobuf(pb, value, type_callable_map, strict): pb.Extensions[field] = input_value continue - if field.type == FieldDescriptor.TYPE_ENUM and isinstance(input_value, (str, bytes)): - input_value = _string_to_enum(field, input_value) + if field.type == FieldDescriptor.TYPE_ENUM and isinstance(input_value, six.string_types): + input_value = _string_to_enum(field, input_value, strict) - setattr(pb, field.name, input_value) + try: + setattr(pb, field.name, input_value) + except Exception as exc: + raise RuntimeError(f"type: {type(pb)}, field: {field.name}, value: {value}") from exc return pb -def _string_to_enum(field, input_value): - enum_dict = field.enum_type.values_by_name +def _string_to_enum(field, input_value, strict=False): try: - input_value = enum_dict[input_value].number + input_value = field.enum_type.values_by_name[input_value].number except KeyError: - raise KeyError("`%s` is not a valid value for field `%s`" % (input_value, 
field.name)) + if strict: + raise KeyError("`%s` is not a valid value for field `%s`" % (input_value, field.name)) + else: + return _string_to_enum(field, input_value.upper(), strict=True) return input_value + + +def get_field_names_and_options(pb): + """ + Return a tuple of field names and options. + """ + desc = pb.DESCRIPTOR + + for field in desc.fields: + field_name = field.name + options_dict = {} + if field.has_options: + options = field.GetOptions() + for subfield, value in options.ListFields(): + options_dict[subfield.name] = value + yield field, field_name, options_dict + + +class FieldsMissing(ValueError): + pass + + +def validate_dict_for_required_pb_fields(pb, dic): + """ + Validate that the dictionary has all the required fields for creating a protobuffer object + from pb class. If a field is missing, raise FieldsMissing. + In order to mark a field as optional, add [(is_optional) = true] to the field. + Take a look at the tests for an example. + """ + missing_fields = [] + for field, field_name, field_options in get_field_names_and_options(pb): + if not field_options.get('is_optional', False) and field_name not in dic: + missing_fields.append(field_name) + if missing_fields: + raise FieldsMissing('Missing fields: {}'.format(', '.join(missing_fields))) diff --git a/pytest.ini b/pytest.ini index 3417588cc..3f731a9e9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] addopts = -ra -q --tb=short --showlocals --numprocesses auto -timeout = 60 +timeout = 600 diff --git a/requirements.txt b/requirements.txt index d4791526c..41bd71493 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,9 @@ jsonschema==3.2.0 lz4==3.0.2 requests==2.23.0 networkx==2.5 +six +python-dateutil==2.8.2 + # Patched dependencies # diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index d290a279f..5efc22562 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -288,8 +288,3 @@ def 
test_protobuf_field_compatible_alter_to_oneof(): assert result.is_compatible() -def test_protobuf_deserializer(): - raw = b'\x00\x00\x00\x00\x01\x00\x08\x05\x10\x02' - reader = ProtobufDatumReader(schema.schema) - - return reader.read(bio) \ No newline at end of file diff --git a/tests/utils.py b/tests/utils.py index 840689bc1..a3d0f5a30 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -89,7 +89,7 @@ test_objects_protobuf = [ { 'query': 5, - 'speed': 'LOW' + 'speed': 'HIGH' }, { 'query': 10, From 9423ba2e5de880d15a1ac80400644b28ff5d7faf Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 23 Nov 2021 22:33:51 +0200 Subject: [PATCH 059/168] fixup annotations --- karapace/protobuf/compare_result.py | 4 ++-- karapace/protobuf/compare_type_storage.py | 6 +++--- karapace/protobuf/enum_constant_element.py | 2 +- karapace/protobuf/enum_element.py | 4 +++- karapace/protobuf/exception.py | 4 ++-- karapace/protobuf/extend_element.py | 2 +- karapace/protobuf/extensions_element.py | 2 +- karapace/protobuf/field_element.py | 2 +- karapace/protobuf/group_element.py | 2 +- karapace/protobuf/kotlin_wrapper.py | 2 +- karapace/protobuf/location.py | 2 +- karapace/protobuf/message_element.py | 2 +- karapace/protobuf/one_of_element.py | 2 +- karapace/protobuf/option_element.py | 2 +- karapace/protobuf/option_reader.py | 4 ++-- karapace/protobuf/proto_file_element.py | 2 +- karapace/protobuf/proto_parser.py | 2 +- karapace/protobuf/proto_type.py | 6 +++--- karapace/protobuf/reserved_document.py | 2 +- karapace/protobuf/rpc_element.py | 2 +- karapace/protobuf/schema.py | 2 +- karapace/protobuf/service_element.py | 2 +- karapace/protobuf/syntax_reader.py | 2 +- karapace/protobuf/type_element.py | 2 +- karapace/serialization.py | 6 +++--- mypy.ini | 14 ++++++++++++++ 26 files changed, 50 insertions(+), 34 deletions(-) create mode 100644 mypy.ini diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index 6e9697195..10984d474 100644 --- 
a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -39,7 +39,7 @@ def is_compatible(self) -> bool: class ModificationRecord: - def __init__(self, modification: Modification, path: str): + def __init__(self, modification: Modification, path: str) -> None: self.modification = modification self.path = path if modification.is_compatible(): @@ -52,7 +52,7 @@ def to_str(self) -> str: class CompareResult: - def __init__(self): + def __init__(self) -> None: self.result: list = [] self.path: list = [] self.canonical_name: list = [] diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 9309d8098..d7b630909 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -28,7 +28,7 @@ def compute_name(t: ProtoType, result_path: List[str], package_name: str, types: class CompareTypes: - def __init__(self, self_package_name: str, other_package_name: str, result: CompareResult): + def __init__(self, self_package_name: str, other_package_name: str, result: CompareResult) -> None: self.self_package_name = self_package_name self.other_package_name = other_package_name @@ -112,13 +112,13 @@ def unlock_message(self, message: object) -> bool: class TypeRecord: - def __init__(self, package_name: str, type_element: TypeElement): + def __init__(self, package_name: str, type_element: TypeElement) -> None: self.package_name = package_name self.type_element = type_element class TypeRecordMap(TypeRecord): - def __init__(self, package_name: str, type_element: TypeElement, key: object, value: object): + def __init__(self, package_name: str, type_element: TypeElement, key: object, value: object) -> None: super().__init__(package_name, type_element) try: self.key = key diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index 24755da31..b0bf4846a 100644 --- a/karapace/protobuf/enum_constant_element.py +++ 
b/karapace/protobuf/enum_constant_element.py @@ -12,7 +12,7 @@ def __init__( tag: int, documentation: str = "", options: list = None, - ): + ) -> None: self.location = location self.name = name diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 151e3ae7f..215de8efa 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -10,7 +10,9 @@ class EnumElement(TypeElement): - def __init__(self, location: Location, name: str, documentation: str = "", options: list = None, constants: list = None): + def __init__( + self, location: Location, name: str, documentation: str = "", options: list = None, constants: list = None + ) -> None: # Enums do not allow nested type declarations. super().__init__(location, name, documentation, options or [], []) self.constants = constants or [] diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index 89f6978a6..b7e2e9cf5 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -7,13 +7,13 @@ class ProtobufParserRuntimeException(Exception): class IllegalStateException(Exception): - def __init__(self, message="IllegalStateException"): + def __init__(self, message="IllegalStateException") -> None: self.message = message super().__init__(self.message) class IllegalArgumentException(Exception): - def __init__(self, message="IllegalArgumentException"): + def __init__(self, message="IllegalArgumentException") -> None: self.message = message super().__init__(self.message) diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index 947456aaf..ac3d187b9 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -6,7 +6,7 @@ class ExtendElement: - def __init__(self, location: Location, name: str, documentation: str = "", fields: list = None): + def __init__(self, location: Location, name: str, documentation: str = "", fields: list = None) -> None: self.location = 
location self.name = name self.documentation = documentation diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index 5349a9e86..5ebb28a80 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -7,7 +7,7 @@ class ExtensionsElement: - def __init__(self, location: Location, documentation: str = "", values: list = None): + def __init__(self, location: Location, documentation: str = "", values: list = None) -> None: self.location = location self.documentation = documentation """ An [Int] or [IntRange] tag. """ diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index a4acf9a72..acf9216fe 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -23,7 +23,7 @@ def __init__( tag: int = None, documentation: str = "", options: list = None - ): + ) -> None: self.location = location self.label = label self.element_type = element_type diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index 45a004659..0b96cecad 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -15,7 +15,7 @@ def __init__( tag: int, documentation: str = "", fields: list = None - ): + ) -> None: self.label = label self.location = location self.name = name diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index 0946c0343..67d4019f5 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -51,7 +51,7 @@ class OptionsList(list): class KotlinRange: - def __init__(self, minimum, maximum): + def __init__(self, minimum, maximum) -> None: self.minimum = minimum self.maximum = maximum diff --git a/karapace/protobuf/location.py b/karapace/protobuf/location.py index b59b886ae..a87f0626b 100644 --- a/karapace/protobuf/location.py +++ b/karapace/protobuf/location.py @@ -6,7 +6,7 @@ class Location: """ Locates a .proto 
file, or a self.position within a .proto file, on the file system """ - def __init__(self, base: str, path: str, line: int = -1, column: int = -1): + def __init__(self, base: str, path: str, line: int = -1, column: int = -1) -> None: """ str - The base directory of this location; path - The path to this location relative to [base] line - The line number of this location, or -1 for no specific line number diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index d7ad7794c..2014eb165 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -29,7 +29,7 @@ def __init__( one_ofs: List[OneOfElement] = None, extensions: List[ExtensionsElement] = None, groups: List[GroupElement] = None, - ): + ) -> None: super().__init__(location, name, documentation, options or [], nested_types or []) self.reserveds = reserveds or [] self.fields = fields or [] diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index fd5b714ce..540cba1e8 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -7,7 +7,7 @@ class OneOfElement: - def __init__(self, name: str, documentation: str = "", fields=None, groups=None, options=None): + def __init__(self, name: str, documentation: str = "", fields=None, groups=None, options=None) -> None: self.name = name self.documentation = documentation self.fields = fields or [] diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 0981f1315..b7e54bac2 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -21,7 +21,7 @@ class Kind(Enum): LIST = 6 OPTION = 7 - def __init__(self, name: str, kind: Kind, value, is_parenthesized: bool = None): + def __init__(self, name: str, kind: Kind, value, is_parenthesized: bool = None) -> None: self.name = name self.kind = kind self.value = value diff --git a/karapace/protobuf/option_reader.py 
b/karapace/protobuf/option_reader.py index c502bc8f3..027b3622f 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -7,7 +7,7 @@ class KindAndValue: - def __init__(self, kind: OptionElement.Kind, value: object): + def __init__(self, kind: OptionElement.Kind, value: object) -> None: self.kind = kind self.value = value @@ -15,7 +15,7 @@ def __init__(self, kind: OptionElement.Kind, value: object): class OptionReader: reader: SyntaxReader - def __init__(self, reader: SyntaxReader): + def __init__(self, reader: SyntaxReader) -> None: self.reader = reader def read_options(self) -> list: diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index c37980287..5dfe72fe8 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -23,7 +23,7 @@ def __init__( services: list = None, extend_declarations: list = None, options: list = None - ): + ) -> None: if types is None: types = [] self.location = location diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 3b75ba77a..49dbcdcf0 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -68,7 +68,7 @@ def permits_extend(self) -> bool: class ProtoParser: - def __init__(self, location: Location, data: str): + def __init__(self, location: Location, data: str) -> None: self.location = location self.imports: List[str] = [] self.nested_types: List[str] = [] diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 8d822936e..5ff5507b1 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -11,7 +11,7 @@ from typing import Optional -def static_init(cls): +def static_init(cls) -> object: if getattr(cls, "static_init", None): cls.static_init() return cls @@ -25,7 +25,7 @@ def simple_name(self) -> str: return self.string[dot + 1:] @classmethod - def static_init(cls): + def static_init(cls) -> 
None: cls.BOOL = cls(True, "bool") cls.BYTES = cls(True, "bytes") cls.DOUBLE = cls(True, "double") @@ -76,7 +76,7 @@ def static_init(cls): def __init__( self, is_scalar: bool, string: str, key_type: Optional['ProtoType'] = None, value_type: Optional['ProtoType'] = None - ): + ) -> None: """ Creates a scalar or message type. """ if not key_type and not value_type: self.is_scalar = is_scalar diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 2a9d27185..e12499ce5 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -7,7 +7,7 @@ class ReservedElement: - def __init__(self, location: Location, documentation: str = "", values: list = None): + def __init__(self, location: Location, documentation: str = "", values: list = None) -> None: self.location = location self.documentation = documentation """ A [String] name or [Int] or [IntRange] tag. """ diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py index 33f2a60ec..a48852094 100644 --- a/karapace/protobuf/rpc_element.py +++ b/karapace/protobuf/rpc_element.py @@ -16,7 +16,7 @@ def __init__( request_streaming: bool = False, response_streaming: bool = False, options: list = None - ): + ) -> None: self.location = location self.name = name self.documentation = documentation diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 6130a5a8d..4067055ad 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -100,7 +100,7 @@ def option_element_string(option: OptionElement) -> str: class ProtobufSchema: DEFAULT_LOCATION = Location.get("") - def __init__(self, schema: str): + def __init__(self, schema: str) -> None: if type(schema).__name__ != 'str': raise IllegalArgumentException("Non str type of schema string") self.dirty = schema diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index 0728e925c..10a8d28dd 100644 --- 
a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -15,7 +15,7 @@ def __init__( documentation: str = "", rpcs: List[RpcElement] = None, options: List[OptionElement] = None - ): + ) -> None: self.location = location self.name = name self.documentation = documentation diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 6bfe27eee..e15af5af3 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -6,7 +6,7 @@ class SyntaxReader: - def __init__(self, data: str, location: Location): + def __init__(self, data: str, location: Location) -> None: """ Next character to be read """ self.pos = 0 """ The number of newline characters """ diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index 20b1659be..76b84fd0e 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -6,7 +6,7 @@ from typing import List -@dataclass(frozen=True) +@dataclass class TypeElement: location: Location name: str diff --git a/karapace/serialization.py b/karapace/serialization.py index 9d57786e7..7c5c9f6ee 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -66,7 +66,7 @@ def topic_record_name_strategy(topic_name: str, record_name: str) -> str: class SchemaRegistryClient: - def __init__(self, schema_registry_url: str = "http://localhost:8081"): + def __init__(self, schema_registry_url: str = "http://localhost:8081") -> None: self.client = Client(server_uri=schema_registry_url, client=aiohttp.ClientSession()) self.base_url = schema_registry_url @@ -177,7 +177,7 @@ async def get_schema_for_id(self, schema_id: int) -> TypedSchema: return schema_typed -def read_value(schema: TypedSchema, bio: io.BytesIO): +def read_value(schema: TypedSchema, bio: io.BytesIO) -> object: if schema.schema_type is SchemaType.AVRO: reader = DatumReader(schema.schema) @@ -197,7 +197,7 @@ def read_value(schema: TypedSchema, bio: 
io.BytesIO): raise ValueError("Unknown schema type") -def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict): +def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict) -> None: if schema.schema_type is SchemaType.AVRO: writer = DatumWriter(schema.schema) writer.write(value, BinaryEncoder(bio)) diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..2da13169e --- /dev/null +++ b/mypy.ini @@ -0,0 +1,14 @@ +[mypy] +python_version = 3.7 +warn_redundant_casts = True + +[mypy-tests.unit.almond.flink.*] +ignore_errors = False +disallow_untyped_defs = True +disallow_incomplete_defs = True +check_untyped_defs = True +no_implicit_optional = True +warn_unused_ignores = True +warn_no_return = True +warn_unreachable = True +strict_equality = True From f1a081282bafe5d5cc076156d8a6dbfec0daac9c Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 24 Nov 2021 07:48:16 +0200 Subject: [PATCH 060/168] backup workaround --- karapace/protobuf/protobuf_to_dict.py | 85 ++++++++++++++++----------- mypy.ini | 1 - tests/unit/test_protobuf_schema.py | 1 - 3 files changed, 50 insertions(+), 37 deletions(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 9e566506a..12ec8663f 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -5,17 +5,16 @@ LICENSE: https://github.com/wearefair/protobuf-to-dict/blob/master/LICENSE """ -# -*- coding:utf-8 -*- -import six -import datetime from dateutil.parser import parse as date_parser - -from google.protobuf.message import Message from google.protobuf.descriptor import FieldDescriptor +from google.protobuf.message import Message from google.protobuf.timestamp_pb2 import Timestamp -__all__ = ["protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf", - "REVERSE_TYPE_CALLABLE_MAP"] +import datetime +# -*- coding:utf-8 -*- +import six + +__all__ = ["protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf", 
"REVERSE_TYPE_CALLABLE_MAP"] Timestamp_type_name = 'Timestamp' @@ -33,7 +32,6 @@ def timestamp_to_datetime(ts): EXTENSION_CONTAINER = '___X' - TYPE_CALLABLE_MAP = { FieldDescriptor.TYPE_DOUBLE: float, FieldDescriptor.TYPE_FLOAT: float, @@ -65,13 +63,19 @@ def enum_label_name(field, value, lowercase_enum_lables=False): def _is_map_entry(field): - return (field.type == FieldDescriptor.TYPE_MESSAGE and - field.message_type.has_options and - field.message_type.GetOptions().map_entry) - - -def protobuf_to_dict(pb, type_callable_map=TYPE_CALLABLE_MAP, use_enum_labels=False, - including_default_value_fields=False, lowercase_enum_lables=False): + return ( + field.type == FieldDescriptor.TYPE_MESSAGE and field.message_type.has_options + and field.message_type.GetOptions().map_entry + ) + + +def protobuf_to_dict( + pb, + type_callable_map=TYPE_CALLABLE_MAP, + use_enum_labels=False, + including_default_value_fields=False, + lowercase_enum_lables=False +): result_dict = {} extensions = {} for field, value in pb.ListFields(): @@ -79,15 +83,14 @@ def protobuf_to_dict(pb, type_callable_map=TYPE_CALLABLE_MAP, use_enum_labels=Fa result_dict[field.name] = dict() value_field = field.message_type.fields_by_name['value'] type_callable = _get_field_value_adaptor( - pb, value_field, type_callable_map, - use_enum_labels, including_default_value_fields, - lowercase_enum_lables) + pb, value_field, type_callable_map, use_enum_labels, including_default_value_fields, lowercase_enum_lables + ) for k, v in value.items(): result_dict[field.name][k] = type_callable(v) continue - type_callable = _get_field_value_adaptor(pb, field, type_callable_map, - use_enum_labels, including_default_value_fields, - lowercase_enum_lables) + type_callable = _get_field_value_adaptor( + pb, field, type_callable_map, use_enum_labels, including_default_value_fields, lowercase_enum_lables + ) if field.label == FieldDescriptor.LABEL_REPEATED: type_callable = repeated(type_callable) @@ -101,10 +104,8 @@ def 
protobuf_to_dict(pb, type_callable_map=TYPE_CALLABLE_MAP, use_enum_labels=Fa if including_default_value_fields: for field in pb.DESCRIPTOR.fields: # Singular message fields and oneof fields will not be affected. - if (( - field.label != FieldDescriptor.LABEL_REPEATED and - field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE) or - field.containing_oneof): + if ((field.label != FieldDescriptor.LABEL_REPEATED and field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE) + or field.containing_oneof): continue if field.name in result_dict: # Skip the field which has been serailized already. @@ -123,15 +124,22 @@ def protobuf_to_dict(pb, type_callable_map=TYPE_CALLABLE_MAP, use_enum_labels=Fa return result_dict -def _get_field_value_adaptor(pb, field, type_callable_map=TYPE_CALLABLE_MAP, use_enum_labels=False, - including_default_value_fields=False, lowercase_enum_lables=False): +def _get_field_value_adaptor( + pb, + field, + type_callable_map=TYPE_CALLABLE_MAP, + use_enum_labels=False, + including_default_value_fields=False, + lowercase_enum_lables=False +): if field.message_type and field.message_type.name == Timestamp_type_name: return timestamp_to_datetime if field.type == FieldDescriptor.TYPE_MESSAGE: # recursively encode protobuf sub-message return lambda pb: protobuf_to_dict( - pb, type_callable_map=type_callable_map, + pb, + type_callable_map=type_callable_map, use_enum_labels=use_enum_labels, including_default_value_fields=including_default_value_fields, lowercase_enum_lables=lowercase_enum_lables, @@ -143,16 +151,20 @@ def _get_field_value_adaptor(pb, field, type_callable_map=TYPE_CALLABLE_MAP, use if field.type in type_callable_map: return type_callable_map[field.type] - raise TypeError("Field %s.%s has unrecognised type id %d" % ( - pb.__class__.__name__, field.name, field.type)) + raise TypeError("Field %s.%s has unrecognised type id %d" % (pb.__class__.__name__, field.name, field.type)) -REVERSE_TYPE_CALLABLE_MAP = { -} +REVERSE_TYPE_CALLABLE_MAP = {} -def 
dict_to_protobuf(pb_klass_or_instance, values, type_callable_map=REVERSE_TYPE_CALLABLE_MAP, - strict=True, ignore_none=False, use_date_parser_for_fields=None): +def dict_to_protobuf( + pb_klass_or_instance, + values, + type_callable_map=REVERSE_TYPE_CALLABLE_MAP, + strict=True, + ignore_none=False, + use_date_parser_for_fields=None +): """Populates a protobuf model from a dictionary. :param pb_klass_or_instance: a protobuf message class, or an protobuf instance @@ -213,7 +225,10 @@ def _dict_to_protobuf(pb, value, type_callable_map, strict, ignore_none, use_dat value_field = field.message_type.fields_by_name['value'] for key, value in input_value.items(): if value_field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE: - _dict_to_protobuf(getattr(pb, field.name)[key], value, type_callable_map, strict, ignore_none, use_date_parser_for_fields) + _dict_to_protobuf( + getattr(pb, field.name)[key], value, type_callable_map, strict, ignore_none, + use_date_parser_for_fields + ) else: if ignore_none and value is None: continue diff --git a/mypy.ini b/mypy.ini index 566cd7a32..2da13169e 100644 --- a/mypy.ini +++ b/mypy.ini @@ -12,4 +12,3 @@ warn_unused_ignores = True warn_no_return = True warn_unreachable = True strict_equality = True - diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 5efc22562..2f4db8be5 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -287,4 +287,3 @@ def test_protobuf_field_compatible_alter_to_oneof(): protobuf_schema1.compare(protobuf_schema2, result) assert result.is_compatible() - From 11db3c5eba87ffa748de6533a68411fc5e9a0184 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 25 Nov 2021 00:40:40 +0200 Subject: [PATCH 061/168] Update karapace/protobuf/proto_file_element.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_file_element.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/karapace/protobuf/proto_file_element.py 
b/karapace/protobuf/proto_file_element.py index 5dfe72fe8..5b743ec4f 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -110,13 +110,11 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe other_indexes: dict = dict() compare_types = CompareTypes(self.package_name, other.package_name, result) type_: TypeElement - i = 0 - for type_ in self.types: + for i, type_ in enumerate(self.types): self_types[type_.name] = type_ self_indexes[type_.name] = i package_name = self.package_name if self.package_name else '' compare_types.add_self_type(package_name, type_) - i += 1 i = 0 for type_ in other.types: From 924f60a6748ea8703f8f2c68414b612cd12ba352 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 25 Nov 2021 00:41:27 +0200 Subject: [PATCH 062/168] Update karapace/protobuf/proto_file_element.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_file_element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 5b743ec4f..6e52249ee 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -113,7 +113,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe for i, type_ in enumerate(self.types): self_types[type_.name] = type_ self_indexes[type_.name] = i - package_name = self.package_name if self.package_name else '' + package_name = self.package_name or '' compare_types.add_self_type(package_name, type_) i = 0 From deadd7c07b6de8d73c2e8a1d7fe207c4c8eef866 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 25 Nov 2021 00:43:01 +0200 Subject: [PATCH 063/168] fuxup --- karapace/protobuf/proto_file_element.py | 4 ++-- karapace/protobuf/protobuf_to_dict.py | 30 ++++++++++++++++--------- requirements.txt | 26 ++++++++++++++++++++- tests/unit/test_protobuf_schema.py | 1 - 4 files changed, 46 insertions(+), 
15 deletions(-) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 5dfe72fe8..9346b194e 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -114,7 +114,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe for type_ in self.types: self_types[type_.name] = type_ self_indexes[type_.name] = i - package_name = self.package_name if self.package_name else '' + package_name = self.package_name if or '' compare_types.add_self_type(package_name, type_) i += 1 @@ -122,7 +122,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe for type_ in other.types: other_types[type_.name] = type_ other_indexes[type_.name] = i - package_name = other.package_name if other.package_name else '' + package_name = other.package_name or '' compare_types.add_other_type(package_name, type_) i += 1 diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 12ec8663f..6dcbf6e2f 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -1,3 +1,4 @@ +# -*- coding:utf-8 -*- """ This module provide a small Python library for creating dicts from protocol buffers Module based on code : @@ -6,12 +7,12 @@ """ from dateutil.parser import parse as date_parser +from frozendict import frozendict from google.protobuf.descriptor import FieldDescriptor from google.protobuf.message import Message from google.protobuf.timestamp_pb2 import Timestamp import datetime -# -*- coding:utf-8 -*- import six __all__ = ["protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf", "REVERSE_TYPE_CALLABLE_MAP"] @@ -32,7 +33,7 @@ def timestamp_to_datetime(ts): EXTENSION_CONTAINER = '___X' -TYPE_CALLABLE_MAP = { +TYPE_CALLABLE_MAP = frozendict({ FieldDescriptor.TYPE_DOUBLE: float, FieldDescriptor.TYPE_FLOAT: float, FieldDescriptor.TYPE_INT32: int, @@ -49,20 +50,20 @@ def 
timestamp_to_datetime(ts): FieldDescriptor.TYPE_STRING: six.text_type, FieldDescriptor.TYPE_BYTES: six.binary_type, FieldDescriptor.TYPE_ENUM: int, -} +}) def repeated(type_callable): return lambda value_list: [type_callable(value) for value in value_list] -def enum_label_name(field, value, lowercase_enum_lables=False): +def enum_label_name(field, value, lowercase_enum_lables=False) -> str: label = field.enum_type.values_by_number[int(value)].name label = label.lower() if lowercase_enum_lables else label return label -def _is_map_entry(field): +def _is_map_entry(field) -> bool: return ( field.type == FieldDescriptor.TYPE_MESSAGE and field.message_type.has_options and field.message_type.GetOptions().map_entry @@ -75,7 +76,7 @@ def protobuf_to_dict( use_enum_labels=False, including_default_value_fields=False, lowercase_enum_lables=False -): +) -> dict: result_dict = {} extensions = {} for field, value in pb.ListFields(): @@ -107,6 +108,7 @@ def protobuf_to_dict( if ((field.label != FieldDescriptor.LABEL_REPEATED and field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE) or field.containing_oneof): continue + if field.name in result_dict: # Skip the field which has been serailized already. continue @@ -132,7 +134,6 @@ def _get_field_value_adaptor( including_default_value_fields=False, lowercase_enum_lables=False ): - if field.message_type and field.message_type.name == Timestamp_type_name: return timestamp_to_datetime if field.type == FieldDescriptor.TYPE_MESSAGE: @@ -154,7 +155,7 @@ def _get_field_value_adaptor( raise TypeError("Field %s.%s has unrecognised type id %d" % (pb.__class__.__name__, field.name, field.type)) -REVERSE_TYPE_CALLABLE_MAP = {} +REVERSE_TYPE_CALLABLE_MAP = frozendict({}) def dict_to_protobuf( @@ -164,9 +165,10 @@ def dict_to_protobuf( strict=True, ignore_none=False, use_date_parser_for_fields=None -): +) -> object: """Populates a protobuf model from a dictionary. 
+ :param ignore_none: :param pb_klass_or_instance: a protobuf message class, or an protobuf instance :type pb_klass_or_instance: a type or instance of a subclass of google.protobuf.message.Message :param dict values: a dictionary of values. Repeated and nested values are @@ -187,6 +189,7 @@ def dict_to_protobuf( def _get_field_mapping(pb, dict_value, strict): field_mapping = [] + key: str = "" for key, value in dict_value.items(): if key == EXTENSION_CONTAINER: continue @@ -201,11 +204,15 @@ def _get_field_mapping(pb, dict_value, strict): ext_num = int(ext_num) except ValueError: raise ValueError("Extension keys must be integers.") + # pylint: disable=W0212 if ext_num not in pb._extensions_by_number: if strict: raise KeyError("%s does not have a extension with number %s. Perhaps you forgot to import it?" % (pb, key)) continue + # pylint: disable=W0212 + ext_field = pb._extensions_by_number[ext_num] + # noinspection PyUnusedLocal pb_val = None pb_val = pb.Extensions[ext_field] field_mapping.append((ext_field, ext_val, pb_val)) @@ -219,6 +226,7 @@ def _dict_to_protobuf(pb, value, type_callable_map, strict, ignore_none, use_dat for field, input_value, pb_value in fields: if ignore_none and input_value is None: continue + if field.label == FieldDescriptor.LABEL_REPEATED: if field.message_type and field.message_type.has_options and field.message_type.GetOptions().map_entry: key_field = field.message_type.fields_by_name['key'] @@ -250,6 +258,7 @@ def _dict_to_protobuf(pb, value, type_callable_map, strict, ignore_none, use_dat else: pb_value.append(item) continue + if isinstance(input_value, datetime.datetime): input_value = datetime_to_timestamp(input_value) # Instead of setattr we need to use CopyFrom for composite fields @@ -289,8 +298,7 @@ def _string_to_enum(field, input_value, strict=False): except KeyError: if strict: raise KeyError("`%s` is not a valid value for field `%s`" % (input_value, field.name)) - else: - return _string_to_enum(field, input_value.upper(), 
strict=True) + return _string_to_enum(field, input_value.upper(), strict=True) return input_value diff --git a/requirements.txt b/requirements.txt index 41bd71493..21a96adb0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,10 +6,12 @@ jsonschema==3.2.0 lz4==3.0.2 requests==2.23.0 networkx==2.5 -six +six~=1.15.0 python-dateutil==2.8.2 + + # Patched dependencies # # Note: It is important to use commits to reference patched dependencies. This @@ -20,3 +22,25 @@ python-dateutil==2.8.2 # git+https://github.com/aiven/avro.git@513b153bac5040af6bba5847aef202adb680b67b#subdirectory=lang/py3/ git+git://github.com/aiven/kafka-python.git@b9f2f78377d56392f61cba8856dc6c02ae841b79 + +pip~=21.0.1 +argparse~=1.4.0 +toml~=0.10.1 +zlib~=1.2.11 +wheel~=0.36.2 +openssl~=1.1.1l +py~=1.10.0 +attrs~=20.3.0 +avro-python3~=1.11.0+snapshot +idna~=2.10 +pytest~=6.2.2 +certifi~=2021.10.8 +chardet~=3.0.4 +urllib3~=1.25.11 +decorator~=4.4.2 +pyrsistent~=0.17.3 +setuptools~=50.3.0 +zipp~=3.4.0 +filelock~=3.0.12 +frozendict~=2.1.0 +protobuf~=3.14.0 diff --git a/tests/unit/test_protobuf_schema.py b/tests/unit/test_protobuf_schema.py index 2f4db8be5..bece9b241 100644 --- a/tests/unit/test_protobuf_schema.py +++ b/tests/unit/test_protobuf_schema.py @@ -1,5 +1,4 @@ from karapace.protobuf.compare_result import CompareResult -from karapace.protobuf.io import ProtobufDatumReader from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.protobuf.location import Location from karapace.protobuf.schema import ProtobufSchema From 08e66c44afc4bcfc14f0e64dbdc49d69bd427018 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 25 Nov 2021 00:44:40 +0200 Subject: [PATCH 064/168] fixup --- karapace/protobuf/proto_file_element.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 6e52249ee..724c1d1b6 100644 --- a/karapace/protobuf/proto_file_element.py +++ 
b/karapace/protobuf/proto_file_element.py @@ -120,7 +120,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe for type_ in other.types: other_types[type_.name] = type_ other_indexes[type_.name] = i - package_name = other.package_name if other.package_name else '' + package_name = other.package_name or '' compare_types.add_other_type(package_name, type_) i += 1 From 0d22250930aef44b322dc8fa5040039ea78f673f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 25 Nov 2021 00:59:49 +0200 Subject: [PATCH 065/168] fixup minor bugf --- karapace/protobuf/proto_file_element.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 724c1d1b6..618c51099 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -116,13 +116,11 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe package_name = self.package_name or '' compare_types.add_self_type(package_name, type_) - i = 0 - for type_ in other.types: + for i, type_ in enumerate(other.types): other_types[type_.name] = type_ other_indexes[type_.name] = i package_name = other.package_name or '' compare_types.add_other_type(package_name, type_) - i += 1 for name in chain(self_types.keys(), other_types.keys() - self_types.keys()): From 68d0a0a400fa7d04fcbf131e0c0e0e596715dd3e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 27 Nov 2021 01:06:24 +0200 Subject: [PATCH 066/168] backup working unit_test --- karapace/protobuf/proto_file_element.py | 2 +- karapace/protobuf/protobuf_to_dict.py | 33 +++++++++++++------------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 9346b194e..6b137baf1 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -114,7 +114,7 @@ def 
compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe for type_ in self.types: self_types[type_.name] = type_ self_indexes[type_.name] = i - package_name = self.package_name if or '' + package_name = self.package_name or '' compare_types.add_self_type(package_name, type_) i += 1 diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 6dcbf6e2f..8ff73aaca 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -19,9 +19,13 @@ Timestamp_type_name = 'Timestamp' +# pylint: disable=E1101 + def datetime_to_timestamp(dt): + ts = Timestamp() + ts.FromDatetime(dt) return ts @@ -70,13 +74,8 @@ def _is_map_entry(field) -> bool: ) -def protobuf_to_dict( - pb, - type_callable_map=TYPE_CALLABLE_MAP, - use_enum_labels=False, - including_default_value_fields=False, - lowercase_enum_lables=False -) -> dict: +def protobuf_to_dict(pb, use_enum_labels=True, including_default_value_fields=True, lowercase_enum_lables=False) -> dict: + type_callable_map = TYPE_CALLABLE_MAP result_dict = {} extensions = {} for field, value in pb.ListFields(): @@ -105,10 +104,10 @@ def protobuf_to_dict( if including_default_value_fields: for field in pb.DESCRIPTOR.fields: # Singular message fields and oneof fields will not be affected. - if ((field.label != FieldDescriptor.LABEL_REPEATED and field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE) - or field.containing_oneof): + if (field.label != FieldDescriptor.LABEL_REPEATED and field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE): + continue + if field.containing_oneof: continue - if field.name in result_dict: # Skip the field which has been serailized already. 
continue @@ -134,13 +133,13 @@ def _get_field_value_adaptor( including_default_value_fields=False, lowercase_enum_lables=False ): + if field.message_type and field.message_type.name == Timestamp_type_name: return timestamp_to_datetime if field.type == FieldDescriptor.TYPE_MESSAGE: # recursively encode protobuf sub-message return lambda pb: protobuf_to_dict( pb, - type_callable_map=type_callable_map, use_enum_labels=use_enum_labels, including_default_value_fields=including_default_value_fields, lowercase_enum_lables=lowercase_enum_lables, @@ -220,9 +219,9 @@ def _get_field_mapping(pb, dict_value, strict): return field_mapping -def _dict_to_protobuf(pb, value, type_callable_map, strict, ignore_none, use_date_parser_for_fields): - fields = _get_field_mapping(pb, value, strict) - +def _dict_to_protobuf(pb, value_, type_callable_map, strict, ignore_none, use_date_parser_for_fields): + fields = _get_field_mapping(pb, value_, strict) + value = value_ for field, input_value, pb_value in fields: if ignore_none and input_value is None: continue @@ -266,11 +265,13 @@ def _dict_to_protobuf(pb, value, type_callable_map, strict, ignore_none, use_dat # Assignment not allowed to composite field “field name” in protocol message object getattr(pb, field.name).CopyFrom(input_value) continue - elif use_date_parser_for_fields and field.name in use_date_parser_for_fields: + + if use_date_parser_for_fields and field.name in use_date_parser_for_fields: input_value = datetime_to_timestamp(date_parser(input_value)) getattr(pb, field.name).CopyFrom(input_value) continue - elif field.type == FieldDescriptor.TYPE_MESSAGE: + + if field.type == FieldDescriptor.TYPE_MESSAGE: _dict_to_protobuf(pb_value, input_value, type_callable_map, strict, ignore_none, use_date_parser_for_fields) continue From 71b13fc33dfb3f55d7aab23dc632188153b4f9d0 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 29 Nov 2021 11:55:38 +0200 Subject: [PATCH 067/168] fixup --- 
karapace/kafka_rest_apis/consumer_manager.py | 2 +- karapace/rapu.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/karapace/kafka_rest_apis/consumer_manager.py b/karapace/kafka_rest_apis/consumer_manager.py index c278060f2..f2c1da66c 100644 --- a/karapace/kafka_rest_apis/consumer_manager.py +++ b/karapace/kafka_rest_apis/consumer_manager.py @@ -20,7 +20,7 @@ import time import uuid -KNOWN_FORMATS = {"json", "avro", "binary", "jsonschema"} +KNOWN_FORMATS = {"json", "avro", "binary", "jsonschema", "protobuf"} OFFSET_RESET_STRATEGIES = {"latest", "earliest"} TypedConsumer = namedtuple("TypedConsumer", ["consumer", "serialization_format", "config"]) diff --git a/karapace/rapu.py b/karapace/rapu.py index 348dabe75..a7757066e 100644 --- a/karapace/rapu.py +++ b/karapace/rapu.py @@ -43,7 +43,7 @@ # TODO -> accept more general values as well REST_CONTENT_TYPE_RE = re.compile( - r"application/((vnd\.kafka(\.(?Pavro|json|binary|jsonschema))?(\.(?Pv[12]))?" + r"application/((vnd\.kafka(\.(?Pavro|json|protobuf|binary|jsonschema))?(\.(?Pv[12]))?" 
r"\+(?Pjson))|(?Pjson|octet-stream))" ) REST_ACCEPT_RE = re.compile( From 529e63fc4a73591513fc47d6cda182d657d17c64 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 30 Nov 2021 22:24:23 +0200 Subject: [PATCH 068/168] fixups --- karapace/kafka_rest_apis/__init__.py | 4 ++-- karapace/kafka_rest_apis/consumer_manager.py | 2 +- karapace/rapu.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/karapace/kafka_rest_apis/__init__.py b/karapace/kafka_rest_apis/__init__.py index 4eb231ac1..c13d74de0 100644 --- a/karapace/kafka_rest_apis/__init__.py +++ b/karapace/kafka_rest_apis/__init__.py @@ -539,7 +539,7 @@ async def serialize( return json.dumps(obj).encode("utf8") if ser_format == "binary": return base64.b64decode(obj) - if ser_format in {"avro", "jsonschema"}: + if ser_format in {"avro", "jsonschema", "protobuf"}: return await self.schema_serialize(obj, schema_id) raise FormatError(f"Unknown format: {ser_format}") @@ -568,7 +568,7 @@ async def validate_publish_request_format(self, data: dict, formats: dict, conte sub_code=RESTErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value, ) # disallow missing id and schema for any key/value list that has at least one populated element - if formats["embedded_format"] in {"avro", "jsonschema"}: + if formats["embedded_format"] in {"avro", "jsonschema", "protobuf"}: for prefix, code in zip(RECORD_KEYS, RECORD_CODES): if self.all_empty(data, prefix): continue diff --git a/karapace/kafka_rest_apis/consumer_manager.py b/karapace/kafka_rest_apis/consumer_manager.py index f2c1da66c..d33e0cd72 100644 --- a/karapace/kafka_rest_apis/consumer_manager.py +++ b/karapace/kafka_rest_apis/consumer_manager.py @@ -481,7 +481,7 @@ async def fetch(self, internal_name: Tuple[str, str], content_type: str, formats async def deserialize(self, bytes_: bytes, fmt: str): if not bytes_: return None - if fmt in {"avro", "jsonschema"}: + if fmt in {"avro", "jsonschema", "protobuf"}: return await self.deserializer.deserialize(bytes_) if fmt == 
"json": return json.loads(bytes_.decode('utf-8')) diff --git a/karapace/rapu.py b/karapace/rapu.py index a7757066e..2c64dccbf 100644 --- a/karapace/rapu.py +++ b/karapace/rapu.py @@ -47,7 +47,7 @@ r"\+(?Pjson))|(?Pjson|octet-stream))" ) REST_ACCEPT_RE = re.compile( - r"(application|\*)/((vnd\.kafka(\.(?Pavro|json|binary|jsonschema))?(\.(?Pv[12]))?\+" + r"(application|\*)/((vnd\.kafka(\.(?Pavro|json|protobuf|binary|jsonschema))?(\.(?Pv[12]))?\+" r"(?Pjson))|(?Pjson|\*))" ) From bf1e3744617a121189042afe99f1cffe1688e563 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 30 Nov 2021 23:00:27 +0200 Subject: [PATCH 069/168] fixup lint warning --- karapace/rapu.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/karapace/rapu.py b/karapace/rapu.py index 2c64dccbf..ed2a6c508 100644 --- a/karapace/rapu.py +++ b/karapace/rapu.py @@ -46,8 +46,10 @@ r"application/((vnd\.kafka(\.(?Pavro|json|protobuf|binary|jsonschema))?(\.(?Pv[12]))?" r"\+(?Pjson))|(?Pjson|octet-stream))" ) + REST_ACCEPT_RE = re.compile( - r"(application|\*)/((vnd\.kafka(\.(?Pavro|json|protobuf|binary|jsonschema))?(\.(?Pv[12]))?\+" + r"(application|\*)/((vnd\.kafka(\.(?Pavro|json|" + r"protobuf|binary|jsonschema))?(\.(?Pv[12]))?\+" r"(?Pjson))|(?Pjson|\*))" ) From bab3f4c35a41bc12df2da31eda9f791f30d74f37 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 00:12:34 +0200 Subject: [PATCH 070/168] fixup --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 21a96adb0..275b0464b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,6 @@ argparse~=1.4.0 toml~=0.10.1 zlib~=1.2.11 wheel~=0.36.2 -openssl~=1.1.1l py~=1.10.0 attrs~=20.3.0 avro-python3~=1.11.0+snapshot From 01e776c0bcbadc68d2da94f4bd5cdd6e94315b5a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 00:14:55 +0200 Subject: [PATCH 071/168] fixup --- requirements.txt | 24 ++---------------------- 1 file changed, 2 
insertions(+), 22 deletions(-) diff --git a/requirements.txt b/requirements.txt index 275b0464b..b0c9c8978 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,8 +8,8 @@ requests==2.23.0 networkx==2.5 six~=1.15.0 python-dateutil==2.8.2 - - +frozendict~=2.1.0 +filelock~=3.0.12 # Patched dependencies @@ -23,23 +23,3 @@ python-dateutil==2.8.2 git+https://github.com/aiven/avro.git@513b153bac5040af6bba5847aef202adb680b67b#subdirectory=lang/py3/ git+git://github.com/aiven/kafka-python.git@b9f2f78377d56392f61cba8856dc6c02ae841b79 -pip~=21.0.1 -argparse~=1.4.0 -toml~=0.10.1 -zlib~=1.2.11 -wheel~=0.36.2 -py~=1.10.0 -attrs~=20.3.0 -avro-python3~=1.11.0+snapshot -idna~=2.10 -pytest~=6.2.2 -certifi~=2021.10.8 -chardet~=3.0.4 -urllib3~=1.25.11 -decorator~=4.4.2 -pyrsistent~=0.17.3 -setuptools~=50.3.0 -zipp~=3.4.0 -filelock~=3.0.12 -frozendict~=2.1.0 -protobuf~=3.14.0 From ced47af206a7c12b761e28f9a2d78ba51bf7f166 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 00:21:12 +0200 Subject: [PATCH 072/168] fixup --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b0c9c8978..c00d603aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ six~=1.15.0 python-dateutil==2.8.2 frozendict~=2.1.0 filelock~=3.0.12 - +protobuf~=3.14.0 # Patched dependencies # From 9696121bd3a39b41a08ed34013baa52e1e9ae475 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 00:25:43 +0200 Subject: [PATCH 073/168] fixup --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c00d603aa..f55eeb682 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,3 @@ protobuf~=3.14.0 # git+https://github.com/aiven/avro.git@513b153bac5040af6bba5847aef202adb680b67b#subdirectory=lang/py3/ git+git://github.com/aiven/kafka-python.git@b9f2f78377d56392f61cba8856dc6c02ae841b79 - From 217b217ea721a1b9fcab146c9604472c113cb01b Mon Sep 
17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 09:17:58 +0200 Subject: [PATCH 074/168] investigate github issue --- tests/unit/test_any_tool.py | 51 +++++++++++++++++++++++++++++++++ tests/unit/test_proto_parser.py | 4 +-- 2 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 tests/unit/test_any_tool.py diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py new file mode 100644 index 000000000..4fae99380 --- /dev/null +++ b/tests/unit/test_any_tool.py @@ -0,0 +1,51 @@ +import importlib +import logging +import importlib.util + +from subprocess import Popen, PIPE, TimeoutExpired + +from karapace.protobuf.io import calculate_class_name +from karapace.protobuf.kotlin_wrapper import trim_margin + +log = logging.getLogger("KarapaceTests") + + +def test_protoc(): + proto: str = """ + |syntax = "proto3"; + |package com.instaclustr.protobuf; + |option java_outer_classname = "SimpleMessageProtos"; + |message SimpleMessage { + | string content = 1; + | string date_time = 2; + | string content2 = 3; + |} + | + """ + proto = trim_margin(proto) + + proto_name = calculate_class_name(str(proto)) + try: + with open(f"{proto_name}.proto", "w") as proto_text: + proto_text.write(str(proto)) + proto_text.close() + except Exception as e: # pylint: disable=broad-except + log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) + + proc = Popen(["protoc", "--python_out=./", f"{proto_name}.proto"], stdout=PIPE, stderr=PIPE, shell=True) + try: + out, err = proc.communicate(timeout=10) + log.info(out) + log.error(err) + except TimeoutExpired: + proc.kill() + + try: + spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") + tmp_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(tmp_module) + + except Exception as e: # pylint: disable=broad-except + log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) + + assert True diff --git 
a/tests/unit/test_proto_parser.py b/tests/unit/test_proto_parser.py index 69ca5a836..b7a52314f 100644 --- a/tests/unit/test_proto_parser.py +++ b/tests/unit/test_proto_parser.py @@ -28,7 +28,7 @@ def test_type_parsing(): - proto: str = """ + proto = """ |message Types { | required any f1 = 1; | required bool f2 = 2; @@ -52,7 +52,7 @@ def test_type_parsing(): | required nested.nested f20 = 20; |} """ - proto: str = trim_margin(proto) + proto = trim_margin(proto) expected = ProtoFileElement( location=location, From 1009450c7dc5345af154863e71825607f3802bd1 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 10:08:36 +0200 Subject: [PATCH 075/168] lint fix --- tests/unit/test_any_tool.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 4fae99380..2cc12fe6e 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -1,11 +1,10 @@ -import importlib -import logging -import importlib.util - -from subprocess import Popen, PIPE, TimeoutExpired - from karapace.protobuf.io import calculate_class_name from karapace.protobuf.kotlin_wrapper import trim_margin +from subprocess import PIPE, Popen, TimeoutExpired + +import importlib +import importlib.util +import logging log = logging.getLogger("KarapaceTests") From 5ff8dcb132c7e759b0f3de50e867fc995374911e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 10:12:06 +0200 Subject: [PATCH 076/168] debug --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 3f731a9e9..a49e5f6b6 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] -addopts = -ra -q --tb=short --showlocals --numprocesses auto +addopts = -ra --tb=short --showlocals --numprocesses auto timeout = 600 From dd2c6baff7a4517dcdb9d1564acd18af4d33c01a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 10:28:37 +0200 Subject: [PATCH 077/168] fixup 
requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f55eeb682..65b3a9713 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ python-dateutil==2.8.2 frozendict~=2.1.0 filelock~=3.0.12 protobuf~=3.14.0 +protobuf3==0.2.1 # Patched dependencies # From a7c943a66b9d05c0c5676d447bfda635916f938f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 10:34:07 +0200 Subject: [PATCH 078/168] debug --- tests/unit/test_any_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 2cc12fe6e..81fc66272 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -47,4 +47,4 @@ def test_protoc(): except Exception as e: # pylint: disable=broad-except log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) - assert True + assert False From 34545e42d28065839d62d3e9560171684ebab06f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 11:01:25 +0200 Subject: [PATCH 079/168] Update tests.yml add protobuf instalation action to workflow --- .github/workflows/tests.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5bfd52bb9..8ff59881a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -27,7 +27,10 @@ jobs: - name: Install dependencies run: python -m pip install -r requirements-dev.txt - + + - name: Install Protoc + uses: arduino/setup-protoc@v1 + - name: Execute unit-tests run: make unittest From d9d9332b383106493b1cc70d17b83fbd52bbd910 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 11:38:48 +0200 Subject: [PATCH 080/168] debug --- tests/unit/test_any_tool.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 
81fc66272..6003a5a4f 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -24,27 +24,35 @@ def test_protoc(): proto = trim_margin(proto) proto_name = calculate_class_name(str(proto)) + log.info(proto_name) try: with open(f"{proto_name}.proto", "w") as proto_text: proto_text.write(str(proto)) proto_text.close() except Exception as e: # pylint: disable=broad-except log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) + assert False, f"Cannot write Proto File. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" - proc = Popen(["protoc", "--python_out=./", f"{proto_name}.proto"], stdout=PIPE, stderr=PIPE, shell=True) + args = ["protoc", "--python_out=./", f"{proto_name}.proto"] + try: + proc = Popen(args, stdout=PIPE, stderr=PIPE, shell=False) + except FileNotFoundError as e: + assert False, f"Protoc not found. {e}" + except Exception as e: # pylint: disable=broad-except + log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) + assert False, f"Cannot execute protoc. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" try: out, err = proc.communicate(timeout=10) - log.info(out) - log.error(err) + assert out == b'' + assert err == b'' except TimeoutExpired: proc.kill() + assert False, "Timeout expired" try: spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") tmp_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(tmp_module) - except Exception as e: # pylint: disable=broad-except log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) - - assert False + assert False, f"Cannot execute protoc. 
Unexpected exception in statsd send: {e.__class__.__name__} + {e}" From 7c837827bac062abc96cc4ae87ce39840bbad934 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 12:15:00 +0200 Subject: [PATCH 081/168] fixup --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8ff59881a..36c8802ac 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -27,10 +27,10 @@ jobs: - name: Install dependencies run: python -m pip install -r requirements-dev.txt - + - name: Install Protoc uses: arduino/setup-protoc@v1 - + - name: Execute unit-tests run: make unittest From 0548f932ce18a371da1aa2edd821b8526656e47e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 12:48:50 +0200 Subject: [PATCH 082/168] fixup --- tests/unit/test_any_tool.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 6003a5a4f..025f0621d 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -49,10 +49,13 @@ def test_protoc(): proc.kill() assert False, "Timeout expired" - try: - spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") - tmp_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(tmp_module) - except Exception as e: # pylint: disable=broad-except - log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) - assert False, f"Cannot execute protoc. 
Unexpected exception in statsd send: {e.__class__.__name__} + {e}" + spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") + tmp_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(tmp_module) + + +# try: + +# except Exception as e: # pylint: disable=broad-except +# log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) +# assert False, f"Cannot execute protoc. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" From 28c71de139986c142be4448f1222dcc1b1282651 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 13:02:01 +0200 Subject: [PATCH 083/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 36c8802ac..6b5d88e6a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.7] steps: - uses: actions/checkout@v1 From 2004a1e78c686537d6fd1964ee5ba8a4764acbed Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 13:32:41 +0200 Subject: [PATCH 084/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6b5d88e6a..3ad6714a0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.7] + python-version: [3.7, 3.8] steps: - uses: actions/checkout@v1 From df13004ce9ad63c87a7d75ef5c5190f7aaa12753 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 13:52:19 +0200 Subject: [PATCH 085/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3ad6714a0..36c8802ac 
100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.7, 3.8] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v1 From cd362be2dc2e692214e5d7abc25e4188563227cf Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 15:07:33 +0200 Subject: [PATCH 086/168] lint fixups --- karapace/compatibility/protobuf/checks.py | 2 +- karapace/protobuf/compare_type_storage.py | 4 ++-- karapace/protobuf/enum_constant_element.py | 2 +- karapace/protobuf/enum_element.py | 6 +++--- karapace/protobuf/extend_element.py | 2 +- karapace/protobuf/field_element.py | 2 +- karapace/protobuf/kotlin_wrapper.py | 2 +- karapace/protobuf/message_element.py | 10 +++++----- karapace/protobuf/one_of_element.py | 6 +++--- karapace/protobuf/option_element.py | 2 +- karapace/protobuf/option_reader.py | 12 ++++++------ karapace/protobuf/proto_file_element.py | 8 ++++---- karapace/protobuf/proto_parser.py | 22 +++++++++++----------- karapace/protobuf/proto_type.py | 2 +- karapace/protobuf/protobuf_to_dict.py | 2 +- karapace/protobuf/reserved_document.py | 2 +- karapace/protobuf/rpc_element.py | 2 +- karapace/protobuf/schema.py | 2 +- karapace/protobuf/syntax_reader.py | 2 +- tests/integration/test_schema_protobuf.py | 2 +- 20 files changed, 47 insertions(+), 47 deletions(-) diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index 58c25cfcc..70f72ee8c 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -18,7 +18,7 @@ def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: Protobuf return SchemaCompatibilityResult.compatible() # TODO: maybe move incompatibility level raising to ProtoFileElement.compatible() ?? 
- incompatibilities = list() + incompatibilities = [] record: ModificationRecord locations: set = set() messages: set = set() diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index d7b630909..fc1ae66a5 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -32,8 +32,8 @@ def __init__(self, self_package_name: str, other_package_name: str, result: Comp self.self_package_name = self_package_name self.other_package_name = other_package_name - self.self_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = dict() - self.other_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = dict() + self.self_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} + self.other_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} self.locked_messages: List[object] = [] self.environment: List[object] = [] self.result = result diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index b0bf4846a..0eb541540 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -21,7 +21,7 @@ def __init__( self.documentation = documentation or "" def to_schema(self) -> str: - result: list = list() + result: list = [] append_documentation(result, self.documentation) result.append(f"{self.name} = {self.tag}") if self.options: diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 215de8efa..9aa490b77 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -18,7 +18,7 @@ def __init__( self.constants = constants or [] def to_schema(self) -> str: - result: list = list() + result: list = [] append_documentation(result, self.documentation) result.append(f"enum {self.name} {{") @@ -37,8 +37,8 @@ def to_schema(self) -> str: return "".join(result) def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTypes) -> None: - 
self_tags: dict = dict() - other_tags: dict = dict() + self_tags: dict = {} + other_tags: dict = {} constant: EnumConstantElement if types: pass diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index ac3d187b9..7cd9d2d3c 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -13,7 +13,7 @@ def __init__(self, location: Location, name: str, documentation: str = "", field self.fields = fields or [] def to_schema(self) -> str: - result: list = list() + result: list = [] append_documentation(result, self.documentation) result.append(f"extend {self.name} {{") if self.fields: diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index acf9216fe..7a55337ec 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -35,7 +35,7 @@ def __init__( self.options = options or [] def to_schema(self) -> str: - result: list = list() + result: list = [] append_documentation(result, self.documentation) if self.label: diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index 67d4019f5..f40eee5e9 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -10,7 +10,7 @@ def check(q: bool, message: str) -> None: def trim_margin(s: str) -> str: lines = s.split("\n") - new_lines = list() + new_lines = [] if not textwrap.dedent(lines[0]): del lines[0] diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index 2014eb165..42eb4e852 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -38,7 +38,7 @@ def __init__( self.groups = groups or [] def to_schema(self) -> str: - result: list = list() + result: list = [] append_documentation(result, self.documentation) result.append(f"message {self.name} {{") if self.reserveds: @@ -85,10 +85,10 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: 
Compare field: FieldElement subfield: FieldElement one_of: OneOfElement - self_tags: dict = dict() - other_tags: dict = dict() - self_one_ofs: dict = dict() - other_one_ofs: dict = dict() + self_tags: dict = {} + other_tags: dict = {} + self_one_ofs: dict = {} + other_one_ofs: dict = {} for field in self.fields: self_tags[field.tag] = field diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index 540cba1e8..1740e645b 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -15,7 +15,7 @@ def __init__(self, name: str, documentation: str = "", fields=None, groups=None, self.groups = groups or [] def to_schema(self) -> str: - result: list = list() + result: list = [] append_documentation(result, self.documentation) result.append(f"oneof {self.name} {{") if self.options: @@ -35,8 +35,8 @@ def to_schema(self) -> str: return "".join(result) def compare(self, other: 'OneOfElement', result: CompareResult, types: CompareTypes) -> None: - self_tags: dict = dict() - other_tags: dict = dict() + self_tags: dict = {} + other_tags: dict = {} for field in self.fields: self_tags[field.tag] = field diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index b7e54bac2..2460a2db8 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -58,7 +58,7 @@ def append_options(options: list) -> str: def format_option_map(self, value: dict) -> str: keys = list(value.keys()) last_index = len(keys) - 1 - result: list = list() + result: list = [] for index, key in enumerate(keys): endl = "," if (index != last_index) else "" append_indented(result, f"{key}: {self.format_option_map_value(value[key])}{endl}") diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index 027b3622f..8dfcc7989 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -23,8 +23,8 @@ def read_options(self) -> list: 
list if no options are present. """ if not self.reader.peek_char('['): - return list() - result: list = list() + return [] + result: list = [] while True: result.append(self.read_option('=')) @@ -95,7 +95,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) """ if self.reader.read_char() != open_brace: raise AssertionError() - result: dict = dict() + result: dict = {} while True: if self.reader.peek_char(close_brace): # If we see the close brace, finish immediately. This handles :}/[] and ,}/,] cases. @@ -107,7 +107,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) if isinstance(value, OptionElement): nested = result.get(name) if not nested: - nested = dict() + nested = {} result[name] = nested nested[value.name] = value.value else: @@ -118,7 +118,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) elif isinstance(previous, list): # Add to previous List self.add_to_list(previous, value) else: - new_list: list = list() + new_list: list = [] new_list.append(previous) self.add_to_list(new_list, value) result[name] = new_list @@ -140,7 +140,7 @@ def read_list(self) -> list: separating values. """ self.reader.require('[') - result: list = list() + result: list = [] while True: # If we see the close brace, finish immediately. This handles [] and ,] cases. 
if self.reader.peek_char(']'): diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 618c51099..88281d828 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -104,10 +104,10 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe if self.syntax != other.syntax: result.add_modification(Modification.SYNTAX_ALTER) - self_types: dict = dict() - other_types: dict = dict() - self_indexes: dict = dict() - other_indexes: dict = dict() + self_types: dict = {} + other_types: dict = {} + self_indexes: dict = {} + other_indexes: dict = {} compare_types = CompareTypes(self.package_name, other.package_name, result) type_: TypeElement for i, type_ in enumerate(self.types): diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 49dbcdcf0..489af4f7d 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -265,8 +265,8 @@ def read_extend(self, location: Location, documentation: str) -> ExtendElement: def read_service(self, location: Location, documentation: str) -> ServiceElement: """ Reads a service declaration and returns it. """ name = self.reader.read_name() - rpcs = list() - options: list = list() + rpcs = [] + options: list = [] self.reader.require('{') while True: rpc_documentation = self.reader.read_documentation() @@ -292,8 +292,8 @@ def read_service(self, location: Location, documentation: str) -> ServiceElement def read_enum_element(self, location: Location, documentation: str) -> EnumElement: """ Reads an enumerated atype declaration and returns it. 
""" name = self.reader.read_name() - constants: list = list() - options: list = list() + constants: list = [] + options: list = [] self.reader.require("{") while True: value_documentation = self.reader.read_documentation() @@ -392,9 +392,9 @@ def strip_value(name: str, options: list) -> Union[None, str]: def read_one_of(self, documentation: str) -> OneOfElement: name: str = self.reader.read_name() - fields: list = list() - groups: list = list() - options: list = list() + fields: list = [] + groups: list = [] + options: list = [] self.reader.require("{") while True: @@ -429,7 +429,7 @@ def read_group( name = self.reader.read_word() self.reader.require("=") tag = self.reader.read_int() - fields: list = list() + fields: list = [] self.reader.require("{") while True: @@ -449,7 +449,7 @@ def read_group( def read_reserved(self, location: Location, documentation: str) -> ReservedElement: """ Reads a reserved tags and names list like "reserved 10, 12 to 14, 'foo';". """ - values: list = list() + values: list = [] while True: ch = self.reader.peek_char() if ch in ["\"", "'"]: @@ -483,7 +483,7 @@ def read_reserved(self, location: Location, documentation: str) -> ReservedEleme def read_extensions(self, location: Location, documentation: str) -> ExtensionsElement: """ Reads extensions like "extensions 101;" or "extensions 101 to max;". 
""" - values: list = list() + values: list = [] while True: start: int = self.reader.read_int() ch = self.reader.peek_char() @@ -558,7 +558,7 @@ def read_rpc(self, location: Location, documentation: str) -> RpcElement: self.reader.require(')') - options: list = list() + options: list = [] if self.reader.peek_char('{'): while True: rpc_documentation = self.reader.read_documentation() diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 5ff5507b1..d7621a596 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -64,7 +64,7 @@ def static_init(cls) -> None: cls.SFIXED64, cls.SINT32, cls.SINT64, cls.STRING, cls.UINT32, cls.UINT64 ] - cls.SCALAR_TYPES: dict = dict() + cls.SCALAR_TYPES: dict = {} for a in cls.SCALAR_TYPES_: cls.SCALAR_TYPES[a.string] = a diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 8ff73aaca..a38c518e6 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -80,7 +80,7 @@ def protobuf_to_dict(pb, use_enum_labels=True, including_default_value_fields=Tr extensions = {} for field, value in pb.ListFields(): if field.message_type and field.message_type.has_options and field.message_type.GetOptions().map_entry: - result_dict[field.name] = dict() + result_dict[field.name] = {} value_field = field.message_type.fields_by_name['value'] type_callable = _get_field_value_adaptor( pb, value_field, type_callable_map, use_enum_labels, including_default_value_fields, lowercase_enum_lables diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index e12499ce5..3d037d567 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -14,7 +14,7 @@ def __init__(self, location: Location, documentation: str = "", values: list = N self.values = values or [] def to_schema(self) -> str: - result: list = list() + result: list = [] 
append_documentation(result, self.documentation) result.append("reserved ") diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py index a48852094..3c8b86a61 100644 --- a/karapace/protobuf/rpc_element.py +++ b/karapace/protobuf/rpc_element.py @@ -27,7 +27,7 @@ def __init__( self.options = options or [] def to_schema(self) -> str: - result: list = list() + result: list = [] append_documentation(result, self.documentation) result.append(f"rpc {self.name} (") diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index 4067055ad..c1acc10c8 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -30,7 +30,7 @@ def add_slashes(text: str) -> str: def message_element_string(element: MessageElement) -> str: - result: list = list() + result: list = [] append_documentation(result, element.documentation) result.append(f"message {element.name} {{") if element.reserveds: diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index e15af5af3..f38371b08 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -217,7 +217,7 @@ def read_comment(self) -> str: self.pos += 1 result: str = "" if tval == ord('*'): - buffer: list = list() + buffer: list = [] start_of_line = True while self.pos + 1 < len(self.data): # pylint: disable=no-else-break diff --git a/tests/integration/test_schema_protobuf.py b/tests/integration/test_schema_protobuf.py index 91a893b82..327449acb 100644 --- a/tests/integration/test_schema_protobuf.py +++ b/tests/integration/test_schema_protobuf.py @@ -130,7 +130,7 @@ async def test_protobuf_schema_compatibility(registry_async_client: Client, trai class Schemas: url = requests.get(compatibility_test_url) sch = json.loads(url.text) - schemas = dict() + schemas = {} descriptions = [] max_count = 120 count = 0 From caa4fdd74997a19dfc1b31c9aff56712d138ede9 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 1 Dec 2021 15:09:17 +0200 
Subject: [PATCH 087/168] fixup python workflow to 3.7 and 3.8 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6b5d88e6a..3ad6714a0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.7] + python-version: [3.7, 3.8] steps: - uses: actions/checkout@v1 From 0e479c0e6b262d6f9c460354853edce8bda555c5 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 2 Dec 2021 14:53:50 +0200 Subject: [PATCH 088/168] improving code style/tests --- karapace/protobuf/io.py | 25 ++++---- karapace/serialization.py | 32 +++++----- tests/unit/conftest.py | 7 ++- tests/unit/test_protobuf_serialization.py | 74 ++++++++++++----------- tests/utils.py | 29 +++++++++ 5 files changed, 102 insertions(+), 65 deletions(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 3e4c22425..cc9526b0b 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -54,9 +54,7 @@ def check_props(schema_one, schema_two, prop_list): def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) -> bool: # TODO (serge): schema comparison by fields required - if str(writer_schema) == str(reader_schema): - return True - return False + return str(writer_schema) == str(reader_schema) def __init__(self, writer_schema=None, reader_schema=None): """ @@ -92,8 +90,7 @@ def read_varint(bio: BytesIO) -> int: if len(char) == 0: if read_bytes == 0: return 0 - # raise EOFError('EOF while reading varint, value is %i so far' % - # varint) + raise 'EOF while reading varint, value is %i so far' % varint byte = ord(char) varint += (byte & 0x7F) << (7 * read_bytes) @@ -104,7 +101,11 @@ def read_varint(bio: BytesIO) -> int: return varint def read_indexes(self, bio: BytesIO): - size: int = self.read_varint(bio) + try: + size: int = self.read_varint(bio) + except EOFError: + # 
TODO: change exception + raise IllegalArgumentException("problem with reading binary data") result = [] if size == 0: result.append(0) @@ -113,6 +114,7 @@ def read_indexes(self, bio: BytesIO): while i < size: result.append(self.read_varint(bio)) i += 1 + return result def read(self, bio: BytesIO): if self.reader_schema is None: @@ -122,14 +124,8 @@ def read(self, bio: BytesIO): @staticmethod def find_message_name(schema: ProtobufSchema, indexes: list) -> str: result: list = [] - dot: bool = False types = schema.proto_file_element.types for index in indexes: - if dot: - result.append(".") - else: - dot = True - try: message = types[index] except Exception: @@ -140,10 +136,11 @@ def find_message_name(schema: ProtobufSchema, indexes: list) -> str: types = message.nested_types else: raise IllegalArgumentException(f"Invalid message indexes: {indexes}") - + result.append(".") # for java we also need package name. But in case we will use protoc # for compiling to python we can ignore it at all - + if len(result) > 0: + result.pop() return "".join(result) def read_data(self, writer_schema, reader_schema, bio: BytesIO): diff --git a/karapace/serialization.py b/karapace/serialization.py index 5bf8b4741..eccd45d55 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -1,4 +1,5 @@ from avro.io import BinaryDecoder, BinaryEncoder, DatumReader, DatumWriter +from google.protobuf.message import DecodeError from json import load from jsonschema import ValidationError from karapace.protobuf.exception import ProtobufTypeException @@ -269,8 +270,12 @@ def read_value(schema: TypedSchema, bio: io.BytesIO) -> object: return value if schema.schema_type is SchemaType.PROTOBUF: - reader = ProtobufDatumReader(schema.schema) - return reader.read(bio) + try: + reader = ProtobufDatumReader(schema.schema) + return reader.read(bio) + except DecodeError as e: + raise InvalidPayload from e + raise ValueError("Unknown schema type") @@ -300,18 +305,13 @@ async def serialize(self, 
schema: TypedSchema, value: dict) -> bytes: schema_id = self.schemas_to_ids[schema.__str__()] with io.BytesIO() as bio: bio.write(struct.pack(HEADER_FORMAT, START_BYTE, schema_id)) - if schema.schema_type is SchemaType.PROTOBUF: - try: - write_value(schema, bio, value) - return bio.getvalue() - except ProtobufTypeException as e: - raise InvalidMessageSchema("Object does not fit to stored schema") from e - else: - try: - write_value(schema, bio, value) - return bio.getvalue() - except avro.io.AvroTypeException as e: - raise InvalidMessageSchema("Object does not fit to stored schema") from e + try: + write_value(schema, bio, value) + return bio.getvalue() + except ProtobufTypeException as e: + raise InvalidMessageSchema("Object does not fit to stored schema") from e + except avro.io.AvroTypeException as e: + raise InvalidMessageSchema("Object does not fit to stored schema") from e class SchemaRegistryDeserializer(SchemaRegistrySerializerDeserializer): @@ -324,9 +324,11 @@ async def deserialize(self, bytes_: bytes) -> dict: raise InvalidMessageHeader("Start byte is %x and should be %x" % (start_byte, START_BYTE)) try: schema = await self.get_schema_for_id(schema_id) + if schema is None: + raise InvalidPayload("No schema with ID from payload") ret_val = read_value(schema, bio) return ret_val except AssertionError as e: - raise InvalidPayload(f"Data does not contain a valid {schema.schema_type} message") from e + raise InvalidPayload("Data does not contain a valid message") from e except avro.io.SchemaResolutionException as e: raise InvalidPayload("Data cannot be decoded with provided schema") from e diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 5e0d45697..32a6934d1 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,6 +1,6 @@ from karapace.protobuf.kotlin_wrapper import trim_margin from karapace.schema_reader import SchemaType, TypedSchema -from tests.utils import schema_avro_json, schema_protobuf +from tests.utils import 
schema_avro_json, schema_protobuf, schema_protobuf2 import pytest @@ -25,7 +25,12 @@ class MockProtobufClient: def __init__(self, *args, **kwargs): pass + async def get_schema_for_id2(self, *args, **kwargs): + return TypedSchema.parse(SchemaType.PROTOBUF, trim_margin(schema_protobuf2)) + async def get_schema_for_id(self, *args, **kwargs): + if args[0] != 1: + return None return TypedSchema.parse(SchemaType.PROTOBUF, trim_margin(schema_protobuf)) async def get_latest_schema(self, *args, **kwargs): diff --git a/tests/unit/test_protobuf_serialization.py b/tests/unit/test_protobuf_serialization.py index 4d2379a91..b8183878f 100644 --- a/tests/unit/test_protobuf_serialization.py +++ b/tests/unit/test_protobuf_serialization.py @@ -1,8 +1,13 @@ from karapace.config import read_config -from karapace.serialization import SchemaRegistryDeserializer, SchemaRegistrySerializer -from tests.utils import test_objects_protobuf +from karapace.serialization import ( + InvalidMessageHeader, InvalidMessageSchema, InvalidPayload, SchemaRegistryDeserializer, SchemaRegistrySerializer, + START_BYTE +) +from tests.utils import test_fail_objects_protobuf, test_objects_protobuf import logging +import pytest +import struct log = logging.getLogger(__name__) @@ -33,36 +38,35 @@ async def test_happy_flow(default_config_path, mock_protobuf_registry_client): assert 1 in o.ids_to_schemas -# async def test_serialization_fails(default_config_path, mock_protobuf_registry_client): -# serializer, _ = await make_ser_deser(default_config_path, mock_protobuf_registry_client) -# with pytest.raises(InvalidMessageSchema): -# schema = await serializer.get_schema_for_subject("topic") -# await serializer.serialize(schema, {"foo": "bar"}) -# -# -# async def test_deserialization_fails(default_config_path, mock_protobuf_registry_client): -# _, deserializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) -# invalid_header_payload = struct.pack(">bII", 1, 500, 500) -# with 
pytest.raises(InvalidMessageHeader): -# await deserializer.deserialize(invalid_header_payload) -# -# # for now we ignore the packed in schema id -# invalid_data_payload = struct.pack(">bII", START_BYTE, 1, 500) -# with pytest.raises(InvalidPayload): -# await deserializer.deserialize(invalid_data_payload) -# -# # but we can pass in a perfectly fine doc belonging to a diff schema -# schema = await mock_protobuf_registry_client.get_schema_for_id(1) -# schema = copy.deepcopy(schema.to_json()) -# schema["name"] = "BadUser" -# schema["fields"][0]["type"] = "int" -# obj = {"name": 100, "favorite_number": 2, "favorite_color": "bar"} -# writer = avro.io.DatumWriter(avro.io.schema.parse(json.dumps(schema))) -# with io.BytesIO() as bio: -# enc = avro.io.BinaryEncoder(bio) -# bio.write(struct.pack(HEADER_FORMAT, START_BYTE, 1)) -# writer.write(obj, enc) -# enc_bytes = bio.getvalue() -# with pytest.raises(InvalidPayload): -# await deserializer.deserialize(enc_bytes) -# +async def test_serialization_fails(default_config_path, mock_protobuf_registry_client): + serializer, _ = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + with pytest.raises(InvalidMessageSchema): + schema = await serializer.get_schema_for_subject("top") + await serializer.serialize(schema, test_fail_objects_protobuf[0]) + + with pytest.raises(InvalidMessageSchema): + schema = await serializer.get_schema_for_subject("top") + await serializer.serialize(schema, test_fail_objects_protobuf[1]) + + +async def test_deserialization_fails(default_config_path, mock_protobuf_registry_client): + _, deserializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + invalid_header_payload = struct.pack(">bII", 1, 500, 500) + with pytest.raises(InvalidMessageHeader): + await deserializer.deserialize(invalid_header_payload) + + # wrong schema id (500) + invalid_data_payload = struct.pack(">bII", START_BYTE, 500, 500) + with pytest.raises(InvalidPayload): + await 
deserializer.deserialize(invalid_data_payload) + + +async def test_deserialization_fails2(default_config_path, mock_protobuf_registry_client): + _, deserializer = await make_ser_deser(default_config_path, mock_protobuf_registry_client) + invalid_header_payload = struct.pack(">bII", 1, 500, 500) + with pytest.raises(InvalidMessageHeader): + await deserializer.deserialize(invalid_header_payload) + + enc_bytes = b'\x00\x00\x00\x00\x01\x00\x02\x05\0x12' # wrong schema data (2) + with pytest.raises(InvalidPayload): + await deserializer.deserialize(enc_bytes) diff --git a/tests/utils.py b/tests/utils.py index a3d0f5a30..204f44608 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -86,6 +86,24 @@ """ schema_protobuf = trim_margin(schema_protobuf) +schema_protobuf2 = """ +|syntax = "proto3"; +| +|option java_package = "com.codingharbour.protobuf"; +|option java_outer_classname = "TestEnumOrder"; +| +|message Message { +| int32 query = 1; +|} +|enum Enum { +| HIGH = 0; +| MIDDLE = 1; +| LOW = 2; +|} +| +""" +schema_protobuf2 = trim_margin(schema_protobuf2) + test_objects_protobuf = [ { 'query': 5, @@ -97,6 +115,17 @@ }, ] +test_fail_objects_protobuf = [ + { + 'query': 'STR', + 'speed': 99 + }, + { + 'xx': 10, + 'bb': 'MIDDLE' + }, +] + schema_data = { "avro": (schema_avro_json, test_objects_avro), "jsonschema": (schema_jsonschema_json, test_objects_jsonschema), From 57579d9095a0fc158939a90c5d67727ea378e4a9 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 4 Dec 2021 23:15:33 +0200 Subject: [PATCH 089/168] debugging --- tests/unit/test_any_tool.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 025f0621d..66ff89abd 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -29,6 +29,7 @@ def test_protoc(): with open(f"{proto_name}.proto", "w") as proto_text: proto_text.write(str(proto)) proto_text.close() + except Exception as e: # pylint: disable=broad-except 
log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) assert False, f"Cannot write Proto File. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" @@ -48,6 +49,16 @@ def test_protoc(): except TimeoutExpired: proc.kill() assert False, "Timeout expired" + module_content = "" + try: + with open(f"{proto_name}.proto", "r") as proto_text: + module_content = proto_text.read() + proto_text.close() + print(module_content) + + except Exception as e: # pylint: disable=broad-except + log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) + assert False, f"Cannot read Proto File. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") tmp_module = importlib.util.module_from_spec(spec) From ecf9cbae77e7205d703cf3abfcee774a65b0c4dd Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 4 Dec 2021 23:42:44 +0200 Subject: [PATCH 090/168] debug --- tests/unit/test_any_tool.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 66ff89abd..3d4e2239b 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -51,7 +51,7 @@ def test_protoc(): assert False, "Timeout expired" module_content = "" try: - with open(f"{proto_name}.proto", "r") as proto_text: + with open(f"./{proto_name}_pb2.py", "r") as proto_text: module_content = proto_text.read() proto_text.close() print(module_content) @@ -60,6 +60,9 @@ def test_protoc(): log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) assert False, f"Cannot read Proto File. 
Unexpected exception in statsd send: {e.__class__.__name__} + {e}" + print(module_content) + assert False + spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") tmp_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(tmp_module) From 7b9ef6a3ea3bbb0dc19a46c86f1166c36fefa403 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 5 Dec 2021 14:49:37 +0200 Subject: [PATCH 091/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 36c8802ac..e0df973be 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.9] steps: - uses: actions/checkout@v1 From 1e3c31783e3976a455ac7218a97de7264408927d Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 7 Dec 2021 17:14:14 +0200 Subject: [PATCH 092/168] debug --- tests/unit/test_any_tool.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 3d4e2239b..2419e6070 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -10,6 +10,7 @@ def test_protoc(): + proto: str = """ |syntax = "proto3"; |package com.instaclustr.protobuf; @@ -55,21 +56,11 @@ def test_protoc(): module_content = proto_text.read() proto_text.close() print(module_content) - except Exception as e: # pylint: disable=broad-except log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) assert False, f"Cannot read Proto File. 
Unexpected exception in statsd send: {e.__class__.__name__} + {e}" - print(module_content) - assert False - spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") tmp_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(tmp_module) - -# try: - -# except Exception as e: # pylint: disable=broad-except -# log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) -# assert False, f"Cannot execute protoc. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" From 35eac2ed3595ecc4b7a5813f5d9fd0f5b7528063 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 7 Dec 2021 17:39:11 +0200 Subject: [PATCH 093/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e0df973be..172c30921 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.9] + python-version: [3.10] steps: - uses: actions/checkout@v1 From 46e732ba00121da8a4921ad37070cd890aa7f068 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 7 Dec 2021 17:47:04 +0200 Subject: [PATCH 094/168] debug --- tests/unit/test_any_tool.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 2419e6070..bad37ef40 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -63,4 +63,3 @@ def test_protoc(): spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") tmp_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(tmp_module) - From 76afe9f3017138962175de0e2d50b944b7282cb5 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 7 Dec 2021 17:50:55 +0200 Subject: [PATCH 095/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 172c30921..6621a28db 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.10] + python-version: ["3.10"] steps: - uses: actions/checkout@v1 From 54717ee8570c7de07506f00f6ccf9b43a05e50d2 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 7 Dec 2021 19:09:10 +0200 Subject: [PATCH 096/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6621a28db..e0df973be 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.10"] + python-version: [3.9] steps: - uses: actions/checkout@v1 From 724b03bc6eff0776f95e27f444d6fdd0d2d837c3 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 10 Dec 2021 21:54:59 +0200 Subject: [PATCH 097/168] debug workflow --- .github/workflows/tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e0df973be..a086bebdf 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,6 +30,9 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v1 + with: + version: '3.13.0' + - name: Execute unit-tests run: make unittest From 7ff389e6e8a52e97fc9255bdd949664e5e75a25a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 10 Dec 2021 22:44:00 +0200 Subject: [PATCH 098/168] debug --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a086bebdf..62ce3948e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,7 +30,7 @@ jobs: - name: Install Protoc uses: arduino/setup-protoc@v1 - with: + with: version: '3.13.0' From 
12166b9ec14a2904c693590541a18903bc960b72 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 10:04:03 +0200 Subject: [PATCH 099/168] Update karapace/protobuf/exception.py Co-authored-by: Augusto Hack --- karapace/protobuf/exception.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index d35a748e3..5096cf784 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -45,4 +45,4 @@ def __init__(self, fail_msg, writer_schema=None, reader_schema=None): fail_msg += "\nWriter's Schema: %s" % writer_dump if reader_schema: fail_msg += "\nReader's Schema: %s" % reader_dump - ProtobufException.__init__(self, fail_msg) + super().__init__(self, fail_msg) From 6e6b30e1f167a6f3d4f8c4711f4a9966a4b694ae Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 10:59:44 +0200 Subject: [PATCH 100/168] Update karapace/protobuf/io.py Co-authored-by: Augusto Hack --- karapace/protobuf/io.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index cc9526b0b..95e06bee7 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -45,10 +45,13 @@ class ProtobufDatumReader: @staticmethod def check_props(schema_one, schema_two, prop_list): - for prop in prop_list: - if getattr(schema_one, prop) != getattr(schema_two, prop): - return False - return True + try: + return all( + getattr(schema_one, prop) == getattr(schema_two, prop) + for prop in prop_list + ) + except AttributeError: + return False @staticmethod def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) -> bool: From 853038fb012006bbb53827f92c62c1c6162d8148 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 11:01:39 +0200 Subject: [PATCH 101/168] fixup --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 62ce3948e..2ef29594f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: [3.9] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v1 From 84095fa76662d54820e1790b67b43d488989fdaa Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 11:02:05 +0200 Subject: [PATCH 102/168] fixup --- karapace/protobuf/exception.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index d35a748e3..7c8663c51 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -38,7 +38,7 @@ class SchemaParseException(ProtobufException): class ProtobufSchemaResolutionException(ProtobufException): - def __init__(self, fail_msg, writer_schema=None, reader_schema=None): + def __init__(self, fail_msg: str, writer_schema=None, reader_schema=None) -> None: writer_dump = json.dumps(json.loads(str(writer_schema)), indent=2) reader_dump = json.dumps(json.loads(str(reader_schema)), indent=2) if writer_schema: From 50f68fdec262a083004c17020ea52dd5c625afc3 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 12:03:49 +0200 Subject: [PATCH 103/168] Update karapace/protobuf/io.py Co-authored-by: Augusto Hack --- karapace/protobuf/io.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 95e06bee7..2cae0a057 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -109,15 +109,12 @@ def read_indexes(self, bio: BytesIO): except EOFError: # TODO: change exception raise IllegalArgumentException("problem with reading binary data") - result = [] if size == 0: - result.append(0) - return result - i = 0 - while i < size: - result.append(self.read_varint(bio)) - i += 1 - return result + return [0] + return [ + 
self.read_varint(bio) + for _ in range(size) + ] def read(self, bio: BytesIO): if self.reader_schema is None: From b7e4dfccc744e9f737f1530702a52a4b0dd95e8f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 12:04:56 +0200 Subject: [PATCH 104/168] Update karapace/protobuf/io.py Co-authored-by: Augusto Hack --- karapace/protobuf/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 2cae0a057..60270584c 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -122,7 +122,7 @@ def read(self, bio: BytesIO): return protobuf_to_dict(self.read_data(self.writer_schema, self.reader_schema, bio), True) @staticmethod - def find_message_name(schema: ProtobufSchema, indexes: list) -> str: + def find_message_name(schema: ProtobufSchema, indexes: List[int]) -> str: result: list = [] types = schema.proto_file_element.types for index in indexes: From 5bdafdfd85831d0bc05a94f26fcee0e081049ca7 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 12:05:42 +0200 Subject: [PATCH 105/168] fixup --- karapace/protobuf/io.py | 48 ++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 95e06bee7..17c272a64 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -19,7 +19,8 @@ # limitations under the License. 
from io import BytesIO -from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException, ProtobufTypeException +from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException, \ + ProtobufTypeException from karapace.protobuf.message_element import MessageElement from karapace.protobuf.protobuf_to_dict import dict_to_protobuf, protobuf_to_dict from karapace.protobuf.schema import ProtobufSchema @@ -46,10 +47,7 @@ class ProtobufDatumReader: @staticmethod def check_props(schema_one, schema_two, prop_list): try: - return all( - getattr(schema_one, prop) == getattr(schema_two, prop) - for prop in prop_list - ) + return all(getattr(schema_one, prop) == getattr(schema_two, prop) for prop in prop_list) except AttributeError: return False @@ -60,25 +58,13 @@ def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) return str(writer_schema) == str(reader_schema) def __init__(self, writer_schema=None, reader_schema=None): + """ As defined in the Protobuf specification, we call the schema encoded + in the data the "writer's schema", and the schema expected by the + reader the "reader's schema". """ - As defined in the Protobuf specification, we call the schema encoded - in the data the "writer's schema", and the schema expected by the - reader the "reader's schema". - """ self._writer_schema = writer_schema self._reader_schema = reader_schema - # read/write properties - def set_writer_schema(self, writer_schema): - self._writer_schema = writer_schema - - writer_schema = property(lambda self: self._writer_schema, set_writer_schema) - - def set_reader_schema(self, reader_schema): - self._reader_schema = reader_schema - - reader_schema = property(lambda self: self._reader_schema, set_reader_schema) - @staticmethod def read_varint(bio: BytesIO) -> int: """Read a variable-length integer. 
@@ -93,7 +79,7 @@ def read_varint(bio: BytesIO) -> int: if len(char) == 0: if read_bytes == 0: return 0 - raise 'EOF while reading varint, value is %i so far' % varint + raise EOFError(f"EOF while reading varint, value is {varint} so far") byte = ord(char) varint += (byte & 0x7F) << (7 * read_bytes) @@ -120,9 +106,9 @@ def read_indexes(self, bio: BytesIO): return result def read(self, bio: BytesIO): - if self.reader_schema is None: - self.reader_schema = self.writer_schema - return protobuf_to_dict(self.read_data(self.writer_schema, self.reader_schema, bio), True) + if self._reader_schema is None: + self._reader_schema = self._writer_schema + return protobuf_to_dict(self.read_data(self._writer_schema, self._reader_schema, bio), True) @staticmethod def find_message_name(schema: ProtobufSchema, indexes: list) -> str: @@ -131,7 +117,7 @@ def find_message_name(schema: ProtobufSchema, indexes: list) -> str: for index in indexes: try: message = types[index] - except Exception: + except IndexError: raise IllegalArgumentException(f"Invalid message indexes: {indexes}") if message and isinstance(message, MessageElement): @@ -139,12 +125,10 @@ def find_message_name(schema: ProtobufSchema, indexes: list) -> str: types = message.nested_types else: raise IllegalArgumentException(f"Invalid message indexes: {indexes}") - result.append(".") + # for java we also need package name. 
But in case we will use protoc # for compiling to python we can ignore it at all - if len(result) > 0: - result.pop() - return "".join(result) + return ".".join(result) def read_data(self, writer_schema, reader_schema, bio: BytesIO): # TODO (serge): check and polish it @@ -223,9 +207,9 @@ def write_index(self, writer: BytesIO): def write(self, datum: dict, writer: BytesIO): # validate datum - proto_name = calculate_class_name(str(self.writer_schema)) + proto_name = calculate_class_name(str(self._writer_schema)) with open(f"{proto_name}.proto", "w") as proto_text: - proto_text.write(str(self.writer_schema)) + proto_text.write(str(self._writer_schema)) proto_text.close() os.system(f"protoc --python_out=./ {proto_name}.proto") @@ -239,7 +223,7 @@ def write(self, datum: dict, writer: BytesIO): try: dict_to_protobuf(class_instance, datum) except Exception: - raise ProtobufTypeException(self.writer_schema, datum) + raise ProtobufTypeException(self._writer_schema, datum) writer.write(class_instance.SerializeToString()) From 06bccf76d6d226a9472371acba74cc5708702548 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 14:46:53 +0200 Subject: [PATCH 106/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 8ff73aaca..8f1d7ce9a 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -41,15 +41,15 @@ def timestamp_to_datetime(ts): FieldDescriptor.TYPE_DOUBLE: float, FieldDescriptor.TYPE_FLOAT: float, FieldDescriptor.TYPE_INT32: int, - FieldDescriptor.TYPE_INT64: int if six.PY3 else six.integer_types[1], + FieldDescriptor.TYPE_INT64: int, FieldDescriptor.TYPE_UINT32: int, - FieldDescriptor.TYPE_UINT64: int if six.PY3 else six.integer_types[1], + FieldDescriptor.TYPE_UINT64: int, 
FieldDescriptor.TYPE_SINT32: int, - FieldDescriptor.TYPE_SINT64: int if six.PY3 else six.integer_types[1], + FieldDescriptor.TYPE_SINT64: int, FieldDescriptor.TYPE_FIXED32: int, - FieldDescriptor.TYPE_FIXED64: int if six.PY3 else six.integer_types[1], + FieldDescriptor.TYPE_FIXED64: int, FieldDescriptor.TYPE_SFIXED32: int, - FieldDescriptor.TYPE_SFIXED64: int if six.PY3 else six.integer_types[1], + FieldDescriptor.TYPE_SFIXED64: int, FieldDescriptor.TYPE_BOOL: bool, FieldDescriptor.TYPE_STRING: six.text_type, FieldDescriptor.TYPE_BYTES: six.binary_type, From b5c345353d076820574adf4a35bbd68935453bea Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 14:48:02 +0200 Subject: [PATCH 107/168] fixup by hackaugusto suggestions --- .gitignore | 1 + karapace/config.py | 3 +- karapace/protobuf/exception.py | 4 - karapace/protobuf/io.py | 291 +++++++++++++------------- karapace/protobuf/proto_parser.py | 10 +- karapace/protobuf/protobuf_to_dict.py | 6 +- 6 files changed, 158 insertions(+), 157 deletions(-) diff --git a/.gitignore b/.gitignore index 4b8a06a7a..f4300531f 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ __pycache__/ /kafka_*/ venv /karapace/version.py +/runtime/* diff --git a/karapace/config.py b/karapace/config.py index c3736b687..5704c8598 100644 --- a/karapace/config.py +++ b/karapace/config.py @@ -52,7 +52,8 @@ "session_timeout_ms": 10000, "karapace_rest": False, "karapace_registry": False, - "master_election_strategy": "lowest" + "master_election_strategy": "lowest", + "protobuf_runtime_directory": "runtime" } DEFAULT_LOG_FORMAT_JOURNAL = "%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s" diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index e6a867bdd..6ff91595f 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -1,10 +1,6 @@ import json -def error(message: str) -> None: - raise Exception(message) - - class ProtobufParserRuntimeException(Exception): pass 
diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index e0a503180..be7e05e4b 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -2,6 +2,20 @@ # -*- mode: python -*- # -*- coding: utf-8 -*- +from io import BytesIO +from karapace import config +from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException, ProtobufTypeException +from karapace.protobuf.message_element import MessageElement +from karapace.protobuf.protobuf_to_dict import dict_to_protobuf, protobuf_to_dict +from karapace.protobuf.schema import ProtobufSchema +from karapace.protobuf.type_element import TypeElement +from typing import Any, List + +import hashlib +import importlib +import importlib.util +import logging +import os # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,20 +31,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -from io import BytesIO -from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException, \ - ProtobufTypeException -from karapace.protobuf.message_element import MessageElement -from karapace.protobuf.protobuf_to_dict import dict_to_protobuf, protobuf_to_dict -from karapace.protobuf.schema import ProtobufSchema -from karapace.protobuf.type_element import TypeElement - -import hashlib -import importlib -import importlib.util -import logging -import os +import subprocess ZERO_BYTE = b'\x00' @@ -41,21 +42,116 @@ def calculate_class_name(name: str) -> str: return "c_" + hashlib.md5(name.encode('utf-8')).hexdigest() -class ProtobufDatumReader: - """Deserialize Protobuf-encoded data into a Python data structure.""" +def check_props(schema_one, schema_two, prop_list): + try: + return all(getattr(schema_one, prop) == getattr(schema_two, prop) for prop in prop_list) + except AttributeError: + return False + + +def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) -> bool: + # TODO (serge): schema comparison by fields required + + return str(writer_schema) == str(reader_schema) + + +def read_varint(bio: BytesIO) -> int: + """Read a variable-length integer. 
+ + :returns: Integer + """ + varint = 0 + read_bytes = 0 + + while True: + char = bio.read(1) + if len(char) == 0: + if read_bytes == 0: + return 0 + raise EOFError(f"EOF while reading varint, value is {varint} so far") + + byte = ord(char) + varint += (byte & 0x7F) << (7 * read_bytes) + + read_bytes += 1 + + if not byte & 0x80: + return varint - @staticmethod - def check_props(schema_one, schema_two, prop_list): + +def read_indexes(bio: BytesIO): + try: + size: int = read_varint(bio) + except EOFError: + # TODO: change exception + raise IllegalArgumentException("problem with reading binary data") + if size == 0: + return [0] + return [read_varint(bio) for _ in range(size)] + + +def find_message_name(schema: ProtobufSchema, indexes: List[int]) -> str: + result: list = [] + types = schema.proto_file_element.types + for index in indexes: try: - return all(getattr(schema_one, prop) == getattr(schema_two, prop) for prop in prop_list) - except AttributeError: - return False + message = types[index] + except IndexError: + raise IllegalArgumentException(f"Invalid message indexes: {indexes}") + + if message and isinstance(message, MessageElement): + result.append(message.name) + types = message.nested_types + else: + raise IllegalArgumentException(f"Invalid message indexes: {indexes}") + + # for java we also need package name. 
But in case we will use protoc + # for compiling to python we can ignore it at all + return ".".join(result) + + +def get_protobuf_class_instance(schema: ProtobufSchema, class_name: str) -> Any: + directory = config.DEFAULTS["protobuf_runtime_directory"] + proto_name = calculate_class_name(str(schema)) + proto_path = f"{directory}/{proto_name}.proto" + class_path = f"{directory}/{proto_name}_pb2.py" + if not os.path.isfile(proto_path): + with open(f"{directory}/{proto_name}.proto", "w") as proto_text: + proto_text.write(str(schema)) + proto_text.close() + + if not os.path.isfile(class_path): + complete = subprocess.run([ + "protoc", + f"--python_out={directory}", + f"{proto_name}.proto", + ], check=True) + if complete.returncode != 0: + raise OSError(f"A protoc error code: {complete.returncode}") + + spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", class_path) + tmp_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(tmp_module) + class_to_call = getattr(tmp_module, class_name) + return class_to_call() + + +def read_data(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema, bio: BytesIO): + # TODO (serge): check and polish it + if not match_schemas(writer_schema, reader_schema): + fail_msg = 'Schemas do not match.' 
+ raise ProtobufSchemaResolutionException(fail_msg, writer_schema, reader_schema) + + indexes = read_indexes(bio) + name = find_message_name(writer_schema, indexes) + class_instance = get_protobuf_class_instance(writer_schema, name) + class_instance.ParseFromString(bio.read()) - @staticmethod - def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) -> bool: - # TODO (serge): schema comparison by fields required + return class_instance - return str(writer_schema) == str(reader_schema) + +class ProtobufDatumReader: + """Deserialize Protobuf-encoded data into a Python data structure.""" def __init__(self, writer_schema=None, reader_schema=None): """ As defined in the Protobuf specification, we call the schema encoded @@ -65,91 +161,38 @@ def __init__(self, writer_schema=None, reader_schema=None): self._writer_schema = writer_schema self._reader_schema = reader_schema - @staticmethod - def read_varint(bio: BytesIO) -> int: - """Read a variable-length integer. + def read(self, bio: BytesIO): + if self._reader_schema is None: + self._reader_schema = self._writer_schema + return protobuf_to_dict(read_data(self._writer_schema, self._reader_schema, bio), True) - :returns: Integer - """ - varint = 0 - read_bytes = 0 - while True: - char = bio.read(1) - if len(char) == 0: - if read_bytes == 0: - return 0 - raise EOFError(f"EOF while reading varint, value is {varint} so far") +def write_varint(bio: BytesIO, value: int) -> int: - byte = ord(char) - varint += (byte & 0x7F) << (7 * read_bytes) + if value < 0: + raise ValueError(f"value must not be negative, got {value}") - read_bytes += 1 + if value == 0: + bio.write(ZERO_BYTE) + return 1 - if not byte & 0x80: - return varint + written_bytes = 0 + while value > 0: + to_write = value & 0x7f + value = value >> 7 - def read_indexes(self, bio: BytesIO): - try: - size: int = self.read_varint(bio) - except EOFError: - # TODO: change exception - raise IllegalArgumentException("problem with reading binary data") 
- if size == 0: - return [0] - return [ - self.read_varint(bio) - for _ in range(size) - ] + if value > 0: + to_write |= 0x80 - def read(self, bio: BytesIO): - if self._reader_schema is None: - self._reader_schema = self._writer_schema - return protobuf_to_dict(self.read_data(self._writer_schema, self._reader_schema, bio), True) - - @staticmethod - def find_message_name(schema: ProtobufSchema, indexes: List[int]) -> str: - result: list = [] - types = schema.proto_file_element.types - for index in indexes: - try: - message = types[index] - except IndexError: - raise IllegalArgumentException(f"Invalid message indexes: {indexes}") - - if message and isinstance(message, MessageElement): - result.append(message.name) - types = message.nested_types - else: - raise IllegalArgumentException(f"Invalid message indexes: {indexes}") - - # for java we also need package name. But in case we will use protoc - # for compiling to python we can ignore it at all - return ".".join(result) - - def read_data(self, writer_schema, reader_schema, bio: BytesIO): - # TODO (serge): check and polish it - if not ProtobufDatumReader.match_schemas(writer_schema, reader_schema): - fail_msg = 'Schemas do not match.' 
- raise ProtobufSchemaResolutionException(fail_msg, writer_schema, reader_schema) - - indexes = self.read_indexes(bio) - name = self.find_message_name(writer_schema, indexes) - proto_name = calculate_class_name(str(writer_schema)) - with open(f"{proto_name}.proto", "w") as proto_text: - proto_text.write(str(writer_schema)) - proto_text.close() + bio.write(bytearray(to_write)[0]) + written_bytes += 1 - os.system(f"protoc --python_out=./ {proto_name}.proto") + return written_bytes - spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") - tmp_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(tmp_module) - class_to_call = getattr(tmp_module, name) - class_instance = class_to_call() - class_instance.ParseFromString(bio.read()) - return class_instance +def write_indexes(bio: BytesIO, indexes: List[int]) -> None: + for i in indexes: + write_varint(bio, i) class ProtobufDatumWriter: @@ -169,53 +212,13 @@ def __init__(self, writer_schema=None): if self._message_name == '': raise ProtobufTypeException("No message in protobuf schema") - # read/write properties - def set_writer_schema(self, writer_schema): - self._writer_schema = writer_schema - - writer_schema = property(lambda self: self._writer_schema, set_writer_schema) - - @staticmethod - def write_varint(bio: BytesIO, value): - - if value == 0: - bio.write(ZERO_BYTE) - return 1 - - written_bytes = 0 - while value > 0: - to_write = value & 0x7f - value = value >> 7 - - if value > 0: - to_write |= 0x80 - - bio.write(bytearray(to_write)[0]) - written_bytes += 1 - - return written_bytes - - def write_indexes(self, bio: BytesIO, value): - self.write_varint(bio, value) - def write_index(self, writer: BytesIO): - self.write_indexes(writer, self._message_index) + write_indexes(writer, [self._message_index]) def write(self, datum: dict, writer: BytesIO): # validate datum - proto_name = calculate_class_name(str(self._writer_schema)) - with open(f"{proto_name}.proto", 
"w") as proto_text: - proto_text.write(str(self._writer_schema)) - proto_text.close() - - os.system(f"protoc --python_out=./ {proto_name}.proto") - name = self._message_name - spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") - tmp_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(tmp_module) - class_to_call = getattr(tmp_module, name) - class_instance = class_to_call() + class_instance = get_protobuf_class_instance(self._writer_schema, self._message_name) try: dict_to_protobuf(class_instance, datum) @@ -223,7 +226,3 @@ def write(self, datum: dict, writer: BytesIO): raise ProtobufTypeException(self._writer_schema, datum) writer.write(class_instance.SerializeToString()) - - -if __name__ == '__main__': - raise Exception('Not a standalone module') diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 489af4f7d..b0822963b 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -5,7 +5,7 @@ from enum import Enum from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.enum_element import EnumElement -from karapace.protobuf.exception import error, IllegalArgumentException +from karapace.protobuf.exception import IllegalArgumentException, SchemaParseException from karapace.protobuf.extend_element import ExtendElement from karapace.protobuf.extensions_element import ExtensionsElement from karapace.protobuf.field import Field @@ -96,13 +96,17 @@ def read_proto_file(self) -> ProtoFileElement: # TODO: add check for exception? 
duplicate = next((x for x in iter(self.nested_types) if x.name == declaration.name), None) if duplicate: - error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") + raise SchemaParseException( + f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}" + ) self.nested_types.append(declaration) elif isinstance(declaration, ServiceElement): duplicate = next((x for x in iter(self.services) if x.name == declaration.name), None) if duplicate: - error(f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}") + raise SchemaParseException( + f"{declaration.name} ({declaration.location}) is already defined at {duplicate.location}" + ) self.services.append(declaration) elif isinstance(declaration, OptionElement): diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index a38c518e6..08b6f2a58 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -19,11 +19,10 @@ Timestamp_type_name = 'Timestamp' -# pylint: disable=E1101 +# pylint: disable=no-member def datetime_to_timestamp(dt): - ts = Timestamp() ts.FromDatetime(dt) @@ -35,6 +34,8 @@ def timestamp_to_datetime(ts): return dt +# pylint: enable=no-member + EXTENSION_CONTAINER = '___X' TYPE_CALLABLE_MAP = frozendict({ @@ -133,7 +134,6 @@ def _get_field_value_adaptor( including_default_value_fields=False, lowercase_enum_lables=False ): - if field.message_type and field.message_type.name == Timestamp_type_name: return timestamp_to_datetime if field.type == FieldDescriptor.TYPE_MESSAGE: From 5b3794aba0a4b720d71786c4b92c22b950f6c9c0 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 14:49:45 +0200 Subject: [PATCH 108/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 8f1d7ce9a..12ae0608f 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -104,7 +104,7 @@ def protobuf_to_dict(pb, use_enum_labels=True, including_default_value_fields=Tr if including_default_value_fields: for field in pb.DESCRIPTOR.fields: # Singular message fields and oneof fields will not be affected. - if (field.label != FieldDescriptor.LABEL_REPEATED and field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE): + if field.label != FieldDescriptor.LABEL_REPEATED and field.cpp_type == FieldDescriptor.CPPTYPE_MESSAGE: continue if field.containing_oneof: continue From 6bfa9c42b61236dadd950a85903af444532773c6 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 14:58:34 +0200 Subject: [PATCH 109/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 12ae0608f..66ea2420b 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -51,7 +51,7 @@ def timestamp_to_datetime(ts): FieldDescriptor.TYPE_SFIXED32: int, FieldDescriptor.TYPE_SFIXED64: int, FieldDescriptor.TYPE_BOOL: bool, - FieldDescriptor.TYPE_STRING: six.text_type, + FieldDescriptor.TYPE_STRING: str, FieldDescriptor.TYPE_BYTES: six.binary_type, FieldDescriptor.TYPE_ENUM: int, }) From 0cb7ec58e9d6c3ec1d471ef422c78c5d446bae9f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 14:59:58 +0200 Subject: [PATCH 110/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 66ea2420b..39830c1dc 
100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -52,7 +52,7 @@ def timestamp_to_datetime(ts): FieldDescriptor.TYPE_SFIXED64: int, FieldDescriptor.TYPE_BOOL: bool, FieldDescriptor.TYPE_STRING: str, - FieldDescriptor.TYPE_BYTES: six.binary_type, + FieldDescriptor.TYPE_BYTES: bytes, FieldDescriptor.TYPE_ENUM: int, }) From 684fb90fdcf089c554059347fd8dc41a3bbca9a6 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:00:14 +0200 Subject: [PATCH 111/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 39830c1dc..491c50a99 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -203,7 +203,7 @@ def _get_field_mapping(pb, dict_value, strict): ext_num = int(ext_num) except ValueError: raise ValueError("Extension keys must be integers.") - # pylint: disable=W0212 + # pylint: disable=protected-access if ext_num not in pb._extensions_by_number: if strict: raise KeyError("%s does not have a extension with number %s. Perhaps you forgot to import it?" % (pb, key)) From 0fc9153841b9339c28f9ba1aafc9eacad57a9643 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:00:20 +0200 Subject: [PATCH 112/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 491c50a99..b1b639b1a 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -208,7 +208,7 @@ def _get_field_mapping(pb, dict_value, strict): if strict: raise KeyError("%s does not have a extension with number %s. 
Perhaps you forgot to import it?" % (pb, key)) continue - # pylint: disable=W0212 + # pylint: disable=protected-access ext_field = pb._extensions_by_number[ext_num] # noinspection PyUnusedLocal From e1ae30c914b9199730df73b47a05abadff58f4f2 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:00:27 +0200 Subject: [PATCH 113/168] Update tests/unit/conftest.py Co-authored-by: Augusto Hack --- tests/unit/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 32a6934d1..ff800660e 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -21,7 +21,7 @@ async def post_new_schema(self, *args, **kwargs): class MockProtobufClient: - # pylint: disable=W0613 + # pylint: disable=unused-argument def __init__(self, *args, **kwargs): pass From b77a9911fb460d49ec5a530a0af46a7646f6f95f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:01:56 +0200 Subject: [PATCH 114/168] fixup --- karapace/protobuf/protobuf_to_dict.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 1486a1c42..08107a9da 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -13,7 +13,6 @@ from google.protobuf.timestamp_pb2 import Timestamp import datetime -import six __all__ = ["protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf", "REVERSE_TYPE_CALLABLE_MAP"] @@ -52,8 +51,8 @@ def timestamp_to_datetime(ts): FieldDescriptor.TYPE_SFIXED32: int, FieldDescriptor.TYPE_SFIXED64: int, FieldDescriptor.TYPE_BOOL: bool, - FieldDescriptor.TYPE_STRING: six.text_type, - FieldDescriptor.TYPE_BYTES: six.binary_type, + FieldDescriptor.TYPE_STRING: str, + FieldDescriptor.TYPE_BYTES: bytes, FieldDescriptor.TYPE_ENUM: int, }) @@ -252,7 +251,7 @@ def _dict_to_protobuf(pb, value_, type_callable_map, strict, ignore_none, use_da if field.type == 
FieldDescriptor.TYPE_MESSAGE: m = pb_value.add() _dict_to_protobuf(m, item, type_callable_map, strict, ignore_none, use_date_parser_for_fields) - elif field.type == FieldDescriptor.TYPE_ENUM and isinstance(item, six.string_types): + elif field.type == FieldDescriptor.TYPE_ENUM and isinstance(item, str): pb_value.append(_string_to_enum(field, item, strict)) else: pb_value.append(item) @@ -282,7 +281,7 @@ def _dict_to_protobuf(pb, value_, type_callable_map, strict, ignore_none, use_da pb.Extensions[field] = input_value continue - if field.type == FieldDescriptor.TYPE_ENUM and isinstance(input_value, six.string_types): + if field.type == FieldDescriptor.TYPE_ENUM and isinstance(input_value, str): input_value = _string_to_enum(field, input_value, strict) try: From bc719a60323e9bf8f32b28d48939207ee21817b9 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:06:16 +0200 Subject: [PATCH 115/168] fixup test --- tests/unit/test_any_tool.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index bad37ef40..2dbc39878 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -1,3 +1,4 @@ +from karapace import config from karapace.protobuf.io import calculate_class_name from karapace.protobuf.kotlin_wrapper import trim_margin from subprocess import PIPE, Popen, TimeoutExpired @@ -24,10 +25,14 @@ def test_protoc(): """ proto = trim_margin(proto) + directory = config.DEFAULTS["protobuf_runtime_directory"] proto_name = calculate_class_name(str(proto)) + proto_path = f"{directory}/{proto_name}.proto" + class_path = f"{directory}/{proto_name}_pb2.py" + log.info(proto_name) try: - with open(f"{proto_name}.proto", "w") as proto_text: + with open(proto_path, "w") as proto_text: proto_text.write(str(proto)) proto_text.close() @@ -35,7 +40,7 @@ def test_protoc(): log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) assert False, 
f"Cannot write Proto File. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" - args = ["protoc", "--python_out=./", f"{proto_name}.proto"] + args = ["protoc", f"--python_out={directory}", f"{proto_name}.proto"] try: proc = Popen(args, stdout=PIPE, stderr=PIPE, shell=False) except FileNotFoundError as e: @@ -60,6 +65,6 @@ def test_protoc(): log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) assert False, f"Cannot read Proto File. Unexpected exception in statsd send: {e.__class__.__name__} + {e}" - spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", f"./{proto_name}_pb2.py") + spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", class_path) tmp_module = importlib.util.module_from_spec(spec) spec.loader.exec_module(tmp_module) From 05f43c9cfd5fc06fdbb64c9bee8525bded7319ef Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:09:31 +0200 Subject: [PATCH 116/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index b1b639b1a..78c44ee43 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -154,7 +154,7 @@ def _get_field_value_adaptor( raise TypeError("Field %s.%s has unrecognised type id %d" % (pb.__class__.__name__, field.name, field.type)) -REVERSE_TYPE_CALLABLE_MAP = frozendict({}) +REVERSE_TYPE_CALLABLE_MAP = MappingProxyType({}) def dict_to_protobuf( From 0b64ebc6aea9cf22c61127db6a4cdfee29d9897e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:10:01 +0200 Subject: [PATCH 117/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 78c44ee43..7219cd3c6 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -252,7 +252,7 @@ def _dict_to_protobuf(pb, value_, type_callable_map, strict, ignore_none, use_da if field.type == FieldDescriptor.TYPE_MESSAGE: m = pb_value.add() _dict_to_protobuf(m, item, type_callable_map, strict, ignore_none, use_date_parser_for_fields) - elif field.type == FieldDescriptor.TYPE_ENUM and isinstance(item, six.string_types): + elif field.type == FieldDescriptor.TYPE_ENUM and isinstance(item, str): pb_value.append(_string_to_enum(field, item, strict)) else: pb_value.append(item) From 3d8ed006e383ca7b5146f0223437a45b5856f643 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 15:10:38 +0200 Subject: [PATCH 118/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 7219cd3c6..eb8d57d47 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -282,7 +282,7 @@ def _dict_to_protobuf(pb, value_, type_callable_map, strict, ignore_none, use_da pb.Extensions[field] = input_value continue - if field.type == FieldDescriptor.TYPE_ENUM and isinstance(input_value, six.string_types): + if field.type == FieldDescriptor.TYPE_ENUM and isinstance(input_value, str): input_value = _string_to_enum(field, input_value, strict) try: From 27ce00648adcdd7c81da3b33071374d143863d8e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 16:56:43 +0200 Subject: [PATCH 119/168] Update karapace/protobuf/protobuf_to_dict.py Co-authored-by: Augusto Hack --- karapace/protobuf/protobuf_to_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index eb8d57d47..f2faf18ec 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -37,7 +37,7 @@ def timestamp_to_datetime(ts): EXTENSION_CONTAINER = '___X' -TYPE_CALLABLE_MAP = frozendict({ +TYPE_CALLABLE_MAP = MappingProxyType({ FieldDescriptor.TYPE_DOUBLE: float, FieldDescriptor.TYPE_FLOAT: float, FieldDescriptor.TYPE_INT32: int, From 675d140979fae223eeb68d4cefc14060f61fcf83 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 17:19:32 +0200 Subject: [PATCH 120/168] fixup --- karapace/protobuf/io.py | 4 ++-- karapace/protobuf/protobuf_to_dict.py | 7 +++---- requirements.txt | 3 --- tests/unit/test_any_tool.py | 4 ++-- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index be7e05e4b..4974c38e1 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -123,8 +123,8 @@ def get_protobuf_class_instance(schema: ProtobufSchema, class_name: str) -> Any: if not os.path.isfile(class_path): complete = subprocess.run([ "protoc", - f"--python_out={directory}", - f"{proto_name}.proto", + "--python_out=./", + proto_path, ], check=True) if complete.returncode != 0: raise OSError(f"A protoc error code: {complete.returncode}") diff --git a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index 908eeb7cc..c3bdb12ee 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -5,12 +5,11 @@ https://github.com/wearefair/protobuf-to-dict LICENSE: https://github.com/wearefair/protobuf-to-dict/blob/master/LICENSE """ - from dateutil.parser import parse as date_parser -from frozendict import frozendict from google.protobuf.descriptor import FieldDescriptor from google.protobuf.message import Message from google.protobuf.timestamp_pb2 import Timestamp +from types import MappingProxyType import datetime @@ 
-37,7 +36,7 @@ def timestamp_to_datetime(ts): EXTENSION_CONTAINER = '___X' -TYPE_CALLABLE_MAP = frozendict({ +TYPE_CALLABLE_MAP = MappingProxyType({ FieldDescriptor.TYPE_DOUBLE: float, FieldDescriptor.TYPE_FLOAT: float, FieldDescriptor.TYPE_INT32: int, @@ -153,7 +152,7 @@ def _get_field_value_adaptor( raise TypeError("Field %s.%s has unrecognised type id %d" % (pb.__class__.__name__, field.name, field.type)) -REVERSE_TYPE_CALLABLE_MAP = frozendict({}) +REVERSE_TYPE_CALLABLE_MAP = MappingProxyType({}) def dict_to_protobuf( diff --git a/requirements.txt b/requirements.txt index 65b3a9713..7fe4e9f28 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,12 +6,9 @@ jsonschema==3.2.0 lz4==3.0.2 requests==2.23.0 networkx==2.5 -six~=1.15.0 python-dateutil==2.8.2 -frozendict~=2.1.0 filelock~=3.0.12 protobuf~=3.14.0 -protobuf3==0.2.1 # Patched dependencies # diff --git a/tests/unit/test_any_tool.py b/tests/unit/test_any_tool.py index 2dbc39878..04a7a60da 100644 --- a/tests/unit/test_any_tool.py +++ b/tests/unit/test_any_tool.py @@ -40,7 +40,7 @@ def test_protoc(): log.error("Unexpected exception in statsd send: %s: %s", e.__class__.__name__, e) assert False, f"Cannot write Proto File. 
Unexpected exception in statsd send: {e.__class__.__name__} + {e}" - args = ["protoc", f"--python_out={directory}", f"{proto_name}.proto"] + args = ["protoc", "--python_out=./", proto_path] try: proc = Popen(args, stdout=PIPE, stderr=PIPE, shell=False) except FileNotFoundError as e: @@ -57,7 +57,7 @@ def test_protoc(): assert False, "Timeout expired" module_content = "" try: - with open(f"./{proto_name}_pb2.py", "r") as proto_text: + with open(class_path, "r") as proto_text: module_content = proto_text.read() proto_text.close() print(module_content) From 26e81961e9a985b91f15d79f66b8a8fb25a9daf1 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 17:30:24 +0200 Subject: [PATCH 121/168] fixup --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index a49e5f6b6..f505a7a0f 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] addopts = -ra --tb=short --showlocals --numprocesses auto -timeout = 600 +timeout = 60 From 53908baab787ceb794d05315843e4628ade43da5 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sat, 11 Dec 2021 17:35:54 +0200 Subject: [PATCH 122/168] fixup gitignore --- .gitignore | 1 - runtime/.gitignore | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 runtime/.gitignore diff --git a/.gitignore b/.gitignore index f4300531f..4b8a06a7a 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,3 @@ __pycache__/ /kafka_*/ venv /karapace/version.py -/runtime/* diff --git a/runtime/.gitignore b/runtime/.gitignore new file mode 100644 index 000000000..5e7d2734c --- /dev/null +++ b/runtime/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore From 25d9112e0f79049d39e60001926ccf0405f2b952 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 17 Dec 2021 17:13:54 +0200 Subject: [PATCH 123/168] fixup by PR --- karapace/protobuf/exception.py | 8 ++++-- karapace/protobuf/extensions_element.py | 2 +- 
karapace/protobuf/io.py | 36 +++++-------------------- karapace/protobuf/option_element.py | 6 ++--- 4 files changed, 17 insertions(+), 35 deletions(-) diff --git a/karapace/protobuf/exception.py b/karapace/protobuf/exception.py index 6ff91595f..43baa95fe 100644 --- a/karapace/protobuf/exception.py +++ b/karapace/protobuf/exception.py @@ -33,10 +33,14 @@ class SchemaParseException(ProtobufException): """Error while parsing a Protobuf schema descriptor.""" +def pretty_print_json(obj: str) -> str: + return json.dumps(json.loads(obj), indent=2) + + class ProtobufSchemaResolutionException(ProtobufException): def __init__(self, fail_msg: str, writer_schema=None, reader_schema=None) -> None: - writer_dump = json.dumps(json.loads(str(writer_schema)), indent=2) - reader_dump = json.dumps(json.loads(str(reader_schema)), indent=2) + writer_dump = pretty_print_json(str(writer_schema)) + reader_dump = pretty_print_json(str(reader_schema)) if writer_schema: fail_msg += "\nWriter's Schema: %s" % writer_dump if reader_schema: diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index 5ebb28a80..18e5bd116 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -14,7 +14,7 @@ def __init__(self, location: Location, documentation: str = "", values: list = N self.values = values or [] def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append("extensions ") diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 4974c38e1..f8116f3d3 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -1,7 +1,3 @@ -#!/usr/bin/env python3 -# -*- mode: python -*- -# -*- coding: utf-8 -*- - from io import BytesIO from karapace import config from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException, ProtobufTypeException @@ -9,28 +5,13 @@ from karapace.protobuf.protobuf_to_dict 
import dict_to_protobuf, protobuf_to_dict from karapace.protobuf.schema import ProtobufSchema from karapace.protobuf.type_element import TypeElement -from typing import Any, List +from typing import Any, Dict, List import hashlib import importlib import importlib.util import logging import os -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
import subprocess ZERO_BYTE = b'\x00' @@ -110,24 +91,21 @@ def find_message_name(schema: ProtobufSchema, indexes: List[int]) -> str: return ".".join(result) -def get_protobuf_class_instance(schema: ProtobufSchema, class_name: str) -> Any: - directory = config.DEFAULTS["protobuf_runtime_directory"] +def get_protobuf_class_instance(schema: ProtobufSchema, class_name: str, cfg: Dict) -> Any: + directory = cfg["protobuf_runtime_directory"] proto_name = calculate_class_name(str(schema)) proto_path = f"{directory}/{proto_name}.proto" class_path = f"{directory}/{proto_name}_pb2.py" if not os.path.isfile(proto_path): with open(f"{directory}/{proto_name}.proto", "w") as proto_text: proto_text.write(str(schema)) - proto_text.close() if not os.path.isfile(class_path): - complete = subprocess.run([ + subprocess.run([ "protoc", "--python_out=./", proto_path, ], check=True) - if complete.returncode != 0: - raise OSError(f"A protoc error code: {complete.returncode}") spec = importlib.util.spec_from_file_location(f"{proto_name}_pb2", class_path) tmp_module = importlib.util.module_from_spec(spec) @@ -144,7 +122,8 @@ def read_data(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema, bio: indexes = read_indexes(bio) name = find_message_name(writer_schema, indexes) - class_instance = get_protobuf_class_instance(writer_schema, name) + + class_instance = get_protobuf_class_instance(writer_schema, name, config.DEFAULTS) class_instance.ParseFromString(bio.read()) return class_instance @@ -168,7 +147,6 @@ def read(self, bio: BytesIO): def write_varint(bio: BytesIO, value: int) -> int: - if value < 0: raise ValueError(f"value must not be negative, got {value}") @@ -218,7 +196,7 @@ def write_index(self, writer: BytesIO): def write(self, datum: dict, writer: BytesIO): # validate datum - class_instance = get_protobuf_class_instance(self._writer_schema, self._message_name) + class_instance = get_protobuf_class_instance(self._writer_schema, self._message_name, config.DEFAULTS) try: 
dict_to_protobuf(class_instance, datum) diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index 2460a2db8..bf9f6d346 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -51,14 +51,14 @@ def to_schema_declaration(self) -> str: @staticmethod def append_options(options: list) -> str: - data: list = [] + data = [] append_options(data, options) return "".join(data) def format_option_map(self, value: dict) -> str: keys = list(value.keys()) last_index = len(keys) - 1 - result: list = [] + result = [] for index, key in enumerate(keys): endl = "," if (index != last_index) else "" append_indented(result, f"{key}: {self.format_option_map_value(value[key])}{endl}") @@ -82,7 +82,7 @@ def format_option_map_value(self, value) -> str: def format_list_map_value(self, value) -> str: last_index = len(value) - 1 - result: list = [] + result = [] for index, elm in enumerate(value): endl = "," if (index != last_index) else "" append_indented(result, f"{self.format_option_map_value(elm)}{endl}") From 78fb9ee1be182c24e70d31f4ad42d6b1d550c089 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 19 Dec 2021 22:06:15 +0200 Subject: [PATCH 124/168] last commit of this PR --- .../test_rest_consumer_protobuf.py | 349 ++++++++++++++++++ 1 file changed, 349 insertions(+) create mode 100644 tests/integration/test_rest_consumer_protobuf.py diff --git a/tests/integration/test_rest_consumer_protobuf.py b/tests/integration/test_rest_consumer_protobuf.py new file mode 100644 index 000000000..533a4b824 --- /dev/null +++ b/tests/integration/test_rest_consumer_protobuf.py @@ -0,0 +1,349 @@ +from tests.utils import ( + consumer_valid_payload, new_consumer, new_random_name, new_topic, repeat_until_successful_request, REST_HEADERS, + schema_data +) + +import base64 +import copy +import json +import pytest +import random + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_create_and_delete(rest_async_client, 
trail): + header = REST_HEADERS["json"] + group_name = "test_group" + resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=consumer_valid_payload, headers=header) + assert resp.ok + body = resp.json() + assert "base_uri" in body + instance_id = body["instance_id"] + # add with the same name fails + with_name = copy.copy(consumer_valid_payload) + with_name["name"] = instance_id + resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=with_name, headers=header) + assert not resp.ok + assert resp.status == 409, f"Expected conflict for instance {instance_id} and group {group_name} " \ + f"but got a different error: {resp.body}" + invalid_fetch = copy.copy(consumer_valid_payload) + # add with faulty params fails + invalid_fetch["fetch.min.bytes"] = -10 + resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=invalid_fetch, headers=header) + assert not resp.ok + assert resp.status == 422, f"Expected invalid fetch request value config for: {resp.body}" + # delete followed by add succeeds + resp = await rest_async_client.delete(f"/consumers/{group_name}/instances/{instance_id}{trail}", headers=header) + assert resp.ok, "Could not delete " + resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=with_name, headers=header) + assert resp.ok + # delete unknown entity fails + resp = await rest_async_client.delete(f"/consumers/{group_name}/instances/random_name{trail}") + assert resp.status == 404 + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_assignment(rest_async_client, admin_client, trail): + header = REST_HEADERS["json"] + instance_id = await new_consumer(rest_async_client, "assignment_group", fmt="json", trail=trail) + assign_path = f"/consumers/assignment_group/instances/{instance_id}/assignments{trail}" + res = await rest_async_client.get(assign_path, headers=header) + assert res.ok, f"Expected status 200 but got {res.status}" + assert "partitions" in res.json() and 
len(res.json()["partitions"]) == 0, "Assignment list should be empty" + # assign one topic + topic_name = new_topic(admin_client) + assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} + res = await rest_async_client.post(assign_path, headers=header, json=assign_payload) + assert res.ok + assign_path = f"/consumers/assignment_group/instances/{instance_id}/assignments{trail}" + res = await rest_async_client.get(assign_path, headers=header) + assert res.ok, f"Expected status 200 but got {res.status}" + data = res.json() + assert "partitions" in data and len(data["partitions"]) == 1, "Should have one assignment" + p = data["partitions"][0] + assert p["topic"] == topic_name + assert p["partition"] == 0 + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_subscription(rest_async_client, admin_client, producer, trail): + # The random name is necessary to avoid test errors, without it the second + # parametrize test will fail. Issue: #178 + group_name = new_random_name("group") + + header = REST_HEADERS["binary"] + topic_name = new_topic(admin_client) + instance_id = await new_consumer(rest_async_client, group_name, fmt="binary", trail=trail) + sub_path = f"/consumers/{group_name}/instances/{instance_id}/subscription{trail}" + consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" + res = await rest_async_client.get(sub_path, headers=header) + assert res.ok + data = res.json() + assert "topics" in data and len(data["topics"]) == 0, \ + f"Expecting no subscription on freshly created consumer: {data}" + # simple sub + res = await rest_async_client.post(sub_path, json={"topics": [topic_name]}, headers=header) + assert res.ok + res = await rest_async_client.get(sub_path, headers=header) + assert res.ok + data = res.json() + assert "topics" in data and len(data["topics"]) == 1 and data["topics"][0] == topic_name, \ + f"expecting {topic_name} in {data}" + for _ in range(3): + producer.send(topic_name, 
b"foo").get() + resp = await rest_async_client.get(consume_path, headers=header) + data = resp.json() + assert resp.ok, f"Expected a successful response: {data['message']}" + assert len(data) == 3, f"Expected to consume 3 messages but got {data}" + + # on delete it's empty again + res = await rest_async_client.delete(sub_path, headers=header) + assert res.ok + res = await rest_async_client.get(sub_path, headers=header) + assert res.ok + data = res.json() + assert "topics" in data and len(data["topics"]) == 0, f"expecting {data} to be empty" + # one pattern sub will get all 3 + prefix = f"{hash(random.random())}" + pattern_topics = [new_topic(admin_client, prefix=f"{prefix}{i}") for i in range(3)] + res = await rest_async_client.post(sub_path, json={"topic_pattern": f"{prefix}.*"}, headers=REST_HEADERS["json"]) + assert res.ok + + # Consume so confluent rest reevaluates the subscription + resp = await rest_async_client.get(consume_path, headers=header) + assert resp.ok + # Should we keep this behaviour + + res = await rest_async_client.get(sub_path, headers=header) + assert res.ok + data = res.json() + assert "topics" in data and len(data["topics"]) == 3, "expecting subscription to 3 topics by pattern" + subscribed_to = set(data["topics"]) + expected = set(pattern_topics) + assert expected == subscribed_to, f"Expecting {expected} as subscribed to topics, but got {subscribed_to} instead" + # writing to all 3 will get us results from all 3 + for t in pattern_topics: + for _ in range(3): + producer.send(t, b"bar").get() + resp = await rest_async_client.get(consume_path, headers=header) + data = resp.json() + assert resp.ok, f"Expected a successful response: {data['message']}" + assert len(data) == 9, f"Expected to consume 3 messages but got {data}" + + # topic name sub along with pattern will fail + res = await rest_async_client.post( + sub_path, json={ + "topics": [topic_name], + "topic_pattern": "baz" + }, headers=REST_HEADERS["json"] + ) + assert res.status == 409, 
f"Invalid state error expected: {res.status}" + data = res.json() + assert data["error_code"] == 40903, f"Invalid state error expected: {data}" + # assign after subscribe will fail + assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" + assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} + res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload) + assert res.status == 409, "Expecting status code 409 on assign after subscribe on the same consumer instance" + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_seek(rest_async_client, admin_client, trail): + group = "seek_group" + instance_id = await new_consumer(rest_async_client, group, trail=trail) + seek_path = f"/consumers/{group}/instances/{instance_id}/positions{trail}" + # one partition assigned, we can + topic_name = new_topic(admin_client) + assign_path = f"/consumers/{group}/instances/{instance_id}/assignments{trail}" + assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} + res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload) + assert res.ok + seek_payload = {"offsets": [{"topic": topic_name, "partition": 0, "offset": 10}]} + res = await rest_async_client.post(seek_path, json=seek_payload, headers=REST_HEADERS["json"]) + assert res.ok, f"Unexpected status for {res}" + extreme_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} + for pos in ["beginning", "end"]: + url = f"{seek_path}/{pos}" + res = await rest_async_client.post(url, json=extreme_payload, headers=REST_HEADERS["json"]) + assert res.ok, f"Expecting a successful response: {res}" + # unassigned seeks should fail + invalid_payload = {"offsets": [{"topic": "faulty", "partition": 0, "offset": 10}]} + res = await rest_async_client.post(seek_path, json=invalid_payload, headers=REST_HEADERS["json"]) + assert res.status == 409, f"Expecting a failure for unassigned 
partition seek: {res}" + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_offsets(rest_async_client, admin_client, trail): + group_name = "offset_group" + fmt = "binary" + header = REST_HEADERS[fmt] + instance_id = await new_consumer(rest_async_client, group_name, fmt=fmt, trail=trail) + topic_name = new_topic(admin_client) + offsets_path = f"/consumers/{group_name}/instances/{instance_id}/offsets{trail}" + assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" + res = await rest_async_client.post( + assign_path, json={"partitions": [{ + "topic": topic_name, + "partition": 0 + }]}, headers=header + ) + assert res.ok, f"Unexpected response status for assignment {res}" + + await repeat_until_successful_request( + rest_async_client.post, + offsets_path, + json_data={"offsets": [{ + "topic": topic_name, + "partition": 0, + "offset": 0, + }]}, + headers=header, + error_msg="Unexpected response status for offset commit", + timeout=20, + sleep=1, + ) + + res = await rest_async_client.get( + offsets_path, headers=header, json={"partitions": [{ + "topic": topic_name, + "partition": 0 + }]} + ) + assert res.ok, f"Unexpected response status for {res}" + data = res.json() + assert "offsets" in data and len(data["offsets"]) == 1, f"Unexpected offsets response {res}" + data = data["offsets"][0] + assert "topic" in data and data["topic"] == topic_name, f"Unexpected topic {data}" + assert "offset" in data and data["offset"] == 1, f"Unexpected offset {data}" + assert "partition" in data and data["partition"] == 0, f"Unexpected partition {data}" + res = await rest_async_client.post( + offsets_path, json={"offsets": [{ + "topic": topic_name, + "partition": 0, + "offset": 1 + }]}, headers=header + ) + assert res.ok, f"Unexpected response status for offset commit {res}" + + res = await rest_async_client.get( + offsets_path, headers=header, json={"partitions": [{ + "topic": topic_name, + "partition": 0 + }]} + ) + assert res.ok, f"Unexpected 
response status for {res}" + data = res.json() + assert "offsets" in data and len(data["offsets"]) == 1, f"Unexpected offsets response {res}" + data = data["offsets"][0] + assert "topic" in data and data["topic"] == topic_name, f"Unexpected topic {data}" + assert "offset" in data and data["offset"] == 2, f"Unexpected offset {data}" + assert "partition" in data and data["partition"] == 0, f"Unexpected partition {data}" + + +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_consume(rest_async_client, admin_client, producer, trail): + # avro to be handled in a separate testcase ?? + values = { + "json": [json.dumps({ + "foo": f"bar{i}" + }).encode("utf-8") for i in range(3)], + "binary": [f"val{i}".encode('utf-8') for i in range(3)] + } + deserializers = {"binary": base64.b64decode, "json": lambda x: json.dumps(x).encode("utf-8")} + group_name = "consume_group" + for fmt in ["binary", "json"]: + header = copy.deepcopy(REST_HEADERS[fmt]) + instance_id = await new_consumer(rest_async_client, group_name, fmt=fmt, trail=trail) + assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" + seek_path = f"/consumers/{group_name}/instances/{instance_id}/positions/beginning{trail}" + consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" + topic_name = new_topic(admin_client) + assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} + res = await rest_async_client.post(assign_path, json=assign_payload, headers=header) + assert res.ok + for i in range(len(values[fmt])): + producer.send(topic_name, value=values[fmt][i]).get() + seek_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} + resp = await rest_async_client.post(seek_path, headers=header, json=seek_payload) + assert resp.ok + header["Accept"] = f"application/vnd.kafka.{fmt}.v2+json" + resp = await rest_async_client.get(consume_path, headers=header) + assert resp.ok, f"Expected a successful response: {resp}" + data = 
resp.json() + assert len(data) == len(values[fmt]), f"Expected {len(values[fmt])} element in response: {resp}" + for i in range(len(values[fmt])): + assert deserializers[fmt](data[i]["value"]) == values[fmt][i], \ + f"Extracted data {deserializers[fmt](data[i]['value'])}" \ + f" does not match {values[fmt][i]} for format {fmt}" + + +@pytest.mark.parametrize("schema_type", ["avro"]) +@pytest.mark.parametrize("trail", ["", "/"]) +async def test_publish_consume_avro(rest_async_client, admin_client, trail, schema_type): + header = REST_HEADERS[schema_type] + group_name = "e2e_group" + instance_id = await new_consumer(rest_async_client, group_name, fmt=schema_type, trail=trail) + assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" + consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" + tn = new_topic(admin_client) + assign_payload = {"partitions": [{"topic": tn, "partition": 0}]} + res = await rest_async_client.post(assign_path, json=assign_payload, headers=header) + assert res.ok + publish_payload = schema_data[schema_type][1] + await repeat_until_successful_request( + rest_async_client.post, + f"topics/{tn}{trail}", + json_data={ + "value_schema": schema_data[schema_type][0], + "records": [{ + "value": o + } for o in publish_payload] + }, + headers=header, + error_msg="Unexpected response status for offset commit", + timeout=10, + sleep=1, + ) + resp = await rest_async_client.get(consume_path, headers=header) + assert resp.ok, f"Expected a successful response: {resp}" + data = resp.json() + assert len(data) == len(publish_payload), f"Expected to read test_objects from fetch request but got {data}" + data_values = [x["value"] for x in data] + for expected, actual in zip(publish_payload, data_values): + assert expected == actual, f"Expecting {actual} to be {expected}" + + +@pytest.mark.parametrize("schema_type", ["protobuf"]) +@pytest.mark.parametrize("trail", ["", "/"]) +async def 
test_publish_consume_protobuf(rest_async_client, admin_client, trail, schema_type): + header = REST_HEADERS[schema_type] + group_name = "e2e_protobuf_group" + instance_id = await new_consumer(rest_async_client, group_name, fmt=schema_type, trail=trail) + assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" + consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" + tn = new_topic(admin_client) + assign_payload = {"partitions": [{"topic": tn, "partition": 0}]} + res = await rest_async_client.post(assign_path, json=assign_payload, headers=header) + assert res.ok + publish_payload = schema_data[schema_type][1] + await repeat_until_successful_request( + rest_async_client.post, + f"topics/{tn}{trail}", + json_data={ + "value_schema": schema_data[schema_type][0], + "records": [{ + "value": o + } for o in publish_payload] + }, + headers=header, + error_msg="Unexpected response status for offset commit", + timeout=10, + sleep=1, + ) + resp = await rest_async_client.get(consume_path, headers=header) + assert resp.ok, f"Expected a successful response: {resp}" + data = resp.json() + assert len(data) == len(publish_payload), f"Expected to read test_objects from fetch request but got {data}" + data_values = [x["value"] for x in data] + for expected, actual in zip(publish_payload, data_values): + assert expected == actual, f"Expecting {actual} to be {expected}" From acd633b8affe5eb16286520e0d1718a509a9f8b9 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 19 Dec 2021 22:06:25 +0200 Subject: [PATCH 125/168] last commit of this PR --- tests/integration/test_rest_consumer.py | 36 --- .../test_rest_consumer_protobuf.py | 288 +----------------- tests/utils.py | 173 +++++++++++ 3 files changed, 181 insertions(+), 316 deletions(-) diff --git a/tests/integration/test_rest_consumer.py b/tests/integration/test_rest_consumer.py index 533a4b824..b564ac754 100644 --- a/tests/integration/test_rest_consumer.py +++ 
b/tests/integration/test_rest_consumer.py @@ -311,39 +311,3 @@ async def test_publish_consume_avro(rest_async_client, admin_client, trail, sche data_values = [x["value"] for x in data] for expected, actual in zip(publish_payload, data_values): assert expected == actual, f"Expecting {actual} to be {expected}" - - -@pytest.mark.parametrize("schema_type", ["protobuf"]) -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_publish_consume_protobuf(rest_async_client, admin_client, trail, schema_type): - header = REST_HEADERS[schema_type] - group_name = "e2e_protobuf_group" - instance_id = await new_consumer(rest_async_client, group_name, fmt=schema_type, trail=trail) - assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" - consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" - tn = new_topic(admin_client) - assign_payload = {"partitions": [{"topic": tn, "partition": 0}]} - res = await rest_async_client.post(assign_path, json=assign_payload, headers=header) - assert res.ok - publish_payload = schema_data[schema_type][1] - await repeat_until_successful_request( - rest_async_client.post, - f"topics/{tn}{trail}", - json_data={ - "value_schema": schema_data[schema_type][0], - "records": [{ - "value": o - } for o in publish_payload] - }, - headers=header, - error_msg="Unexpected response status for offset commit", - timeout=10, - sleep=1, - ) - resp = await rest_async_client.get(consume_path, headers=header) - assert resp.ok, f"Expected a successful response: {resp}" - data = resp.json() - assert len(data) == len(publish_payload), f"Expected to read test_objects from fetch request but got {data}" - data_values = [x["value"] for x in data] - for expected, actual in zip(publish_payload, data_values): - assert expected == actual, f"Expecting {actual} to be {expected}" diff --git a/tests/integration/test_rest_consumer_protobuf.py b/tests/integration/test_rest_consumer_protobuf.py index 
533a4b824..94695e952 100644 --- a/tests/integration/test_rest_consumer_protobuf.py +++ b/tests/integration/test_rest_consumer_protobuf.py @@ -1,287 +1,15 @@ from tests.utils import ( - consumer_valid_payload, new_consumer, new_random_name, new_topic, repeat_until_successful_request, REST_HEADERS, - schema_data + new_consumer, new_topic, repeat_until_successful_request, REST_HEADERS, schema_data, schema_data_second ) -import base64 -import copy -import json import pytest -import random +@pytest.mark.parametrize("schema_type", ["protobuf"]) @pytest.mark.parametrize("trail", ["", "/"]) -async def test_create_and_delete(rest_async_client, trail): - header = REST_HEADERS["json"] - group_name = "test_group" - resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=consumer_valid_payload, headers=header) - assert resp.ok - body = resp.json() - assert "base_uri" in body - instance_id = body["instance_id"] - # add with the same name fails - with_name = copy.copy(consumer_valid_payload) - with_name["name"] = instance_id - resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=with_name, headers=header) - assert not resp.ok - assert resp.status == 409, f"Expected conflict for instance {instance_id} and group {group_name} " \ - f"but got a different error: {resp.body}" - invalid_fetch = copy.copy(consumer_valid_payload) - # add with faulty params fails - invalid_fetch["fetch.min.bytes"] = -10 - resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=invalid_fetch, headers=header) - assert not resp.ok - assert resp.status == 422, f"Expected invalid fetch request value config for: {resp.body}" - # delete followed by add succeeds - resp = await rest_async_client.delete(f"/consumers/{group_name}/instances/{instance_id}{trail}", headers=header) - assert resp.ok, "Could not delete " - resp = await rest_async_client.post(f"/consumers/{group_name}{trail}", json=with_name, headers=header) - assert resp.ok - # delete unknown 
entity fails - resp = await rest_async_client.delete(f"/consumers/{group_name}/instances/random_name{trail}") - assert resp.status == 404 - - -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_assignment(rest_async_client, admin_client, trail): - header = REST_HEADERS["json"] - instance_id = await new_consumer(rest_async_client, "assignment_group", fmt="json", trail=trail) - assign_path = f"/consumers/assignment_group/instances/{instance_id}/assignments{trail}" - res = await rest_async_client.get(assign_path, headers=header) - assert res.ok, f"Expected status 200 but got {res.status}" - assert "partitions" in res.json() and len(res.json()["partitions"]) == 0, "Assignment list should be empty" - # assign one topic - topic_name = new_topic(admin_client) - assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} - res = await rest_async_client.post(assign_path, headers=header, json=assign_payload) - assert res.ok - assign_path = f"/consumers/assignment_group/instances/{instance_id}/assignments{trail}" - res = await rest_async_client.get(assign_path, headers=header) - assert res.ok, f"Expected status 200 but got {res.status}" - data = res.json() - assert "partitions" in data and len(data["partitions"]) == 1, "Should have one assignment" - p = data["partitions"][0] - assert p["topic"] == topic_name - assert p["partition"] == 0 - - -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_subscription(rest_async_client, admin_client, producer, trail): - # The random name is necessary to avoid test errors, without it the second - # parametrize test will fail. 
Issue: #178 - group_name = new_random_name("group") - - header = REST_HEADERS["binary"] - topic_name = new_topic(admin_client) - instance_id = await new_consumer(rest_async_client, group_name, fmt="binary", trail=trail) - sub_path = f"/consumers/{group_name}/instances/{instance_id}/subscription{trail}" - consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" - res = await rest_async_client.get(sub_path, headers=header) - assert res.ok - data = res.json() - assert "topics" in data and len(data["topics"]) == 0, \ - f"Expecting no subscription on freshly created consumer: {data}" - # simple sub - res = await rest_async_client.post(sub_path, json={"topics": [topic_name]}, headers=header) - assert res.ok - res = await rest_async_client.get(sub_path, headers=header) - assert res.ok - data = res.json() - assert "topics" in data and len(data["topics"]) == 1 and data["topics"][0] == topic_name, \ - f"expecting {topic_name} in {data}" - for _ in range(3): - producer.send(topic_name, b"foo").get() - resp = await rest_async_client.get(consume_path, headers=header) - data = resp.json() - assert resp.ok, f"Expected a successful response: {data['message']}" - assert len(data) == 3, f"Expected to consume 3 messages but got {data}" - - # on delete it's empty again - res = await rest_async_client.delete(sub_path, headers=header) - assert res.ok - res = await rest_async_client.get(sub_path, headers=header) - assert res.ok - data = res.json() - assert "topics" in data and len(data["topics"]) == 0, f"expecting {data} to be empty" - # one pattern sub will get all 3 - prefix = f"{hash(random.random())}" - pattern_topics = [new_topic(admin_client, prefix=f"{prefix}{i}") for i in range(3)] - res = await rest_async_client.post(sub_path, json={"topic_pattern": f"{prefix}.*"}, headers=REST_HEADERS["json"]) - assert res.ok - - # Consume so confluent rest reevaluates the subscription - resp = await rest_async_client.get(consume_path, headers=header) - 
assert resp.ok - # Should we keep this behaviour - - res = await rest_async_client.get(sub_path, headers=header) - assert res.ok - data = res.json() - assert "topics" in data and len(data["topics"]) == 3, "expecting subscription to 3 topics by pattern" - subscribed_to = set(data["topics"]) - expected = set(pattern_topics) - assert expected == subscribed_to, f"Expecting {expected} as subscribed to topics, but got {subscribed_to} instead" - # writing to all 3 will get us results from all 3 - for t in pattern_topics: - for _ in range(3): - producer.send(t, b"bar").get() - resp = await rest_async_client.get(consume_path, headers=header) - data = resp.json() - assert resp.ok, f"Expected a successful response: {data['message']}" - assert len(data) == 9, f"Expected to consume 3 messages but got {data}" - - # topic name sub along with pattern will fail - res = await rest_async_client.post( - sub_path, json={ - "topics": [topic_name], - "topic_pattern": "baz" - }, headers=REST_HEADERS["json"] - ) - assert res.status == 409, f"Invalid state error expected: {res.status}" - data = res.json() - assert data["error_code"] == 40903, f"Invalid state error expected: {data}" - # assign after subscribe will fail - assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" - assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} - res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload) - assert res.status == 409, "Expecting status code 409 on assign after subscribe on the same consumer instance" - - -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_seek(rest_async_client, admin_client, trail): - group = "seek_group" - instance_id = await new_consumer(rest_async_client, group, trail=trail) - seek_path = f"/consumers/{group}/instances/{instance_id}/positions{trail}" - # one partition assigned, we can - topic_name = new_topic(admin_client) - assign_path = 
f"/consumers/{group}/instances/{instance_id}/assignments{trail}" - assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} - res = await rest_async_client.post(assign_path, headers=REST_HEADERS["json"], json=assign_payload) - assert res.ok - seek_payload = {"offsets": [{"topic": topic_name, "partition": 0, "offset": 10}]} - res = await rest_async_client.post(seek_path, json=seek_payload, headers=REST_HEADERS["json"]) - assert res.ok, f"Unexpected status for {res}" - extreme_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} - for pos in ["beginning", "end"]: - url = f"{seek_path}/{pos}" - res = await rest_async_client.post(url, json=extreme_payload, headers=REST_HEADERS["json"]) - assert res.ok, f"Expecting a successful response: {res}" - # unassigned seeks should fail - invalid_payload = {"offsets": [{"topic": "faulty", "partition": 0, "offset": 10}]} - res = await rest_async_client.post(seek_path, json=invalid_payload, headers=REST_HEADERS["json"]) - assert res.status == 409, f"Expecting a failure for unassigned partition seek: {res}" - - -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_offsets(rest_async_client, admin_client, trail): - group_name = "offset_group" - fmt = "binary" - header = REST_HEADERS[fmt] - instance_id = await new_consumer(rest_async_client, group_name, fmt=fmt, trail=trail) - topic_name = new_topic(admin_client) - offsets_path = f"/consumers/{group_name}/instances/{instance_id}/offsets{trail}" - assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" - res = await rest_async_client.post( - assign_path, json={"partitions": [{ - "topic": topic_name, - "partition": 0 - }]}, headers=header - ) - assert res.ok, f"Unexpected response status for assignment {res}" - - await repeat_until_successful_request( - rest_async_client.post, - offsets_path, - json_data={"offsets": [{ - "topic": topic_name, - "partition": 0, - "offset": 0, - }]}, - headers=header, - error_msg="Unexpected 
response status for offset commit", - timeout=20, - sleep=1, - ) - - res = await rest_async_client.get( - offsets_path, headers=header, json={"partitions": [{ - "topic": topic_name, - "partition": 0 - }]} - ) - assert res.ok, f"Unexpected response status for {res}" - data = res.json() - assert "offsets" in data and len(data["offsets"]) == 1, f"Unexpected offsets response {res}" - data = data["offsets"][0] - assert "topic" in data and data["topic"] == topic_name, f"Unexpected topic {data}" - assert "offset" in data and data["offset"] == 1, f"Unexpected offset {data}" - assert "partition" in data and data["partition"] == 0, f"Unexpected partition {data}" - res = await rest_async_client.post( - offsets_path, json={"offsets": [{ - "topic": topic_name, - "partition": 0, - "offset": 1 - }]}, headers=header - ) - assert res.ok, f"Unexpected response status for offset commit {res}" - - res = await rest_async_client.get( - offsets_path, headers=header, json={"partitions": [{ - "topic": topic_name, - "partition": 0 - }]} - ) - assert res.ok, f"Unexpected response status for {res}" - data = res.json() - assert "offsets" in data and len(data["offsets"]) == 1, f"Unexpected offsets response {res}" - data = data["offsets"][0] - assert "topic" in data and data["topic"] == topic_name, f"Unexpected topic {data}" - assert "offset" in data and data["offset"] == 2, f"Unexpected offset {data}" - assert "partition" in data and data["partition"] == 0, f"Unexpected partition {data}" - - -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_consume(rest_async_client, admin_client, producer, trail): - # avro to be handled in a separate testcase ?? 
- values = { - "json": [json.dumps({ - "foo": f"bar{i}" - }).encode("utf-8") for i in range(3)], - "binary": [f"val{i}".encode('utf-8') for i in range(3)] - } - deserializers = {"binary": base64.b64decode, "json": lambda x: json.dumps(x).encode("utf-8")} - group_name = "consume_group" - for fmt in ["binary", "json"]: - header = copy.deepcopy(REST_HEADERS[fmt]) - instance_id = await new_consumer(rest_async_client, group_name, fmt=fmt, trail=trail) - assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" - seek_path = f"/consumers/{group_name}/instances/{instance_id}/positions/beginning{trail}" - consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" - topic_name = new_topic(admin_client) - assign_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} - res = await rest_async_client.post(assign_path, json=assign_payload, headers=header) - assert res.ok - for i in range(len(values[fmt])): - producer.send(topic_name, value=values[fmt][i]).get() - seek_payload = {"partitions": [{"topic": topic_name, "partition": 0}]} - resp = await rest_async_client.post(seek_path, headers=header, json=seek_payload) - assert resp.ok - header["Accept"] = f"application/vnd.kafka.{fmt}.v2+json" - resp = await rest_async_client.get(consume_path, headers=header) - assert resp.ok, f"Expected a successful response: {resp}" - data = resp.json() - assert len(data) == len(values[fmt]), f"Expected {len(values[fmt])} element in response: {resp}" - for i in range(len(values[fmt])): - assert deserializers[fmt](data[i]["value"]) == values[fmt][i], \ - f"Extracted data {deserializers[fmt](data[i]['value'])}" \ - f" does not match {values[fmt][i]} for format {fmt}" - - -@pytest.mark.parametrize("schema_type", ["avro"]) -@pytest.mark.parametrize("trail", ["", "/"]) -async def test_publish_consume_avro(rest_async_client, admin_client, trail, schema_type): +async def test_publish_consume_protobuf(rest_async_client, admin_client, 
trail, schema_type): header = REST_HEADERS[schema_type] - group_name = "e2e_group" + group_name = "e2e_protobuf_group" instance_id = await new_consumer(rest_async_client, group_name, fmt=schema_type, trail=trail) assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" @@ -315,9 +43,9 @@ async def test_publish_consume_avro(rest_async_client, admin_client, trail, sche @pytest.mark.parametrize("schema_type", ["protobuf"]) @pytest.mark.parametrize("trail", ["", "/"]) -async def test_publish_consume_protobuf(rest_async_client, admin_client, trail, schema_type): +async def test_publish_consume_protobuf_second(rest_async_client, admin_client, trail, schema_type): header = REST_HEADERS[schema_type] - group_name = "e2e_protobuf_group" + group_name = "e2e_proto_second" instance_id = await new_consumer(rest_async_client, group_name, fmt=schema_type, trail=trail) assign_path = f"/consumers/{group_name}/instances/{instance_id}/assignments{trail}" consume_path = f"/consumers/{group_name}/instances/{instance_id}/records{trail}?timeout=1000" @@ -325,12 +53,12 @@ async def test_publish_consume_protobuf(rest_async_client, admin_client, trail, assign_payload = {"partitions": [{"topic": tn, "partition": 0}]} res = await rest_async_client.post(assign_path, json=assign_payload, headers=header) assert res.ok - publish_payload = schema_data[schema_type][1] + publish_payload = schema_data_second[schema_type][1] await repeat_until_successful_request( rest_async_client.post, f"topics/{tn}{trail}", json_data={ - "value_schema": schema_data[schema_type][0], + "value_schema": schema_data_second[schema_type][0], "records": [{ "value": o } for o in publish_payload] diff --git a/tests/utils.py b/tests/utils.py index 204f44608..2ad36920c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -132,6 +132,179 @@ "protobuf": (schema_protobuf, test_objects_protobuf) } 
+schema_protobuf_second = """ +|syntax = "proto3"; +| +|option java_package = "com.codingharbour.protobuf"; +|option java_outer_classname = "TestEnumOrder"; +| +|message SensorInfo { +| int32 q = 1; +| Enum sensor_type = 2; +| repeated int32 nums = 3; +| Order order = 4; +| message Order { +| string item = 1; +| } +|} +|enum Enum { +| HIGH = 0; +| MIDDLE = 1; +| LOW = 2; +|} +| +""" +schema_protobuf_second = trim_margin(schema_protobuf_second) + +test_objects_protobuf_second = [ + { + 'q': 1, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 2, + 'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 3, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 4, + 'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 5, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 6, + 'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 7, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 8, + 'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 9, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 10, + 'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 11, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 12, + 'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 13, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 14, + 'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 15, + 'sensor_type': 'HIGH', + 'nums': [3, 4], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 16, + 
'sensor_type': 'MIDDLE', + 'nums': [2], + 'order': { + 'item': 'ABC01233' + } + }, + { + 'q': 17, + 'sensor_type': 'HIGH', + 'nums': [3, 4, 6], + 'order': { + 'item': 'ABC01223' + } + }, + { + 'q': 18, + 'sensor_type': 'MIDDLE', + 'nums': [2, 4, 5, 6, 7, 8, 9], + 'order': { + 'item': 'ABC01233' + } + }, +] + +schema_data_second = {"protobuf": (schema_protobuf_second, test_objects_protobuf_second)} + second_schema_json = json.dumps({ "namespace": "example.avro.other", "type": "record", From 17d8aeb2f0c4a622ca3688c0a1029b70bd03a5d1 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 19 Dec 2021 22:47:06 +0200 Subject: [PATCH 126/168] decrease test load --- tests/utils.py | 56 -------------------------------------------------- 1 file changed, 56 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 2ad36920c..072ec081c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -245,62 +245,6 @@ 'item': 'ABC01223' } }, - { - 'q': 12, - 'sensor_type': 'MIDDLE', - 'nums': [2], - 'order': { - 'item': 'ABC01233' - } - }, - { - 'q': 13, - 'sensor_type': 'HIGH', - 'nums': [3, 4], - 'order': { - 'item': 'ABC01223' - } - }, - { - 'q': 14, - 'sensor_type': 'MIDDLE', - 'nums': [2], - 'order': { - 'item': 'ABC01233' - } - }, - { - 'q': 15, - 'sensor_type': 'HIGH', - 'nums': [3, 4], - 'order': { - 'item': 'ABC01223' - } - }, - { - 'q': 16, - 'sensor_type': 'MIDDLE', - 'nums': [2], - 'order': { - 'item': 'ABC01233' - } - }, - { - 'q': 17, - 'sensor_type': 'HIGH', - 'nums': [3, 4, 6], - 'order': { - 'item': 'ABC01223' - } - }, - { - 'q': 18, - 'sensor_type': 'MIDDLE', - 'nums': [2, 4, 5, 6, 7, 8, 9], - 'order': { - 'item': 'ABC01233' - } - }, ] schema_data_second = {"protobuf": (schema_protobuf_second, test_objects_protobuf_second)} From 063c95e72a89f9d3d94c00768bb9536895010c4b Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 19 Dec 2021 22:59:23 +0200 Subject: [PATCH 127/168] decrease test load --- tests/utils.py | 65 
+------------------------------------------------- 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 072ec081c..6f0726c78 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -181,70 +181,7 @@ 'item': 'ABC01223' } }, - { - 'q': 4, - 'sensor_type': 'MIDDLE', - 'nums': [2], - 'order': { - 'item': 'ABC01233' - } - }, - { - 'q': 5, - 'sensor_type': 'HIGH', - 'nums': [3, 4], - 'order': { - 'item': 'ABC01223' - } - }, - { - 'q': 6, - 'sensor_type': 'MIDDLE', - 'nums': [2], - 'order': { - 'item': 'ABC01233' - } - }, - { - 'q': 7, - 'sensor_type': 'HIGH', - 'nums': [3, 4], - 'order': { - 'item': 'ABC01223' - } - }, - { - 'q': 8, - 'sensor_type': 'MIDDLE', - 'nums': [2], - 'order': { - 'item': 'ABC01233' - } - }, - { - 'q': 9, - 'sensor_type': 'HIGH', - 'nums': [3, 4], - 'order': { - 'item': 'ABC01223' - } - }, - { - 'q': 10, - 'sensor_type': 'MIDDLE', - 'nums': [2], - 'order': { - 'item': 'ABC01233' - } - }, - { - 'q': 11, - 'sensor_type': 'HIGH', - 'nums': [3, 4], - 'order': { - 'item': 'ABC01223' - } - }, + ] schema_data_second = {"protobuf": (schema_protobuf_second, test_objects_protobuf_second)} From 4a89533c59d40c0f388ada89716400e523801a28 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 19 Dec 2021 23:27:58 +0200 Subject: [PATCH 128/168] decrease test load --- tests/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index 6f0726c78..adad7a3a4 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -181,7 +181,6 @@ 'item': 'ABC01223' } }, - ] schema_data_second = {"protobuf": (schema_protobuf_second, test_objects_protobuf_second)} From 70ab64fac445acc9626d142b58d7217eac2acb60 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Sun, 19 Dec 2021 23:46:17 +0200 Subject: [PATCH 129/168] decrease test load --- tests/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index adad7a3a4..64a2e7927 100644 --- a/tests/utils.py 
+++ b/tests/utils.py @@ -140,14 +140,14 @@ | |message SensorInfo { | int32 q = 1; -| Enum sensor_type = 2; +| Enu sensor_type = 2; | repeated int32 nums = 3; | Order order = 4; | message Order { | string item = 1; | } |} -|enum Enum { +|enum Enu { | HIGH = 0; | MIDDLE = 1; | LOW = 2; From 5dbf93808a3959351de34fa26c9a7d0316df33c1 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 20 Dec 2021 00:12:54 +0200 Subject: [PATCH 130/168] decrease test load --- tests/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 64a2e7927..f8577171a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -148,9 +148,9 @@ | } |} |enum Enu { -| HIGH = 0; -| MIDDLE = 1; -| LOW = 2; +| H1 = 0; +| M1 = 1; +| L1 = 2; |} | """ @@ -159,7 +159,7 @@ test_objects_protobuf_second = [ { 'q': 1, - 'sensor_type': 'HIGH', + 'sensor_type': 'H1', 'nums': [3, 4], 'order': { 'item': 'ABC01223' @@ -167,7 +167,7 @@ }, { 'q': 2, - 'sensor_type': 'MIDDLE', + 'sensor_type': 'M1', 'nums': [2], 'order': { 'item': 'ABC01233' @@ -175,7 +175,7 @@ }, { 'q': 3, - 'sensor_type': 'HIGH', + 'sensor_type': 'L1', 'nums': [3, 4], 'order': { 'item': 'ABC01223' From 6e99fa60dbb9a62449970e479b34470b50a01adb Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 20 Dec 2021 11:26:05 +0200 Subject: [PATCH 131/168] remove unneded dependency --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7fe4e9f28..cd733f543 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,6 @@ lz4==3.0.2 requests==2.23.0 networkx==2.5 python-dateutil==2.8.2 -filelock~=3.0.12 protobuf~=3.14.0 # Patched dependencies From 60e9d788be360dd7f1d29f6ecb3dc9f881233302 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Tue, 11 Jan 2022 17:00:49 +0200 Subject: [PATCH 132/168] fixup dependencies because of https://github.blog/2021-09-01-improving-git-protocol-security-github/ --- requirements.txt | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cd733f543..6732e0940 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,4 @@ protobuf~=3.14.0 # images and forces a new image generation. # git+https://github.com/aiven/avro.git@513b153bac5040af6bba5847aef202adb680b67b#subdirectory=lang/py3/ -git+git://github.com/aiven/kafka-python.git@b9f2f78377d56392f61cba8856dc6c02ae841b79 +git+https://github.com/aiven/kafka-python.git@b9f2f78377d56392f61cba8856dc6c02ae841b79 From c34974b8d6644e371b5f052b85ba5f3f85f96c6e Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 13 Jan 2022 15:33:42 +0200 Subject: [PATCH 133/168] Update checks.py remove TODO --- karapace/compatibility/protobuf/checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index 70f72ee8c..e93711572 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -1,4 +1,3 @@ -# TODO: PROTOBUF* this functionality must be implemented from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaCompatibilityType from karapace.protobuf.compare_result import CompareResult, ModificationRecord from karapace.protobuf.schema import ProtobufSchema From abee24503be81b2094ea6f5803f3b398a4cb70db Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 13 Jan 2022 17:02:12 +0200 Subject: [PATCH 134/168] Update config.py --- karapace/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/config.py b/karapace/config.py index 5704c8598..098ad4dce 100644 --- a/karapace/config.py +++ b/karapace/config.py @@ -53,7 +53,7 @@ "karapace_rest": False, "karapace_registry": False, "master_election_strategy": "lowest", - "protobuf_runtime_directory": "runtime" + "protobuf_runtime_directory": "runtime", } DEFAULT_LOG_FORMAT_JOURNAL = "%(name)-20s\t%(threadName)s\t%(levelname)-8s\t%(message)s" From 
447d46d8b064e3034ea4cec5c8849baab98a7ba4 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 13 Jan 2022 17:04:21 +0200 Subject: [PATCH 135/168] Update __init__.py remove TODO --- karapace/kafka_rest_apis/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/karapace/kafka_rest_apis/__init__.py b/karapace/kafka_rest_apis/__init__.py index 23860b6e4..8be851143 100644 --- a/karapace/kafka_rest_apis/__init__.py +++ b/karapace/kafka_rest_apis/__init__.py @@ -27,7 +27,6 @@ RECORD_CODES = [42201, 42202] KNOWN_FORMATS = {"json", "avro", "protobuf", "binary"} OFFSET_RESET_STRATEGIES = {"latest", "earliest"} -# TODO: PROTOBUF* check schema mapping SCHEMA_MAPPINGS = {"avro": SchemaType.AVRO, "jsonschema": SchemaType.JSONSCHEMA, "protobuf": SchemaType.PROTOBUF} TypedConsumer = namedtuple("TypedConsumer", ["consumer", "serialization_format", "config"]) From 899212cd4c0cf1c10f1267bffe28acaa42d5710a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 13 Jan 2022 17:58:30 +0200 Subject: [PATCH 136/168] Update karapace/protobuf/compare_result.py change logic Co-authored-by: Augusto Hack --- karapace/protobuf/compare_result.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index 10984d474..d8e4e0274 100644 --- a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -72,8 +72,7 @@ def add_modification(self, modification: Modification) -> None: self.result.append(record) def is_compatible(self) -> bool: - record: ModificationRecord - for record in self.result: - if not record.modification.is_compatible(): - return False - return True + return all( + record.modification.is_compatible() + for record in self.result + ) From 5d8df5a2a69b05bc961fdf3d2163f613ccdd091b Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 13 Jan 2022 17:59:49 +0200 Subject: [PATCH 137/168] Update karapace/protobuf/compare_type_storage.py Co-authored-by: 
Augusto Hack --- karapace/protobuf/compare_type_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index fc1ae66a5..13022e71c 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -8,7 +8,7 @@ def compute_name(t: ProtoType, result_path: List[str], package_name: str, types: dict) -> Optional[str]: string = t.string - if string.startswith('.'): + if string.startswith("."): name = string[1:] if types.get(name): return name From 03842e189ee187c93ceb6e4cfdc15dd57f2f4a06 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 13 Jan 2022 18:00:16 +0200 Subject: [PATCH 138/168] Update karapace/protobuf/compare_type_storage.py change quotes Co-authored-by: Augusto Hack --- karapace/protobuf/compare_type_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 13022e71c..9f76b9ff4 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -41,7 +41,7 @@ def __init__(self, self_package_name: str, other_package_name: str, result: Comp def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, types: dict) -> None: name: str if prefix: - name = prefix + '.' + type_element.name + name = prefix + "." 
+ type_element.name else: name = type_element.name from karapace.protobuf.message_element import MessageElement From 1a0473af4050cbea41bb7dfa38d050d0d4920baf Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Thu, 13 Jan 2022 18:02:32 +0200 Subject: [PATCH 139/168] fixup --- karapace/compatibility/protobuf/checks.py | 9 ++++---- karapace/protobuf/compare_result.py | 24 +++++++++++++--------- karapace/protobuf/enum_constant_element.py | 2 +- karapace/protobuf/enum_element.py | 6 +++--- karapace/protobuf/extend_element.py | 2 +- karapace/protobuf/field_element.py | 2 +- karapace/protobuf/group_element.py | 2 +- karapace/protobuf/message_element.py | 16 +++++++-------- karapace/protobuf/one_of_element.py | 6 +++--- karapace/protobuf/option_reader.py | 10 ++++----- karapace/protobuf/proto_file_element.py | 10 ++++----- karapace/protobuf/proto_parser.py | 24 +++++++++++----------- karapace/protobuf/proto_type.py | 2 +- karapace/protobuf/reserved_document.py | 2 +- karapace/protobuf/rpc_element.py | 2 +- karapace/protobuf/schema.py | 4 ++-- karapace/protobuf/syntax_reader.py | 4 ++-- karapace/rapu.py | 2 +- 18 files changed, 66 insertions(+), 63 deletions(-) diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index e93711572..2e2c87458 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -1,5 +1,5 @@ from karapace.avro_compatibility import SchemaCompatibilityResult, SchemaCompatibilityType -from karapace.protobuf.compare_result import CompareResult, ModificationRecord +from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.schema import ProtobufSchema import logging @@ -8,7 +8,7 @@ def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: ProtobufSchema) -> SchemaCompatibilityResult: - result: CompareResult = CompareResult() + result = CompareResult() log.debug("READER: %s", reader.to_schema()) log.debug("WRITER: %s", 
writer.to_schema()) writer.compare(reader, result) @@ -18,9 +18,8 @@ def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: Protobuf # TODO: maybe move incompatibility level raising to ProtoFileElement.compatible() ?? incompatibilities = [] - record: ModificationRecord - locations: set = set() - messages: set = set() + locations = set() + messages = set() for record in result.result: if not record.modification.is_compatible(): incompatibilities.append(record.modification.__str__()) diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index 10984d474..189e5bb00 100644 --- a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass, field from enum import auto, Enum @@ -38,11 +39,14 @@ def is_compatible(self) -> bool: ] +@dataclass class ModificationRecord: - def __init__(self, modification: Modification, path: str) -> None: - self.modification = modification - self.path = path - if modification.is_compatible(): + modification: Modification + path: str + message: str = field(init=False) + + def __post_init__(self) -> None: + if self.modification.is_compatible(): self.message = f"Compatible modification {self.modification} found" else: self.message = f"Incompatible modification {self.modification} found" @@ -53,14 +57,14 @@ def to_str(self) -> str: class CompareResult: def __init__(self) -> None: - self.result: list = [] - self.path: list = [] - self.canonical_name: list = [] + self.result = [] + self.path = [] + self.canonical_name = [] - def push_path(self, string: str, canonical: bool = False) -> None: + def push_path(self, name_element: str, canonical: bool = False) -> None: if canonical: - self.canonical_name.append(str(string)) - self.path.append(str(string)) + self.canonical_name.append(name_element) + self.path.append(name_element) def pop_path(self, canonical: bool = False) -> None: if canonical: diff --git 
a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index 0eb541540..5b3321de4 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -21,7 +21,7 @@ def __init__( self.documentation = documentation or "" def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append(f"{self.name} = {self.tag}") if self.options: diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 9aa490b77..9998cc840 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -18,7 +18,7 @@ def __init__( self.constants = constants or [] def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append(f"enum {self.name} {{") @@ -37,8 +37,8 @@ def to_schema(self) -> str: return "".join(result) def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTypes) -> None: - self_tags: dict = {} - other_tags: dict = {} + self_tags = {} + other_tags = {} constant: EnumConstantElement if types: pass diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index 7cd9d2d3c..78479b9e0 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -13,7 +13,7 @@ def __init__(self, location: Location, name: str, documentation: str = "", field self.fields = fields or [] def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append(f"extend {self.name} {{") if self.fields: diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 7a55337ec..176fc3ff1 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -35,7 +35,7 @@ def __init__( self.options = options or [] def to_schema(self) -> str: - result: list = [] + result = [] 
append_documentation(result, self.documentation) if self.label: diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index 0b96cecad..b68081b1b 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -25,7 +25,7 @@ def __init__( self.documentation = documentation def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) # TODO: compare lower() to lowercase() and toLowerCase(Locale.US) Kotlin diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index 42eb4e852..0df375f20 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -38,7 +38,7 @@ def __init__( self.groups = groups or [] def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append(f"message {self.name} {{") if self.reserveds: @@ -85,10 +85,10 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare field: FieldElement subfield: FieldElement one_of: OneOfElement - self_tags: dict = {} - other_tags: dict = {} - self_one_ofs: dict = {} - other_one_ofs: dict = {} + self_tags = {} + other_tags = {} + self_one_ofs = {} + other_one_ofs = {} for field in self.fields: self_tags[field.tag] = field @@ -103,7 +103,7 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare other_one_ofs[one_of.name] = one_of for field in other.one_ofs: - result.push_path(field.name) + result.push_path(str(field.name)) convert_count = 0 for subfield in field.fields: tag = subfield.tag @@ -116,7 +116,7 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare # Compare fields for tag in chain(self_tags.keys(), other_tags.keys() - self_tags.keys()): - result.push_path(tag) + result.push_path(str(tag)) if self_tags.get(tag) is None: result.add_modification(Modification.FIELD_ADD) @@ -128,7 +128,7 @@ def 
compare(self, other: 'MessageElement', result: CompareResult, types: Compare result.pop_path() # Compare OneOfs for name in chain(self_one_ofs.keys(), other_one_ofs.keys() - self_one_ofs.keys()): - result.push_path(name) + result.push_path(str(name)) if self_one_ofs.get(name) is None: result.add_modification(Modification.ONE_OF_ADD) diff --git a/karapace/protobuf/one_of_element.py b/karapace/protobuf/one_of_element.py index 1740e645b..0f1f16a61 100644 --- a/karapace/protobuf/one_of_element.py +++ b/karapace/protobuf/one_of_element.py @@ -15,7 +15,7 @@ def __init__(self, name: str, documentation: str = "", fields=None, groups=None, self.groups = groups or [] def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append(f"oneof {self.name} {{") if self.options: @@ -35,8 +35,8 @@ def to_schema(self) -> str: return "".join(result) def compare(self, other: 'OneOfElement', result: CompareResult, types: CompareTypes) -> None: - self_tags: dict = {} - other_tags: dict = {} + self_tags = {} + other_tags = {} for field in self.fields: self_tags[field.tag] = field diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index 8dfcc7989..fd143a2f8 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -24,7 +24,7 @@ def read_options(self) -> list: """ if not self.reader.peek_char('['): return [] - result: list = [] + result = [] while True: result.append(self.read_option('=')) @@ -45,7 +45,7 @@ def read_option(self, key_value_separator: str) -> OptionElement: if is_extension: name = f"[{name}]" - sub_names: list = [] + sub_names = [] c = self.reader.read_char() if c == '.': # Read nested field name. For example "baz" in "(foo.bar).baz = 12". 
@@ -95,7 +95,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) """ if self.reader.read_char() != open_brace: raise AssertionError() - result: dict = {} + result = {} while True: if self.reader.peek_char(close_brace): # If we see the close brace, finish immediately. This handles :}/[] and ,}/,] cases. @@ -118,7 +118,7 @@ def read_map(self, open_brace: str, close_brace: str, key_value_separator: str) elif isinstance(previous, list): # Add to previous List self.add_to_list(previous, value) else: - new_list: list = [] + new_list = [] new_list.append(previous) self.add_to_list(new_list, value) result[name] = new_list @@ -140,7 +140,7 @@ def read_list(self) -> list: separating values. """ self.reader.require('[') - result: list = [] + result = [] while True: # If we see the close brace, finish immediately. This handles [] and ,] cases. if self.reader.peek_char(']'): diff --git a/karapace/protobuf/proto_file_element.py b/karapace/protobuf/proto_file_element.py index 88281d828..c798104fb 100644 --- a/karapace/protobuf/proto_file_element.py +++ b/karapace/protobuf/proto_file_element.py @@ -104,10 +104,10 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe if self.syntax != other.syntax: result.add_modification(Modification.SYNTAX_ALTER) - self_types: dict = {} - other_types: dict = {} - self_indexes: dict = {} - other_indexes: dict = {} + self_types = {} + other_types = {} + self_indexes = {} + other_indexes = {} compare_types = CompareTypes(self.package_name, other.package_name, result) type_: TypeElement for i, type_ in enumerate(self.types): @@ -124,7 +124,7 @@ def compare(self, other: 'ProtoFileElement', result: CompareResult) -> CompareRe for name in chain(self_types.keys(), other_types.keys() - self_types.keys()): - result.push_path(name, True) + result.push_path(str(name), True) if self_types.get(name) is None and other_types.get(name) is not None: if isinstance(other_types[name], MessageElement): 
diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index b0822963b..419af2b43 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -245,7 +245,7 @@ def read_message(self, location: Location, documentation: str) -> MessageElement def read_extend(self, location: Location, documentation: str) -> ExtendElement: """ Reads an extend declaration. """ name = self.reader.read_name() - fields: list = [] + fields = [] self.reader.require("{") while True: nested_documentation = self.reader.read_documentation() @@ -270,7 +270,7 @@ def read_service(self, location: Location, documentation: str) -> ServiceElement """ Reads a service declaration and returns it. """ name = self.reader.read_name() rpcs = [] - options: list = [] + options = [] self.reader.require('{') while True: rpc_documentation = self.reader.read_documentation() @@ -296,8 +296,8 @@ def read_service(self, location: Location, documentation: str) -> ServiceElement def read_enum_element(self, location: Location, documentation: str) -> EnumElement: """ Reads an enumerated atype declaration and returns it. """ name = self.reader.read_name() - constants: list = [] - options: list = [] + constants = [] + options = [] self.reader.require("{") while True: value_documentation = self.reader.read_documentation() @@ -354,7 +354,7 @@ def read_field_with_label( tag = self.reader.read_int() # Mutable copy to extract the default value, and add packed if necessary. 
- options: list = OptionReader(self.reader).read_options() + options = OptionReader(self.reader).read_options() default_value = self.strip_default(options) json_name = self.strip_json_name(options) @@ -396,9 +396,9 @@ def strip_value(name: str, options: list) -> Union[None, str]: def read_one_of(self, documentation: str) -> OneOfElement: name: str = self.reader.read_name() - fields: list = [] - groups: list = [] - options: list = [] + fields = [] + groups = [] + options = [] self.reader.require("{") while True: @@ -433,7 +433,7 @@ def read_group( name = self.reader.read_word() self.reader.require("=") tag = self.reader.read_int() - fields: list = [] + fields = [] self.reader.require("{") while True: @@ -453,7 +453,7 @@ def read_group( def read_reserved(self, location: Location, documentation: str) -> ReservedElement: """ Reads a reserved tags and names list like "reserved 10, 12 to 14, 'foo';". """ - values: list = [] + values = [] while True: ch = self.reader.peek_char() if ch in ["\"", "'"]: @@ -487,7 +487,7 @@ def read_reserved(self, location: Location, documentation: str) -> ReservedEleme def read_extensions(self, location: Location, documentation: str) -> ExtensionsElement: """ Reads extensions like "extensions 101;" or "extensions 101 to max;". 
""" - values: list = [] + values = [] while True: start: int = self.reader.read_int() ch = self.reader.peek_char() @@ -562,7 +562,7 @@ def read_rpc(self, location: Location, documentation: str) -> RpcElement: self.reader.require(')') - options: list = [] + options = [] if self.reader.peek_char('{'): while True: rpc_documentation = self.reader.read_documentation() diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index d7621a596..b55a9c2c5 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -64,7 +64,7 @@ def static_init(cls) -> None: cls.SFIXED64, cls.SINT32, cls.SINT64, cls.STRING, cls.UINT32, cls.UINT64 ] - cls.SCALAR_TYPES: dict = {} + cls.SCALAR_TYPES = {} for a in cls.SCALAR_TYPES_: cls.SCALAR_TYPES[a.string] = a diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 3d037d567..400887a47 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -14,7 +14,7 @@ def __init__(self, location: Location, documentation: str = "", values: list = N self.values = values or [] def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append("reserved ") diff --git a/karapace/protobuf/rpc_element.py b/karapace/protobuf/rpc_element.py index 3c8b86a61..51f4ebb35 100644 --- a/karapace/protobuf/rpc_element.py +++ b/karapace/protobuf/rpc_element.py @@ -27,7 +27,7 @@ def __init__( self.options = options or [] def to_schema(self) -> str: - result: list = [] + result = [] append_documentation(result, self.documentation) result.append(f"rpc {self.name} (") diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index c1acc10c8..d539bfa84 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -30,7 +30,7 @@ def add_slashes(text: str) -> str: def message_element_string(element: MessageElement) -> str: - result: list = [] + result = [] 
append_documentation(result, element.documentation) result.append(f"message {element.name} {{") if element.reserveds: @@ -113,7 +113,7 @@ def __str__(self) -> str: return self.cache_string def to_schema(self) -> str: - strings: list = [] + strings = [] shm: ProtoFileElement = self.proto_file_element if shm.syntax: strings.append("syntax = \"") diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index f38371b08..947704eb3 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -61,7 +61,7 @@ def read_quoted_string(self) -> str: if start_quote not in ('"', "'"): raise IllegalStateException(" quote expected") - result: list = [] + result = [] while self.pos < len(self.data): c = self.data[self.pos] @@ -217,7 +217,7 @@ def read_comment(self) -> str: self.pos += 1 result: str = "" if tval == ord('*'): - buffer: list = [] + buffer = [] start_of_line = True while self.pos + 1 < len(self.data): # pylint: disable=no-else-break diff --git a/karapace/rapu.py b/karapace/rapu.py index 4aa3ad2e0..a552248ff 100644 --- a/karapace/rapu.py +++ b/karapace/rapu.py @@ -197,7 +197,7 @@ def check_rest_headers(self, request: HTTPRequest) -> dict: # pylint:disable=in method = request.method default_content = "application/vnd.kafka.json.v2+json" default_accept = "*/*" - result: dict = {"content_type": default_content} + result = {"content_type": default_content} content_matcher = REST_CONTENT_TYPE_RE.search( cgi.parse_header(request.get_header("Content-Type", default_content))[0] ) From 383f715525c7cb0a361e26d1a6d5fb569be54571 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 14 Jan 2022 00:05:54 +0200 Subject: [PATCH 140/168] Update karapace/protobuf/compare_type_storage.py Co-authored-by: Augusto Hack --- karapace/protobuf/compare_type_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 
9f76b9ff4..85c8158b3 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -98,7 +98,7 @@ def other_type_short_name(self, t: ProtoType) -> Optional[str]: return name[(len(type_record.package_name) + 1):] return name - def lock_message(self, message: object) -> bool: + def lock_message(self, message: MessageElement) -> bool: if message in self.locked_messages: return False self.locked_messages.append(message) From 876996ade4f2440d1520c0a109bfc871ab869c8b Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 14 Jan 2022 00:09:03 +0200 Subject: [PATCH 141/168] fixup circular dependency --- karapace/protobuf/compare_type_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 85c8158b3..9f76b9ff4 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -98,7 +98,7 @@ def other_type_short_name(self, t: ProtoType) -> Optional[str]: return name[(len(type_record.package_name) + 1):] return name - def lock_message(self, message: MessageElement) -> bool: + def lock_message(self, message: object) -> bool: if message in self.locked_messages: return False self.locked_messages.append(message) From fe60f7c832123c1020521f962d46158a7c4a2989 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 14 Jan 2022 00:11:02 +0200 Subject: [PATCH 142/168] Update karapace/protobuf/compare_type_storage.py fixup quotes Co-authored-by: Augusto Hack --- karapace/protobuf/compare_type_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 9f76b9ff4..3528bdedd 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -17,7 +17,7 @@ def compute_name(t: ProtoType, result_path: List[str], package_name: str, types: if 
package_name: canonical_name.insert(0, package_name) while len(canonical_name) > 0: - pretender: str = ".".join(canonical_name) + '.' + string + pretender: str = ".".join(canonical_name) + "." + string pt = types.get(pretender) if pt is not None: return pretender From b658e81d6157ade8b068f796f3c8c83eba936489 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 14 Jan 2022 00:12:37 +0200 Subject: [PATCH 143/168] Update karapace/protobuf/compare_type_storage.py remove quotes Co-authored-by: Augusto Hack --- karapace/protobuf/compare_type_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 3528bdedd..a0f5703e3 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -48,7 +48,7 @@ def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, from karapace.protobuf.field_element import FieldElement if isinstance(type_element, MessageElement): # add support of MapEntry messages - if 'map_entry' in type_element.options: + if "map_entry" in type_element.options: key: Optional[FieldElement] = next((f for f in type_element.fields if f.name == 'key'), None) value: Optional[FieldElement] = next((f for f in type_element.fields if f.name == 'value'), None) types[name] = TypeRecordMap(package_name, type_element, key, value) From e082ab30aee4c40ffd1487784a54d79002a1a8ef Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 14 Jan 2022 00:47:21 +0200 Subject: [PATCH 144/168] Update field.py remove deprecated TODO --- karapace/protobuf/field.py | 1 - 1 file changed, 1 deletion(-) diff --git a/karapace/protobuf/field.py b/karapace/protobuf/field.py index bfa3e407f..c24a25257 100644 --- a/karapace/protobuf/field.py +++ b/karapace/protobuf/field.py @@ -1,7 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Field.kt -# TODO: We ported 
only code which is used by schema parser. Full class may be ported if required from enum import Enum From aad40a368cf51c8cdb0552aa09aaf305a59bcca5 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 14 Jan 2022 00:58:31 +0200 Subject: [PATCH 145/168] Apply suggestions from code review fixup style Co-authored-by: Augusto Hack --- karapace/protobuf/compare_type_storage.py | 4 ++-- karapace/protobuf/enum_constant_element.py | 2 +- karapace/protobuf/enum_element.py | 2 +- karapace/protobuf/field_element.py | 2 +- karapace/protobuf/io.py | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index a0f5703e3..1fd43d840 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -49,8 +49,8 @@ def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, if isinstance(type_element, MessageElement): # add support of MapEntry messages if "map_entry" in type_element.options: - key: Optional[FieldElement] = next((f for f in type_element.fields if f.name == 'key'), None) - value: Optional[FieldElement] = next((f for f in type_element.fields if f.name == 'value'), None) + key: Optional[FieldElement] = next((f for f in type_element.fields if f.name == "key"), None) + value: Optional[FieldElement] = next((f for f in type_element.fields if f.name == "value"), None) types[name] = TypeRecordMap(package_name, type_element, key, value) else: types[name] = TypeRecord(package_name, type_element) diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index 5b3321de4..bc9bce0d0 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -11,7 +11,7 @@ def __init__( name: str, tag: int, documentation: str = "", - options: list = None, + options: Optional[list] = None, ) -> None: self.location = location self.name = name diff 
--git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 9998cc840..9bbda6aab 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -36,7 +36,7 @@ def to_schema(self) -> str: result.append("}\n") return "".join(result) - def compare(self, other: 'EnumElement', result: CompareResult, types: CompareTypes) -> None: + def compare(self, other: "EnumElement", result: CompareResult, types: CompareTypes) -> None: self_tags = {} other_tags = {} constant: EnumConstantElement diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index 176fc3ff1..b5a390942 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -69,7 +69,7 @@ def options_with_special_values(self) -> list: # Only non-repeated scalar types and Enums support default values. - def compare(self, other: 'FieldElement', result: CompareResult, types: CompareTypes) -> None: + def compare(self, other: "FieldElement", result: CompareResult, types: CompareTypes) -> None: if self.name != other.name: result.add_modification(Modification.FIELD_NAME_ALTER) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index f8116f3d3..765e1d614 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -60,7 +60,7 @@ def read_varint(bio: BytesIO) -> int: return varint -def read_indexes(bio: BytesIO): +def read_indexes(bio: BytesIO) -> List[int]: try: size: int = read_varint(bio) except EOFError: @@ -72,7 +72,7 @@ def read_indexes(bio: BytesIO): def find_message_name(schema: ProtobufSchema, indexes: List[int]) -> str: - result: list = [] + result: List[str] = [] types = schema.proto_file_element.types for index in indexes: try: @@ -114,7 +114,7 @@ def get_protobuf_class_instance(schema: ProtobufSchema, class_name: str, cfg: Di return class_to_call() -def read_data(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema, bio: BytesIO): +def read_data(writer_schema: ProtobufSchema, 
reader_schema: ProtobufSchema, bio: BytesIO) -> Any: # TODO (serge): check and polish it if not match_schemas(writer_schema, reader_schema): fail_msg = 'Schemas do not match.' From db3f5925783a68e6bcf5e5ba5de3d0b3068be696 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 14 Jan 2022 01:11:00 +0200 Subject: [PATCH 146/168] fixup style --- karapace/protobuf/compare_result.py | 5 +---- karapace/protobuf/enum_constant_element.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index 67cd8d045..b52b505d8 100644 --- a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -76,7 +76,4 @@ def add_modification(self, modification: Modification) -> None: self.result.append(record) def is_compatible(self) -> bool: - return all( - record.modification.is_compatible() - for record in self.result - ) + return all(record.modification.is_compatible() for record in self.result) diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index bc9bce0d0..34e39c2da 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -2,6 +2,7 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumConstantElement.kt from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_options +from typing import Optional class EnumConstantElement: From f8745d25eb6ad555667e2902e98667dc73a0dd92 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Wed, 19 Jan 2022 00:33:07 +0200 Subject: [PATCH 147/168] fixup annotations part one --- karapace/protobuf/compare_type_storage.py | 18 ++++++++++++------ karapace/protobuf/enum_constant_element.py | 4 ++-- karapace/protobuf/proto_type.py | 4 ++-- karapace/protobuf/type_element.py | 10 ++++++---- karapace/protobuf/utils.py | 5 +++-- 5 files changed, 25 
insertions(+), 16 deletions(-) diff --git a/karapace/protobuf/compare_type_storage.py b/karapace/protobuf/compare_type_storage.py index 1fd43d840..a7c7344f5 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -2,7 +2,11 @@ from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.type_element import TypeElement -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, TYPE_CHECKING, Union + +if TYPE_CHECKING: + from karapace.protobuf.message_element import MessageElement + from karapace.protobuf.field_element import FieldElement def compute_name(t: ProtoType, result_path: List[str], package_name: str, types: dict) -> Optional[str]: @@ -34,8 +38,8 @@ def __init__(self, self_package_name: str, other_package_name: str, result: Comp self.other_package_name = other_package_name self.self_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} self.other_types: Dict[str, Union[TypeRecord, TypeRecordMap]] = {} - self.locked_messages: List[object] = [] - self.environment: List[object] = [] + self.locked_messages: List['MessageElement'] = [] + self.environment: List['MessageElement'] = [] self.result = result def add_a_type(self, prefix: str, package_name: str, type_element: TypeElement, types: dict) -> None: @@ -98,13 +102,13 @@ def other_type_short_name(self, t: ProtoType) -> Optional[str]: return name[(len(type_record.package_name) + 1):] return name - def lock_message(self, message: object) -> bool: + def lock_message(self, message: 'MessageElement') -> bool: if message in self.locked_messages: return False self.locked_messages.append(message) return True - def unlock_message(self, message: object) -> bool: + def unlock_message(self, message: 'MessageElement') -> bool: if message in self.locked_messages: self.locked_messages.remove(message) return True @@ -118,7 +122,9 @@ def __init__(self, package_name: 
str, type_element: TypeElement) -> None: class TypeRecordMap(TypeRecord): - def __init__(self, package_name: str, type_element: TypeElement, key: object, value: object) -> None: + def __init__( + self, package_name: str, type_element: TypeElement, key: Optional['FieldElement'], value: Optional['FieldElement'] + ) -> None: super().__init__(package_name, type_element) try: self.key = key diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index 34e39c2da..1b21f8b74 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -2,7 +2,7 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumConstantElement.kt from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_options -from typing import Optional +from typing import List, Optional class EnumConstantElement: @@ -12,7 +12,7 @@ def __init__( name: str, tag: int, documentation: str = "", - options: Optional[list] = None, + options: Optional[List[str]] = None, ) -> None: self.location = location self.name = name diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index b55a9c2c5..f606bd77d 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -130,7 +130,7 @@ def type_url(self) -> str: """ return None if self.is_scalar or self.is_map else f"type.googleapis.com/{self.string}" - def nested_type(self, name: str) -> object: # ProtoType + def nested_type(self, name: str) -> 'ProtoType': check(not self.is_scalar, "scalar cannot have a nested type") check(not self.is_map, "map cannot have a nested type") @@ -170,7 +170,7 @@ def get2(name: str) -> 'ProtoType': return ProtoType(False, name) @staticmethod - def get3(key_type: 'ProtoType', value_type: 'ProtoType', name: str) -> object: + def get3(key_type: 'ProtoType', value_type: 'ProtoType', name: str) -> 'ProtoType': return 
ProtoType(False, name, key_type, value_type) # schema compatibility check functionality karapace addon diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index 76b84fd0e..06a179bf8 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -2,8 +2,10 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/TypeElement.kt from dataclasses import dataclass from karapace.protobuf.location import Location -from karapace.protobuf.option_element import OptionElement -from typing import List +from typing import List, TYPE_CHECKING + +if TYPE_CHECKING: + from karapace.protobuf.option_element import OptionElement @dataclass @@ -11,8 +13,8 @@ class TypeElement: location: Location name: str documentation: str - options: List[OptionElement] - nested_types: List[object] + options: List['OptionElement'] + nested_types: List['TypeElement'] def to_schema(self) -> str: pass diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 57b0bcef6..0c0d896da 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -1,6 +1,7 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/Util.kt +from karapace.protobuf.type_element import TypeElement from typing import List @@ -24,7 +25,7 @@ def append_documentation(data: List[str], documentation: str) -> None: data.append("\n") -def append_options(data: List[str], options: List[object]) -> None: +def append_options(data: List[str], options: List[TypeElement]) -> None: count = len(options) if count == 1: data.append('[') @@ -42,7 +43,7 @@ def append_options(data: List[str], options: List[object]) -> None: data.append(']') -def try_to_schema(obj: object) -> str: +def try_to_schema(obj: TypeElement) -> str: try: return obj.to_schema() except AttributeError: From e9cc744b2bb3c11a573d87c3968812fa39ff6647 Mon Sep 17 00:00:00 2001 From: Yu Wang Date: Thu, 20 Jan 
2022 18:09:16 +1100 Subject: [PATCH 148/168] Add SASL configurations to AIOKafkaProducer in karapace/kafka_rest_apis/__init__.py. --- karapace/kafka_rest_apis/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/karapace/kafka_rest_apis/__init__.py b/karapace/kafka_rest_apis/__init__.py index 3e3c86368..5c68275ce 100644 --- a/karapace/kafka_rest_apis/__init__.py +++ b/karapace/kafka_rest_apis/__init__.py @@ -181,6 +181,9 @@ async def _create_async_producer(self) -> AIOKafkaProducer: bootstrap_servers=self.config["bootstrap_uri"], security_protocol=self.config["security_protocol"], ssl_context=None if self.config["security_protocol"] == "PLAINTEXT" else create_ssl_context(self.config), + sasl_mechanism=self.config["sasl_mechanism"], + sasl_plain_username=None if not self.config["sasl_plain_username"] else self.config["sasl_plain_username"], + sasl_plain_password=None if not self.config["sasl_plain_password"] else self.config["sasl_plain_password"], metadata_max_age_ms=self.config["metadata_max_age_ms"], acks=acks, compression_type=self.config["producer_compression_type"], From 8c90d062308060e1fc13fee9247a34276f81bf1c Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:00:54 +0200 Subject: [PATCH 149/168] Update karapace/protobuf/proto_type.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index f606bd77d..14341fe2b 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -117,7 +117,7 @@ def to_kind(self) -> OptionElement.Kind: }.get(self.simple_name, OptionElement.Kind.ENUM) @property - def enclosing_type_or_package(self) -> str: + def enclosing_type_or_package(self) -> Optional[str]: """ Returns the enclosing type, or null if self type is not nested in another type. 
""" dot = self.string.rfind(".") return None if (dot == -1) else self.string[:dot] From 3789079f0f93c6c8aba6e4e1d05a63f64d276b43 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:02:21 +0200 Subject: [PATCH 150/168] Update karapace/protobuf/proto_type.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 14341fe2b..354c64b4a 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -97,7 +97,7 @@ def __init__( # TODO: must be IllegalArgumentException raise Exception(f"map key must be non-byte, non-floating point scalar: {key_type}") - def to_kind(self) -> OptionElement.Kind: + def to_kind(self) -> Optional[OptionElement.Kind]: return { "bool": OptionElement.Kind.BOOLEAN, "string": OptionElement.Kind.STRING, From 64bb4b0e564ec05abab3eb13d623db908f4a9ad3 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:03:02 +0200 Subject: [PATCH 151/168] fixups of styles --- karapace/protobuf/compare_type_storage.py | 14 ++-- karapace/protobuf/encoding_variants.py | 68 +++++++++++++++++ karapace/protobuf/enum_constant_element.py | 24 +++--- karapace/protobuf/enum_element.py | 9 ++- karapace/protobuf/extend_element.py | 18 +++-- karapace/protobuf/extensions_element.py | 12 +-- karapace/protobuf/field_element.py | 3 +- karapace/protobuf/group_element.py | 27 +++---- karapace/protobuf/io.py | 85 ++-------------------- karapace/protobuf/kotlin_wrapper.py | 35 +-------- karapace/protobuf/option_element.py | 6 -- karapace/protobuf/option_reader.py | 8 +- karapace/protobuf/proto_parser.py | 4 +- karapace/protobuf/proto_type.py | 30 +++++--- karapace/protobuf/utils.py | 14 ++-- 15 files changed, 159 insertions(+), 198 deletions(-) create mode 100644 karapace/protobuf/encoding_variants.py diff --git a/karapace/protobuf/compare_type_storage.py 
b/karapace/protobuf/compare_type_storage.py index a7c7344f5..8a5263a2a 100644 --- a/karapace/protobuf/compare_type_storage.py +++ b/karapace/protobuf/compare_type_storage.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from karapace.protobuf.compare_result import CompareResult from karapace.protobuf.exception import IllegalArgumentException from karapace.protobuf.proto_type import ProtoType @@ -115,10 +116,10 @@ def unlock_message(self, message: 'MessageElement') -> bool: return False +@dataclass class TypeRecord: - def __init__(self, package_name: str, type_element: TypeElement) -> None: - self.package_name = package_name - self.type_element = type_element + package_name: str + type_element: TypeElement class TypeRecordMap(TypeRecord): @@ -126,11 +127,8 @@ def __init__( self, package_name: str, type_element: TypeElement, key: Optional['FieldElement'], value: Optional['FieldElement'] ) -> None: super().__init__(package_name, type_element) - try: - self.key = key - self.value = value - except Exception: - raise IllegalArgumentException("TypeRecordMap") + self.key = key + self.value = value def map_type(self) -> ProtoType: return ProtoType.get2(f"map<{self.key.element_type}, {self.value.element_type}>") diff --git a/karapace/protobuf/encoding_variants.py b/karapace/protobuf/encoding_variants.py new file mode 100644 index 000000000..3511f8195 --- /dev/null +++ b/karapace/protobuf/encoding_variants.py @@ -0,0 +1,68 @@ +# Workaround to encode/decode indexes in protobuf messages +# Based on https://developers.google.com/protocol-buffers/docs/encoding#varints + +from io import BytesIO +from karapace.protobuf.exception import IllegalArgumentException +from typing import List + +ZERO_BYTE = b'\x00' + + +def read_varint(bio: BytesIO) -> int: + """Read a variable-length integer. 
+ """ + varint = 0 + read_bytes = 0 + + while True: + char = bio.read(1) + if len(char) == 0: + if read_bytes == 0: + return 0 + raise EOFError(f"EOF while reading varint, value is {varint} so far") + + byte = ord(char) + varint += (byte & 0x7F) << (7 * read_bytes) + + read_bytes += 1 + + if not byte & 0x80: + return varint + + +def read_indexes(bio: BytesIO) -> List[int]: + try: + size: int = read_varint(bio) + except EOFError: + # TODO: change exception + raise IllegalArgumentException("problem with reading binary data") + if size == 0: + return [0] + return [read_varint(bio) for _ in range(size)] + + +def write_varint(bio: BytesIO, value: int) -> int: + if value < 0: + raise ValueError(f"value must not be negative, got {value}") + + if value == 0: + bio.write(ZERO_BYTE) + return 1 + + written_bytes = 0 + while value > 0: + to_write = value & 0x7f + value = value >> 7 + + if value > 0: + to_write |= 0x80 + + bio.write(bytearray(to_write)[0]) + written_bytes += 1 + + return written_bytes + + +def write_indexes(bio: BytesIO, indexes: List[int]) -> None: + for i in indexes: + write_varint(bio, i) diff --git a/karapace/protobuf/enum_constant_element.py b/karapace/protobuf/enum_constant_element.py index 1b21f8b74..b49f80512 100644 --- a/karapace/protobuf/enum_constant_element.py +++ b/karapace/protobuf/enum_constant_element.py @@ -1,25 +1,19 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/EnumConstantElement.kt +from attr import dataclass from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement from karapace.protobuf.utils import append_documentation, append_options -from typing import List, Optional +from typing import List +@dataclass class EnumConstantElement: - def __init__( - self, - location: Location, - name: str, - tag: int, - documentation: str = "", - options: Optional[List[str]] = None, - ) -> None: - self.location = location - 
self.name = name - - self.tag = tag - self.options = options or [] - self.documentation = documentation or "" + location: Location + name: str + tag: int + documentation: str = "" + options: List[OptionElement] = [] def to_schema(self) -> str: result = [] diff --git a/karapace/protobuf/enum_element.py b/karapace/protobuf/enum_element.py index 9bbda6aab..72a84b5be 100644 --- a/karapace/protobuf/enum_element.py +++ b/karapace/protobuf/enum_element.py @@ -5,13 +5,20 @@ from karapace.protobuf.compare_type_storage import CompareTypes from karapace.protobuf.enum_constant_element import EnumConstantElement from karapace.protobuf.location import Location +from karapace.protobuf.option_element import OptionElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented +from typing import List class EnumElement(TypeElement): def __init__( - self, location: Location, name: str, documentation: str = "", options: list = None, constants: list = None + self, + location: Location, + name: str, + documentation: str = "", + options: List[OptionElement] = None, + constants: List[EnumConstantElement] = None ) -> None: # Enums do not allow nested type declarations. 
super().__init__(location, name, documentation, options or [], []) diff --git a/karapace/protobuf/extend_element.py b/karapace/protobuf/extend_element.py index 78479b9e0..8f49c765f 100644 --- a/karapace/protobuf/extend_element.py +++ b/karapace/protobuf/extend_element.py @@ -1,16 +1,18 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtendElement.kt - +from dataclasses import dataclass +from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented +from typing import List +@dataclass class ExtendElement: - def __init__(self, location: Location, name: str, documentation: str = "", fields: list = None) -> None: - self.location = location - self.name = name - self.documentation = documentation - self.fields = fields or [] + location: Location + name: str + documentation: str = "" + fields: List[FieldElement] = None def to_schema(self) -> str: result = [] @@ -18,8 +20,8 @@ def to_schema(self) -> str: result.append(f"extend {self.name} {{") if self.fields: result.append("\n") - for field in self.fields: - append_indented(result, field.to_schema()) + for field in self.fields: + append_indented(result, field.to_schema()) result.append("}\n") return "".join(result) diff --git a/karapace/protobuf/extensions_element.py b/karapace/protobuf/extensions_element.py index 18e5bd116..45afb2e40 100644 --- a/karapace/protobuf/extensions_element.py +++ b/karapace/protobuf/extensions_element.py @@ -1,17 +1,17 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ExtensionsElement.kt - +from dataclasses import dataclass from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, MAX_TAG_VALUE +from typing import List, Union +@dataclass 
class ExtensionsElement: - def __init__(self, location: Location, documentation: str = "", values: list = None) -> None: - self.location = location - self.documentation = documentation - """ An [Int] or [IntRange] tag. """ - self.values = values or [] + location: Location + documentation: str = "" + values: List[Union[int, KotlinRange]] = None def to_schema(self) -> str: result = [] diff --git a/karapace/protobuf/field_element.py b/karapace/protobuf/field_element.py index b5a390942..58766afa8 100644 --- a/karapace/protobuf/field_element.py +++ b/karapace/protobuf/field_element.py @@ -7,6 +7,7 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_type import ProtoType from karapace.protobuf.utils import append_documentation, append_options +from typing import List class FieldElement: @@ -51,7 +52,7 @@ def to_schema(self) -> str: return "".join(result) - def options_with_special_values(self) -> list: + def options_with_special_values(self) -> List[OptionElement]: """ Both `default` and `json_name` are defined in the schema like options but they are actually not options themselves as they're missing from `google.protobuf.FieldOptions`. 
""" diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index b68081b1b..6280df4c1 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -1,28 +1,21 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/GroupElement.kt +from dataclasses import dataclass from karapace.protobuf.field import Field +from karapace.protobuf.field_element import FieldElement from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation, append_indented -from typing import Union +from typing import List, Optional +@dataclass class GroupElement: - def __init__( - self, - label: Union[None, Field.Label], - location: Location, - name: str, - tag: int, - documentation: str = "", - fields: list = None - ) -> None: - self.label = label - self.location = location - self.name = name - self.tag = tag - - self.fields = fields or [] - self.documentation = documentation + label: Optional[Field.Label] + location: Location + name: str + tag: int + documentation: str = "" + fields: List[FieldElement] = None def to_schema(self) -> str: result = [] diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 765e1d614..0f6d1c194 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -1,5 +1,6 @@ from io import BytesIO from karapace import config +from karapace.protobuf.encoding_variants import read_indexes, write_indexes from karapace.protobuf.exception import IllegalArgumentException, ProtobufSchemaResolutionException, ProtobufTypeException from karapace.protobuf.message_element import MessageElement from karapace.protobuf.protobuf_to_dict import dict_to_protobuf, protobuf_to_dict @@ -14,8 +15,6 @@ import os import subprocess -ZERO_BYTE = b'\x00' - logger = logging.getLogger(__name__) @@ -23,54 +22,12 @@ def calculate_class_name(name: str) -> str: return "c_" + hashlib.md5(name.encode('utf-8')).hexdigest() 
-def check_props(schema_one, schema_two, prop_list): - try: - return all(getattr(schema_one, prop) == getattr(schema_two, prop) for prop in prop_list) - except AttributeError: - return False - - def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) -> bool: # TODO (serge): schema comparison by fields required return str(writer_schema) == str(reader_schema) -def read_varint(bio: BytesIO) -> int: - """Read a variable-length integer. - - :returns: Integer - """ - varint = 0 - read_bytes = 0 - - while True: - char = bio.read(1) - if len(char) == 0: - if read_bytes == 0: - return 0 - raise EOFError(f"EOF while reading varint, value is {varint} so far") - - byte = ord(char) - varint += (byte & 0x7F) << (7 * read_bytes) - - read_bytes += 1 - - if not byte & 0x80: - return varint - - -def read_indexes(bio: BytesIO) -> List[int]: - try: - size: int = read_varint(bio) - except EOFError: - # TODO: change exception - raise IllegalArgumentException("problem with reading binary data") - if size == 0: - return [0] - return [read_varint(bio) for _ in range(size)] - - def find_message_name(schema: ProtobufSchema, indexes: List[int]) -> str: result: List[str] = [] types = schema.proto_file_element.types @@ -132,7 +89,7 @@ def read_data(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema, bio: class ProtobufDatumReader: """Deserialize Protobuf-encoded data into a Python data structure.""" - def __init__(self, writer_schema=None, reader_schema=None): + def __init__(self, writer_schema: ProtobufSchema = None, reader_schema: ProtobufSchema = None) -> None: """ As defined in the Protobuf specification, we call the schema encoded in the data the "writer's schema", and the schema expected by the reader the "reader's schema". 
@@ -146,55 +103,27 @@ def read(self, bio: BytesIO): return protobuf_to_dict(read_data(self._writer_schema, self._reader_schema, bio), True) -def write_varint(bio: BytesIO, value: int) -> int: - if value < 0: - raise ValueError(f"value must not be negative, got {value}") - - if value == 0: - bio.write(ZERO_BYTE) - return 1 - - written_bytes = 0 - while value > 0: - to_write = value & 0x7f - value = value >> 7 - - if value > 0: - to_write |= 0x80 - - bio.write(bytearray(to_write)[0]) - written_bytes += 1 - - return written_bytes - - -def write_indexes(bio: BytesIO, indexes: List[int]) -> None: - for i in indexes: - write_varint(bio, i) - - class ProtobufDatumWriter: """ProtobufDatumWriter for generic python objects.""" - def __init__(self, writer_schema=None): + def __init__(self, writer_schema: ProtobufSchema = None): self._writer_schema = writer_schema a: ProtobufSchema = writer_schema el: TypeElement - self._message_name = '' + self._message_name = "" for idx, el in enumerate(a.proto_file_element.types): if isinstance(el, MessageElement): self._message_name = el.name self._message_index = idx break - if self._message_name == '': + if self._message_name == "": raise ProtobufTypeException("No message in protobuf schema") - def write_index(self, writer: BytesIO): + def write_index(self, writer: BytesIO) -> None: write_indexes(writer, [self._message_index]) - def write(self, datum: dict, writer: BytesIO): - # validate datum + def write(self, datum: dict, writer: BytesIO) -> None: class_instance = get_protobuf_class_instance(self._writer_schema, self._message_name, config.DEFAULTS) diff --git a/karapace/protobuf/kotlin_wrapper.py b/karapace/protobuf/kotlin_wrapper.py index f40eee5e9..044417b57 100644 --- a/karapace/protobuf/kotlin_wrapper.py +++ b/karapace/protobuf/kotlin_wrapper.py @@ -1,13 +1,8 @@ -from karapace.protobuf.exception import IllegalArgumentException, IllegalStateException +from dataclasses import dataclass import textwrap -def check(q: bool, message: 
str) -> None: - if not q: - raise IllegalStateException(message) - - def trim_margin(s: str) -> str: lines = s.split("\n") new_lines = [] @@ -28,32 +23,10 @@ def trim_margin(s: str) -> str: return "\n".join(new_lines) -def require(q: bool, message: str) -> None: - if not q: - raise IllegalArgumentException(message) - - -def options_to_list(a: list) -> list: - # TODO - return a - - -class String(str): - pass - - -class Any: - pass - - -class OptionsList(list): - pass - - +@dataclass class KotlinRange: - def __init__(self, minimum, maximum) -> None: - self.minimum = minimum - self.maximum = maximum + minimum: int + maximum: int def __str__(self) -> str: return f"{self.minimum}..{self.maximum}" diff --git a/karapace/protobuf/option_element.py b/karapace/protobuf/option_element.py index bf9f6d346..8c6228bcf 100644 --- a/karapace/protobuf/option_element.py +++ b/karapace/protobuf/option_element.py @@ -2,15 +2,9 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OptionElement.kt from enum import Enum -# from karapace.protobuf.kotlin_wrapper import * -# from karapace.protobuf.kotlin_wrapper import * from karapace.protobuf.utils import append_indented, append_options, try_to_schema -class ListOptionElement(list): - pass - - class OptionElement: class Kind(Enum): STRING = 1 diff --git a/karapace/protobuf/option_reader.py b/karapace/protobuf/option_reader.py index fd143a2f8..1003dcdde 100644 --- a/karapace/protobuf/option_reader.py +++ b/karapace/protobuf/option_reader.py @@ -1,15 +1,15 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/OptionReader.kt - +from dataclasses import dataclass from karapace.protobuf.option_element import OptionElement from karapace.protobuf.syntax_reader import SyntaxReader from typing import Union +@dataclass class KindAndValue: - def __init__(self, kind: OptionElement.Kind, value: object) -> None: - self.kind = kind - self.value = 
value + kind: OptionElement.Kind + value: object class OptionReader: diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 419af2b43..61254c37f 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -11,7 +11,7 @@ from karapace.protobuf.field import Field from karapace.protobuf.field_element import FieldElement from karapace.protobuf.group_element import GroupElement -from karapace.protobuf.kotlin_wrapper import KotlinRange, options_to_list +from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.message_element import MessageElement from karapace.protobuf.one_of_element import OneOfElement @@ -371,7 +371,7 @@ def read_field_with_label( json_name, tag, documentation, - options_to_list(options), + options, ) def strip_default(self, options: list) -> Union[str, None]: diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index f606bd77d..7ab5595fa 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -5,8 +5,7 @@ fully-qualified name using the protocol buffer package. 
""" from enum import auto, Enum -from karapace.protobuf.exception import IllegalArgumentException -from karapace.protobuf.kotlin_wrapper import check, require +from karapace.protobuf.exception import IllegalArgumentException, IllegalStateException from karapace.protobuf.option_element import OptionElement from typing import Optional @@ -91,13 +90,12 @@ def __init__( self.is_scalar = False self.string = string self.is_map = True - self.key_type = key_type # TODO restrict what's allowed here + self.key_type = key_type self.value_type = value_type else: - # TODO: must be IllegalArgumentException - raise Exception(f"map key must be non-byte, non-floating point scalar: {key_type}") + raise IllegalArgumentException(f"map key must be non-byte, non-floating point scalar: {key_type}") - def to_kind(self) -> OptionElement.Kind: + def to_kind(self) -> Optional[OptionElement.Kind]: return { "bool": OptionElement.Kind.BOOLEAN, "string": OptionElement.Kind.STRING, @@ -117,7 +115,7 @@ def to_kind(self) -> OptionElement.Kind: }.get(self.simple_name, OptionElement.Kind.ENUM) @property - def enclosing_type_or_package(self) -> str: + def enclosing_type_or_package(self) -> Optional[str]: """ Returns the enclosing type, or null if self type is not nested in another type. 
""" dot = self.string.rfind(".") return None if (dot == -1) else self.string[:dot] @@ -132,9 +130,14 @@ def type_url(self) -> str: def nested_type(self, name: str) -> 'ProtoType': - check(not self.is_scalar, "scalar cannot have a nested type") - check(not self.is_map, "map cannot have a nested type") - require(name and name.rfind(".") == -1 and len(name) != 0, f"unexpected name: {name}") + if self.is_scalar: + raise IllegalStateException("scalar cannot have a nested type") + + if self.is_map: + raise IllegalStateException("map cannot have a nested type") + + if not (name and name.rfind(".") == -1 and len(name) != 0): + raise IllegalArgumentException(f"unexpected name: {name}") return ProtoType(False, f"{self.string}.{name}") @@ -160,10 +163,13 @@ def get2(name: str) -> 'ProtoType': scalar = ProtoType.SCALAR_TYPES.get(name) if scalar: return scalar - require(name and len(name) != 0 and name.rfind("#") == -1, f"unexpected name: {name}") + if not (name and len(name) != 0 and name.rfind("#") == -1): + raise IllegalArgumentException(f"unexpected name: {name}") + if name.startswith("map<") and name.endswith(">"): comma = name.rfind(",") - require(comma != -1, f"expected ',' in map type: {name}") + if not comma != -1: + raise IllegalArgumentException(f"expected ',' in map type: {name}") key = ProtoType.get2(name[4:comma].strip()) value = ProtoType.get2(name[comma + 1:len(name) - 1].strip()) return ProtoType(False, name, key, value) diff --git a/karapace/protobuf/utils.py b/karapace/protobuf/utils.py index 0c0d896da..a9a6336c2 100644 --- a/karapace/protobuf/utils.py +++ b/karapace/protobuf/utils.py @@ -1,13 +1,9 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/Util.kt +from typing import List, TYPE_CHECKING -from karapace.protobuf.type_element import TypeElement -from typing import List - - -def protobuf_encode(a: str) -> str: - # TODO: PROTOBUF - return a +if TYPE_CHECKING: + from 
karapace.protobuf.option_element import OptionElement def append_documentation(data: List[str], documentation: str) -> None: @@ -25,7 +21,7 @@ def append_documentation(data: List[str], documentation: str) -> None: data.append("\n") -def append_options(data: List[str], options: List[TypeElement]) -> None: +def append_options(data: List[str], options: List['OptionElement']) -> None: count = len(options) if count == 1: data.append('[') @@ -43,7 +39,7 @@ def append_options(data: List[str], options: List[TypeElement]) -> None: data.append(']') -def try_to_schema(obj: TypeElement) -> str: +def try_to_schema(obj: 'OptionElement') -> str: try: return obj.to_schema() except AttributeError: From dcb72acb29b8370c71aaf83692f449b387348d61 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:05:58 +0200 Subject: [PATCH 152/168] Update karapace/protobuf/proto_type.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 7ab5595fa..66414b47c 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -116,7 +116,7 @@ def to_kind(self) -> Optional[OptionElement.Kind]: @property def enclosing_type_or_package(self) -> Optional[str]: - """ Returns the enclosing type, or null if self type is not nested in another type. 
""" + """Returns the enclosing type, or None if self type is not nested in another type.""" dot = self.string.rfind(".") return None if (dot == -1) else self.string[:dot] From 00fe7d82684f584085b8dee63070bfc3aa5261b0 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:07:46 +0200 Subject: [PATCH 153/168] Update karapace/protobuf/proto_type.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 66414b47c..d939ce4f3 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -121,7 +121,7 @@ def enclosing_type_or_package(self) -> Optional[str]: return None if (dot == -1) else self.string[:dot] @property - def type_url(self) -> str: + def type_url(self) -> Optional[str]: """ Returns a string like "type.googleapis.com/packagename.messagename" or null if self type is a scalar or a map. Note that self returns a non-null string for enums because it doesn't know if the named type is a message or an enum. From 34f4ccc7107c83f73ecb9f1dde6d2708aa3570af Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:14:45 +0200 Subject: [PATCH 154/168] Update karapace/protobuf/proto_type.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_type.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index d939ce4f3..5e6241a48 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -122,8 +122,10 @@ def enclosing_type_or_package(self) -> Optional[str]: @property def type_url(self) -> Optional[str]: - """ Returns a string like "type.googleapis.com/packagename.messagename" or null if self type is - a scalar or a map. 
Note that self returns a non-null string for enums because it doesn't know + """Returns a string like "type.googleapis.com/packagename.messagename" or None if self type is + a scalar or a map. + + Note:: Returns a string for enums because it doesn't know if the named type is a message or an enum. """ return None if self.is_scalar or self.is_map else f"type.googleapis.com/{self.string}" From ca4ce17bb4e055233c3a957942dc1ab1339b893a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:23:38 +0200 Subject: [PATCH 155/168] Update karapace/protobuf/proto_type.py Co-authored-by: Augusto Hack --- karapace/protobuf/proto_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 5e6241a48..5a03215e3 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -182,7 +182,7 @@ def get3(key_type: 'ProtoType', value_type: 'ProtoType', name: str) -> 'ProtoTyp return ProtoType(False, name, key_type, value_type) # schema compatibility check functionality karapace addon - # Based on table https://developers.google.com/protocol-buffers/docs/proto3#scalar """ + # Based on table https://developers.google.com/protocol-buffers/docs/proto3#scalar class CompatibilityKind(Enum): VARIANT = auto() From a7bed1f62262fc08cffe1fad487cbf11ebf8a902 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 13:49:29 +0200 Subject: [PATCH 156/168] fixup by review --- karapace/protobuf/protobuf_syntax.py | 5 ----- karapace/protobuf/protobuf_to_dict.py | 1 - 2 files changed, 6 deletions(-) delete mode 100644 karapace/protobuf/protobuf_syntax.py diff --git a/karapace/protobuf/protobuf_syntax.py b/karapace/protobuf/protobuf_syntax.py deleted file mode 100644 index b97a1b72d..000000000 --- a/karapace/protobuf/protobuf_syntax.py +++ /dev/null @@ -1,5 +0,0 @@ -from enum import Enum - - -class ProtobufSyntax(Enum): - pass diff --git 
a/karapace/protobuf/protobuf_to_dict.py b/karapace/protobuf/protobuf_to_dict.py index c3bdb12ee..2fd0d372d 100644 --- a/karapace/protobuf/protobuf_to_dict.py +++ b/karapace/protobuf/protobuf_to_dict.py @@ -1,4 +1,3 @@ -# -*- coding:utf-8 -*- """ This module provide a small Python library for creating dicts from protocol buffers Module based on code : From fa07747d97e5847a291822ff4ff78c0c2d8c4598 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 14:00:52 +0200 Subject: [PATCH 157/168] Update karapace/protobuf/syntax_reader.py Co-authored-by: Augusto Hack --- karapace/protobuf/syntax_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 947704eb3..5f71b4d74 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -360,7 +360,7 @@ def expect_with_location(self, condition: bool, location: Location, message: str if not condition: self.unexpected(message, location) - def unexpected(self, message: str, location: Location = None) -> None: + def unexpected(self, message: str, location: Location = None) -> NoReturn: if not location: location = self.location() w = f"Syntax error in {str(location)}: {message}" From f68fcbcd30726d485df1d53259c7e5dac2026d4f Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 14:02:01 +0200 Subject: [PATCH 158/168] fixup by review --- karapace/protobuf/proto_type.py | 2 +- karapace/protobuf/reserved_document.py | 11 ++++++----- karapace/protobuf/service_element.py | 21 ++++++++------------- karapace/protobuf/syntax_reader.py | 7 ++----- 4 files changed, 17 insertions(+), 24 deletions(-) diff --git a/karapace/protobuf/proto_type.py b/karapace/protobuf/proto_type.py index 5a03215e3..74ad913d5 100644 --- a/karapace/protobuf/proto_type.py +++ b/karapace/protobuf/proto_type.py @@ -124,7 +124,7 @@ def enclosing_type_or_package(self) -> Optional[str]: def type_url(self) -> 
Optional[str]: """Returns a string like "type.googleapis.com/packagename.messagename" or None if self type is a scalar or a map. - + Note:: Returns a string for enums because it doesn't know if the named type is a message or an enum. """ diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 400887a47..60d5f4952 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -1,17 +1,18 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ReservedElement.kt +from dataclasses import dataclass from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation +@dataclass class ReservedElement: - def __init__(self, location: Location, documentation: str = "", values: list = None) -> None: - self.location = location - self.documentation = documentation - """ A [String] name or [Int] or [IntRange] tag. """ - self.values = values or [] + location: Location + documentation: str = "" + """ A [String] name or [Int] or [IntRange] tag. 
""" + values: list = None def to_schema(self) -> str: result = [] diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index 10a8d28dd..11b7d6b57 100644 --- a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -1,5 +1,7 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ServiceElement.kt +from dataclasses import dataclass + from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement from karapace.protobuf.rpc_element import RpcElement @@ -7,20 +9,13 @@ from typing import List +@dataclass class ServiceElement: - def __init__( - self, - location: Location, - name: str, - documentation: str = "", - rpcs: List[RpcElement] = None, - options: List[OptionElement] = None - ) -> None: - self.location = location - self.name = name - self.documentation = documentation - self.rpcs = rpcs or [] - self.options = options or [] + location: Location + name: str + documentation: str = "" + rpcs: List[RpcElement] = None + options: List[OptionElement] = None def to_schema(self) -> str: result: List[str] = [] diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 947704eb3..4e1bb264d 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -2,7 +2,7 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/SyntaxReader.kt from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.location import Location -from typing import Union +from typing import Union, NoReturn class SyntaxReader: @@ -182,11 +182,8 @@ def read_int(self) -> int: radix = 16 return int(tag, radix) - # except OSError as err: - # print("OS error: {0}".format(err)) except ValueError: self.unexpected(f"expected an integer but was {tag}") - return -22 # this return never be called but mypy think we need it def 
read_documentation(self) -> str: """ Like skip_whitespace(), but this returns a string containing all comment text. By convention, @@ -360,7 +357,7 @@ def expect_with_location(self, condition: bool, location: Location, message: str if not condition: self.unexpected(message, location) - def unexpected(self, message: str, location: Location = None) -> None: + def unexpected(self, message: str, location: Location = None) -> NoReturn: if not location: location = self.location() w = f"Syntax error in {str(location)}: {message}" From 9436285ea4a2f5826a8c4b987a5c446e02c0e2b0 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 14:03:02 +0200 Subject: [PATCH 159/168] Update karapace/protobuf/type_element.py Co-authored-by: Augusto Hack --- karapace/protobuf/type_element.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index 06a179bf8..a1266cc69 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -17,7 +17,11 @@ class TypeElement: nested_types: List['TypeElement'] def to_schema(self) -> str: - pass + """Convert the object to valid protobuf syntax. + + This must be implemented by subclasses. 
+ """ + raise NotImplementedError() def __repr__(self) -> str: mytype = type(self) From 178013941878aee898e2674ab021e78f3bfd4a99 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 14:16:44 +0200 Subject: [PATCH 160/168] Update karapace/schema_reader.py Co-authored-by: Augusto Hack --- karapace/schema_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index ed9310a37..f9f1d6601 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -88,7 +88,7 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen raise InvalidSchema from e @staticmethod - def parse_protobuf(schema_str: str): + def parse_protobuf(schema_str: str) -> Optional[TypedSchema]: try: ts = TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) return ts From 9cac2aa63d85bce11d7f46f3d61d33f005dee474 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 14:22:02 +0200 Subject: [PATCH 161/168] fixup by review --- karapace/protobuf/reserved_document.py | 1 - karapace/protobuf/service_element.py | 6 ++---- karapace/protobuf/syntax_reader.py | 2 +- karapace/protobuf/type_element.py | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_document.py index 60d5f4952..ccb33e01b 100644 --- a/karapace/protobuf/reserved_document.py +++ b/karapace/protobuf/reserved_document.py @@ -1,7 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ReservedElement.kt from dataclasses import dataclass - from karapace.protobuf.kotlin_wrapper import KotlinRange from karapace.protobuf.location import Location from karapace.protobuf.utils import append_documentation diff --git a/karapace/protobuf/service_element.py b/karapace/protobuf/service_element.py index 11b7d6b57..e5131a922 100644 --- 
a/karapace/protobuf/service_element.py +++ b/karapace/protobuf/service_element.py @@ -1,7 +1,6 @@ # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/ServiceElement.kt from dataclasses import dataclass - from karapace.protobuf.location import Location from karapace.protobuf.option_element import OptionElement from karapace.protobuf.rpc_element import RpcElement @@ -23,9 +22,8 @@ def to_schema(self) -> str: result.append(f"service {self.name} {{") if self.options: result.append("\n") - for option in self.options: - append_indented(result, option.to_schema_declaration()) - + for option in self.options: + append_indented(result, option.to_schema_declaration()) if self.rpcs: result.append('\n') for rpc in self.rpcs: diff --git a/karapace/protobuf/syntax_reader.py b/karapace/protobuf/syntax_reader.py index 4e1bb264d..9f2fb0c4a 100644 --- a/karapace/protobuf/syntax_reader.py +++ b/karapace/protobuf/syntax_reader.py @@ -2,7 +2,7 @@ # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/internal/parser/SyntaxReader.kt from karapace.protobuf.exception import IllegalStateException from karapace.protobuf.location import Location -from typing import Union, NoReturn +from typing import NoReturn, Union class SyntaxReader: diff --git a/karapace/protobuf/type_element.py b/karapace/protobuf/type_element.py index a1266cc69..10908e4ab 100644 --- a/karapace/protobuf/type_element.py +++ b/karapace/protobuf/type_element.py @@ -18,7 +18,7 @@ class TypeElement: def to_schema(self) -> str: """Convert the object to valid protobuf syntax. - + This must be implemented by subclasses. 
""" raise NotImplementedError() From 9371d07edf7a7ddec85a7125ab47bc7b90ecfc1a Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Fri, 21 Jan 2022 14:36:07 +0200 Subject: [PATCH 162/168] fixup by review --- karapace/protobuf/message_element.py | 2 +- karapace/protobuf/proto_parser.py | 2 +- .../protobuf/{reserved_document.py => reserved_element.py} | 0 karapace/schema_reader.py | 5 +++-- karapace/schema_registry_apis.py | 2 +- karapace/serialization.py | 4 +++- pytest.ini | 2 +- tests/unit/test_message_element.py | 2 +- tests/unit/test_proto_parser.py | 2 +- 9 files changed, 12 insertions(+), 9 deletions(-) rename karapace/protobuf/{reserved_document.py => reserved_element.py} (100%) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index 0df375f20..ae30c151f 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -10,7 +10,7 @@ from karapace.protobuf.location import Location from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.option_element import OptionElement -from karapace.protobuf.reserved_document import ReservedElement +from karapace.protobuf.reserved_element import ReservedElement from karapace.protobuf.type_element import TypeElement from karapace.protobuf.utils import append_documentation, append_indented from typing import List diff --git a/karapace/protobuf/proto_parser.py b/karapace/protobuf/proto_parser.py index 61254c37f..68f73aba8 100644 --- a/karapace/protobuf/proto_parser.py +++ b/karapace/protobuf/proto_parser.py @@ -18,7 +18,7 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.option_reader import OptionReader from karapace.protobuf.proto_file_element import ProtoFileElement -from karapace.protobuf.reserved_document import ReservedElement +from karapace.protobuf.reserved_element import ReservedElement from karapace.protobuf.rpc_element import RpcElement from karapace.protobuf.service_element import 
ServiceElement from karapace.protobuf.syntax import Syntax diff --git a/karapace/protobuf/reserved_document.py b/karapace/protobuf/reserved_element.py similarity index 100% rename from karapace/protobuf/reserved_document.py rename to karapace/protobuf/reserved_element.py diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index f9f1d6601..154aefe52 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -23,7 +23,7 @@ from karapace.utils import json_encode, KarapaceKafkaClient from queue import Queue from threading import Lock, Thread -from typing import Dict +from typing import Dict, Optional import json import logging @@ -88,7 +88,7 @@ def parse_avro(schema_str: str): # pylint: disable=inconsistent-return-statemen raise InvalidSchema from e @staticmethod - def parse_protobuf(schema_str: str) -> Optional[TypedSchema]: + def parse_protobuf(schema_str: str) -> Optional['TypedSchema']: try: ts = TypedSchema(parse_protobuf_schema_definition(schema_str), SchemaType.PROTOBUF, schema_str) return ts @@ -97,6 +97,7 @@ def parse_protobuf(schema_str: str) -> Optional[TypedSchema]: IllegalArgumentException, ProtobufError, ProtobufException, ProtobufSchemaParseException ) as e: log.exception("Unexpected error: %s \n schema:[%s]", e, schema_str) + raise InvalidSchema from e @staticmethod def parse(schema_type: SchemaType, schema_str: str): # pylint: disable=inconsistent-return-statements diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 63576c2f7..d6c4875a5 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -276,7 +276,7 @@ def send_delete_subject_message(self, subject, version): value = '{{"subject":"{}","version":{}}}'.format(subject, version) return self.send_kafka_message(key, value) - # TODO: PROTOBUF add protobuf compatibility_check + # protobuf compatibility_check async def compatibility_check(self, content_type, *, subject, version, request): """Check for schema 
compatibility""" body = request.json diff --git a/karapace/serialization.py b/karapace/serialization.py index 51fca9a31..dbec23fdd 100644 --- a/karapace/serialization.py +++ b/karapace/serialization.py @@ -140,7 +140,9 @@ def get_subject_name(self, topic_name: str, schema: str, subject_type: str, sche namespace = schema_typed.schema.namespace if schema_type is SchemaType.JSONSCHEMA: namespace = schema_typed.to_json().get("namespace", "dummy") - # TODO: PROTOBUF* Seems protobuf does not use namespaces in terms of AVRO + # Protobuf does not use namespaces in terms of AVRO + if schema_type is SchemaType.PROTOBUF: + namespace = "" return f"{self.subject_name_strategy(topic_name, namespace)}-{subject_type}" async def get_schema_for_subject(self, subject: str) -> TypedSchema: diff --git a/pytest.ini b/pytest.ini index f505a7a0f..3417588cc 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,3 @@ [pytest] -addopts = -ra --tb=short --showlocals --numprocesses auto +addopts = -ra -q --tb=short --showlocals --numprocesses auto timeout = 60 diff --git a/tests/unit/test_message_element.py b/tests/unit/test_message_element.py index 2c3af3d11..324b434b6 100644 --- a/tests/unit/test_message_element.py +++ b/tests/unit/test_message_element.py @@ -10,7 +10,7 @@ from karapace.protobuf.message_element import MessageElement from karapace.protobuf.one_of_element import OneOfElement from karapace.protobuf.option_element import OptionElement -from karapace.protobuf.reserved_document import ReservedElement +from karapace.protobuf.reserved_element import ReservedElement location: Location = Location.get("file.proto") diff --git a/tests/unit/test_proto_parser.py b/tests/unit/test_proto_parser.py index b7a52314f..6774ef91c 100644 --- a/tests/unit/test_proto_parser.py +++ b/tests/unit/test_proto_parser.py @@ -16,7 +16,7 @@ from karapace.protobuf.option_element import OptionElement from karapace.protobuf.proto_file_element import ProtoFileElement from karapace.protobuf.proto_parser import 
ProtoParser -from karapace.protobuf.reserved_document import ReservedElement +from karapace.protobuf.reserved_element import ReservedElement from karapace.protobuf.rpc_element import RpcElement from karapace.protobuf.service_element import ServiceElement from karapace.protobuf.syntax import Syntax From 3f22da7f91126c29baddf85eb9b7a3af49c6bdb6 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 24 Jan 2022 12:54:45 +0200 Subject: [PATCH 163/168] Update checks.py --- karapace/compatibility/protobuf/checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index 2e2c87458..3e0563512 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -15,7 +15,7 @@ def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: Protobuf log.debug("IS_COMPATIBLE %s", result.is_compatible()) if result.is_compatible(): return SchemaCompatibilityResult.compatible() - # TODO: maybe move incompatibility level raising to ProtoFileElement.compatible() ?? 
+ incompatibilities = [] locations = set() From 153bbb137c3fc0099aaf28ab4839274979574ad3 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 24 Jan 2022 12:56:11 +0200 Subject: [PATCH 164/168] Update checks.py --- karapace/compatibility/protobuf/checks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index 3e0563512..407525498 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -15,8 +15,7 @@ def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: Protobuf log.debug("IS_COMPATIBLE %s", result.is_compatible()) if result.is_compatible(): return SchemaCompatibilityResult.compatible() - - + incompatibilities = [] locations = set() messages = set() From 42a365a4a974aedbe8a2d388090e1d5e5a370fb5 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 24 Jan 2022 12:57:20 +0200 Subject: [PATCH 165/168] Update compare_result.py --- karapace/protobuf/compare_result.py | 1 - 1 file changed, 1 deletion(-) diff --git a/karapace/protobuf/compare_result.py b/karapace/protobuf/compare_result.py index b52b505d8..6df031962 100644 --- a/karapace/protobuf/compare_result.py +++ b/karapace/protobuf/compare_result.py @@ -3,7 +3,6 @@ class Modification(Enum): - # TODO PACKAGE_ALTER = auto() SYNTAX_ALTER = auto() MESSAGE_ADD = auto() From 8c9b0ee094efcfe72f5bd077cd5fef690dffc515 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 24 Jan 2022 13:00:01 +0200 Subject: [PATCH 166/168] Update group_element.py --- karapace/protobuf/group_element.py | 1 - 1 file changed, 1 deletion(-) diff --git a/karapace/protobuf/group_element.py b/karapace/protobuf/group_element.py index 6280df4c1..258d81064 100644 --- a/karapace/protobuf/group_element.py +++ b/karapace/protobuf/group_element.py @@ -21,7 +21,6 @@ def to_schema(self) -> str: result = [] append_documentation(result, self.documentation) - # TODO: compare 
lower() to lowercase() and toLowerCase(Locale.US) Kotlin if self.label: result.append(f"{str(self.label.name).lower()} ") result.append(f"group {self.name} = {self.tag} {{") From 8e2f2d334406b047217501d2170707ccfd5b73da Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 24 Jan 2022 15:16:05 +0200 Subject: [PATCH 167/168] Update karapace/protobuf/io.py Co-authored-by: Augusto Hack --- karapace/protobuf/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 0f6d1c194..5e946ce2b 100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -97,7 +97,7 @@ def __init__(self, writer_schema: ProtobufSchema = None, reader_schema: Protobuf self._writer_schema = writer_schema self._reader_schema = reader_schema - def read(self, bio: BytesIO): + def read(self, bio: BytesIO) -> None: if self._reader_schema is None: self._reader_schema = self._writer_schema return protobuf_to_dict(read_data(self._writer_schema, self._reader_schema, bio), True) From 7967d4934b19c644d5cd249b4e026a504b963494 Mon Sep 17 00:00:00 2001 From: Sergiy Zaschipas Date: Mon, 24 Jan 2022 23:26:03 +0200 Subject: [PATCH 168/168] fixup minor issues --- karapace/compatibility/protobuf/checks.py | 2 +- karapace/protobuf/io.py | 3 +-- karapace/protobuf/message_element.py | 2 -- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/karapace/compatibility/protobuf/checks.py b/karapace/compatibility/protobuf/checks.py index 407525498..54661318e 100644 --- a/karapace/compatibility/protobuf/checks.py +++ b/karapace/compatibility/protobuf/checks.py @@ -15,7 +15,7 @@ def check_protobuf_schema_compatibility(reader: ProtobufSchema, writer: Protobuf log.debug("IS_COMPATIBLE %s", result.is_compatible()) if result.is_compatible(): return SchemaCompatibilityResult.compatible() - + incompatibilities = [] locations = set() messages = set() diff --git a/karapace/protobuf/io.py b/karapace/protobuf/io.py index 0f6d1c194..384fa66c6 
100644 --- a/karapace/protobuf/io.py +++ b/karapace/protobuf/io.py @@ -23,7 +23,7 @@ def calculate_class_name(name: str) -> str: def match_schemas(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema) -> bool: - # TODO (serge): schema comparison by fields required + # TODO (serge): is schema comparison by fields required? return str(writer_schema) == str(reader_schema) @@ -72,7 +72,6 @@ def get_protobuf_class_instance(schema: ProtobufSchema, class_name: str, cfg: Di def read_data(writer_schema: ProtobufSchema, reader_schema: ProtobufSchema, bio: BytesIO) -> Any: - # TODO (serge): check and polish it if not match_schemas(writer_schema, reader_schema): fail_msg = 'Schemas do not match.' raise ProtobufSchemaResolutionException(fail_msg, writer_schema, reader_schema) diff --git a/karapace/protobuf/message_element.py b/karapace/protobuf/message_element.py index ae30c151f..5c7a460ab 100644 --- a/karapace/protobuf/message_element.py +++ b/karapace/protobuf/message_element.py @@ -139,6 +139,4 @@ def compare(self, other: 'MessageElement', result: CompareResult, types: Compare result.pop_path() - # TODO Compare NestedTypes must be there. - types.unlock_message(self)