From ad6ecbe09c347ed7b63ce908a3f036cd4a205456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Fri, 18 Oct 2024 14:43:53 +0100 Subject: [PATCH 1/3] sr/test: refactor avro verbose compat test Make it easier to extend by simplifying the definition of a test case. (cherry picked from commit 479beda1e3bc480325cf6c7999b3dc67b00f547e) --- .../test/compatibility_avro.cc | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc b/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc index 2c92bd5d1ec3..f9af35ca86ef 100644 --- a/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc +++ b/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc @@ -325,9 +325,7 @@ SEASTAR_THREAD_TEST_CASE(test_avro_alias_resolution_stopgap) { namespace { -const auto schema_old = pps::sanitize_avro_schema_definition( - { - R"({ +const auto schema_old = R"({ "type": "record", "name": "myrecord", "fields": [ @@ -392,13 +390,9 @@ const auto schema_old = pps::sanitize_avro_schema_definition( } } ] -})", - pps::schema_type::avro}) - .value(); +})"; -const auto schema_new = pps::sanitize_avro_schema_definition( - { - R"({ +const auto schema_new = R"({ "type": "record", "name": "myrecord", "fields": [ @@ -464,9 +458,7 @@ const auto schema_new = pps::sanitize_avro_schema_definition( } } ] -})", - pps::schema_type::avro}) - .value(); +})"; using incompatibility = pps::avro_incompatibility; @@ -534,16 +526,22 @@ const absl::flat_hash_set backward_expected{ "expected: someEnum1 (alias resolution is not yet fully supported)"}, }; -const auto compat_data = std::to_array>({ +struct compat_test_case { + std::string reader; + std::string writer; + absl::flat_hash_set expected; +}; + +const auto compat_data = std::to_array({ { - schema_old.share(), - schema_new.share(), - forward_expected, + .reader=schema_old, + .writer=schema_new, + .expected=forward_expected, }, { - schema_new.share(), - schema_old.share(), - backward_expected, + .reader=schema_new, + .writer=schema_old, + .expected=backward_expected, }, }); @@ -555,13 +553,26 @@ std::string format_set(const absl::flat_hash_set& d) { SEASTAR_THREAD_TEST_CASE(test_avro_compat_messages) { for (const auto& cd : compat_data) { - auto compat = check_compatible_verbose(cd.reader, cd.writer); + auto compat = check_compatible_verbose( + pps::sanitize_avro_schema_definition( + {cd.reader, pps::schema_type::avro}) + .value(), + pps::sanitize_avro_schema_definition( + {cd.writer, pps::schema_type::avro}) + .value()); + + pps::raw_compatibility_result raw; + absl::c_for_each(cd.expected, [&raw](auto e) { + raw.emplace(std::move(e)); + }); + auto exp_compat = std::move(raw)(pps::verbose::yes); + absl::flat_hash_set errs{ compat.messages.begin(), compat.messages.end()}; absl::flat_hash_set expected{ - cd.expected.messages.begin(), cd.expected.messages.end()}; + exp_compat.messages.begin(), exp_compat.messages.end()}; - BOOST_CHECK(!compat.is_compat); + BOOST_CHECK_EQUAL(compat.is_compat, exp_compat.is_compat); BOOST_CHECK_EQUAL(errs.size(), expected.size()); BOOST_REQUIRE_MESSAGE( errs == expected, From c164f26779a4dee2d16e8245de1205ce13d26c84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Fri, 18 Oct 2024 13:38:41 +0100 Subject: [PATCH 2/3] sr/test: add more tests for avro non-default field This adds more tests for the avro incompatibility `reader_field_missing_default_value`. All of these tests are already passing, while the tests added in the following commit show the change in behaviour. (cherry picked from commit 6aabcfa22777ee230419933a521455bb7957e740) --- .../test/compatibility_avro.cc | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc b/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc index f9af35ca86ef..7120efa3a39c 100644 --- a/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc +++ b/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc @@ -543,6 +543,112 @@ const auto compat_data = std::to_array({ .writer=schema_old, .expected=backward_expected, }, + { + .reader=R"({ + "type": "record", + "name": "car", + "fields": [ + { + "name": "color", + "type": ["string", "null"] + } + ] + })", + .writer=R"({ + "type": "record", + "name": "car", + "fields": [] + })", + .expected={ + {"/fields/0", + incompatibility::Type::reader_field_missing_default_value, + "color"}, + }, + }, + { + .reader = R"({ + "type": "record", + "name": "car", + "fields": [ + { + "name": "color", + "type": ["null", "string"], + "default": null + } + ] + })", + .writer = R"({ + "type": "record", + "name": "car", + "fields": [] + })", + .expected = {}, + }, + { + .reader=R"({ + "type": "record", + "name": "car", + "fields": [ + { + "name": "color", + "type": "null" + } + ] + })", + .writer=R"({ + "type": "record", + "name": "car", + "fields": [] + })", + .expected={ + {"/fields/0", + incompatibility::Type::reader_field_missing_default_value, + "color"}, + }, + }, + { + .reader=R"({ + "type": "record", + "name": "car", + "fields": [ + { + "name": "color", + "type": "string", + "default": "somevalue" + } + ] + })", + .writer=R"({ + "type": "record", + "name": "car", + "fields": [] + })", + .expected={}, + },{ + .reader=R"({ + "type": "record", + "name": "car", + "fields": [ + { + "name": "color", + "type": "null", + "default": null + } + ] + })", + .writer=R"({ + "type": "record", + "name": "car", + "fields": [] + })", + .expected={ + // Note: this is overly restrictive for null-type fields with null defaults. + // This is because the Avro API is not expressive enough to differentiate the two. + {"/fields/0", + incompatibility::Type::reader_field_missing_default_value, + "color"}, + }, + }, }); std::string format_set(const absl::flat_hash_set& d) { From 5fe3c0b388f3ba50a6092718ee30f8208a483e0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gell=C3=A9rt=20Peresztegi-Nagy?= Date: Wed, 6 Nov 2024 17:35:02 +0000 Subject: [PATCH 3/3] sr/avro: fix missing default compat check for null This fixes a bug in the avro compatibility check for fields present in the reader but missing from the writer. In this case the writer and the reader are compatible only if the reader field has a specified default value. The Avro library sets the default value to `GenericDatum()` which has type `AVRO_NULL` when the default is not specified. This is what we are trying to detect in this check. However, the check was confusing an explicit null default for a union-type as a missing value. The former is represented as `GenericDatum(Union(Null))` and because of the way `GenericDatum::type()` delegates to the type of the first union leaf value when `GenericDatum` contains a union, the check thought that the value was missing, when it was in fact specified as null. To solve for the above, we also check that the type is not a union type to be able to detect when the default is not set. (The same check is used in the Avro library to detect unset default values, for example, when printing an Avro schema as JSON.) The caveat is that we are still overly restrictive for fields of type null, because in that case, the default value will be `GenericDatum()` regardless of whether the default value was explicitly set to null or not. This is a limitation of the Avro library's API and could only be fixed by making breaking changes to the Avro library's API. Also note that the checks against `reader.leafAt(...)` have been removed. They were attempting to compare the type of the field against the type of the default value. This is not necessary because this is ensured by the Avro library's built-in schema validation done while parsing the Avro schema. (cherry picked from commit 3866b1c09faa0a6fd3d8a525589c27fdd55b1a95) --- src/v/pandaproxy/schema_registry/avro.cc | 35 ++++++++++++------- .../test/compatibility_avro.cc | 22 ++++++++++++ 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/v/pandaproxy/schema_registry/avro.cc b/src/v/pandaproxy/schema_registry/avro.cc index 33f252b88bfc..7546cbe7f720 100644 --- a/src/v/pandaproxy/schema_registry/avro.cc +++ b/src/v/pandaproxy/schema_registry/avro.cc @@ -115,18 +115,29 @@ avro_compatibility_result check_compatible( *reader.leafAt(int(r_idx)), *writer.leafAt(int(w_idx)), fields_p / std::to_string(r_idx) / "type")); - } else if ( - reader.defaultValueAt(int(r_idx)).type() == avro::AVRO_NULL) { - // if the reader's record schema has a field with no default - // value, and writer's schema does not have a field with the - // same name, an error is signalled. - - // For union, the default must correspond to the first type. - // The default may be null. - const auto& r_leaf = reader.leafAt(int(r_idx)); - if ( - r_leaf->type() != avro::Type::AVRO_UNION - || r_leaf->leafAt(0)->type() != avro::Type::AVRO_NULL) { + } else { + // if the reader's record schema has a field with no + // default value, and writer's schema does not have a + // field with the same name, an error is signalled. + // For union, the default must correspond to the first + // type. + const auto& def = reader.defaultValueAt(int(r_idx)); + + // Note: this code is overly restrictive for null-type + // fields with null defaults. This is because the Avro API + // is not expressive enough to differentiate the two. + // Union type field's default set to null: + // def=GenericDatum(Union(Null)) + // Union type field's default missing: + // def=GenericDatum(Null) + // Null type field's default set to null: + // def=GenericDatum(Null) + // Null type field's default missing: + // def=GenericDatum(Null) + auto default_unset = !def.isUnion() + && def.type() == avro::AVRO_NULL; + + if (default_unset) { compat_result.emplace( fields_p / std::to_string(r_idx), avro_incompatibility::Type:: diff --git a/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc b/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc index 7120efa3a39c..6c927ed866d1 100644 --- a/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc +++ b/src/v/pandaproxy/schema_registry/test/compatibility_avro.cc @@ -543,6 +543,28 @@ const auto compat_data = std::to_array({ .writer=schema_old, .expected=backward_expected, }, + { + .reader=R"({ + "type": "record", + "name": "car", + "fields": [ + { + "name": "color", + "type": ["null", "string"] + } + ] + })", + .writer=R"({ + "type": "record", + "name": "car", + "fields": [] + })", + .expected={ + {"/fields/0", + incompatibility::Type::reader_field_missing_default_value, + "color"}, + }, + }, { .reader=R"({ "type": "record",