Skip to content

Commit

Permalink
add e2e test, ban named type
Browse files Browse the repository at this point in the history
  • Loading branch information
xxchan committed Jul 9, 2024
1 parent bf445cf commit 6a8c351
Show file tree
Hide file tree
Showing 6 changed files with 279 additions and 87 deletions.
27 changes: 27 additions & 0 deletions e2e_test/commands/sr_register
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

set -euo pipefail

# Register a schema to schema registry
#
# Usage: sr_register <subject> <schema>
#
# https://docs.confluent.io/platform/current/schema-registry/develop/api.html#post--subjects-(string-%20subject)-versions

# Validate arguments
if [[ $# -ne 2 ]]; then
echo "Usage: sr_register <subject> <schema>"
exit 1
fi

subject="$1"
schema="$2"


if [[ -z $subject || -z $schema ]]; then
echo "Error: Arguments cannot be empty"
exit 1
fi

echo "$schema" | jq '{"schema": tojson}' \
| curl -X POST -H 'content-type:application/vnd.schemaregistry.v1+json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/${subject}/versions"
6 changes: 2 additions & 4 deletions e2e_test/source_inline/kafka/avro/alter_source.slt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ system ok
rpk topic create 'avro_alter_source_test'

system ok
echo '{"type":"record","name":"Root","fields":[{"name":"foo","type":"string"}]}' | jq '{"schema": tojson}' \
| curl -X POST -H 'content-type:application/json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/avro_alter_source_test-value/versions"
sr_register avro_alter_source_test '{"type":"record","name":"Root","fields":[{"name":"foo","type":"string"}]}'

statement ok
create source s
Expand All @@ -27,8 +26,7 @@ FORMAT PLAIN ENCODE AVRO (

# create a new version of schema and produce a message
system ok
echo '{"type":"record","name":"Root","fields":[{"name":"bar","type":"int","default":0},{"name":"foo","type":"string"}]}' | jq '{"schema": tojson}' \
| curl -X POST -H 'content-type:application/json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/avro_alter_source_test-value/versions"
sr_register avro_alter_source_test '{"type":"record","name":"Root","fields":[{"name":"bar","type":"int","default":0},{"name":"foo","type":"string"}]}'

system ok
echo '{"foo":"ABC", "bar":1}' | rpk topic produce --schema-id=topic avro_alter_source_test
Expand Down
100 changes: 100 additions & 0 deletions e2e_test/source_inline/kafka/avro/union.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
control substitution on

system ok
rpk topic delete 'avro-union' || true; \
(rpk sr subject delete 'avro-union-value' && rpk sr subject delete 'avro-union-value' --permanent) || true;
rpk topic create avro-union

system ok
sr_register avro-union-value '
{
"type": "record",
"name": "Root",
"fields": [
{
"name": "unionType",
"type": ["int", "string"]
},
{
"name": "unionTypeComplex",
"type": [
"null",
{"type": "record", "name": "Email","fields": [{"name":"inner","type":"string"}]},
{"type": "record", "name": "Fax","fields": [{"name":"inner","type":"int"}]},
{"type": "record", "name": "Sms","fields": [{"name":"inner","type":"int"}]}
]
},
{
"name": "enumField",
"type": ["null", "int", {
"type": "enum",
"name": "myEnum",
"namespace": "my.namespace",
"symbols": ["A", "B", "C", "D"]
}],
"default": null
}
]
}
'

system ok
cat<<EOF | rpk topic produce avro-union --schema-id=topic
{"unionType": {"int":1}, "unionTypeComplex": {"Sms": {"inner":6}}, "enumField": {"my.namespace.myEnum": "A"}}
{"unionType": {"string":"2"}, "unionTypeComplex": {"Fax": {"inner":6}}}
{"unionType": {"int":3}, "unionTypeComplex": {"Email": {"inner":"[email protected]"}}, "enumField": {"int":66}}
EOF

statement error
create source avro_union
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro-union'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: connector error
2: failed to convert Avro union to struct
3: Feature is not yet implemented: Avro named type used in Union type: Record(RecordSchema { name: Name { name: "Email", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "inner", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 0, custom_attributes: {} }], lookup: {"inner": 0}, attributes: {} })
Tracking issue: https://github.com/risingwavelabs/risingwave/issues/17616


# FIXME: The following is the current buggy result.


# query ? rowsort
# select * from avro_union
# ----
# ("([email protected])",,)
# (,"(6)",)
# (,"(6)",)

# # Demonstrate how to access union variants (struct fields) below:
# # Note that we need to use quotes.

# query ? rowsort
# select ("enumField")."my.namespace.myEnum" from avro_union;
# ----
# A
# NULL
# NULL

# # To output the union’s tag (i.e. case in protobuf), a case-when can be used.
# query ? rowsort
# select
# case
# when ("unionTypeComplex")."Sms" is not null then 'Sms'
# when ("unionTypeComplex")."Fax" is not null then 'Fax'
# when ("unionTypeComplex")."Email" is not null then 'Email'
# else null -- optional
# end
# from avro_union;
# ----
# Email
# Fax
# Fax
140 changes: 139 additions & 1 deletion src/connector/codec/src/decoder/avro/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ pub(crate) fn avro_to_jsonb(avro: &Value, builder: &mut jsonbb::Builder) -> Acce
mod tests {
use std::str::FromStr;

use apache_avro::Decimal as AvroDecimal;
use apache_avro::{from_avro_datum, Decimal as AvroDecimal};
use expect_test::expect;
use risingwave_common::types::{Datum, Decimal};

Expand Down Expand Up @@ -761,6 +761,144 @@ mod tests {
.assert_debug_eq(&s);
}

#[test]
fn test_avro_lib_union_record_bug() {
// multiple named types (record)
let s = Schema::parse_str(
r#"
{
"type": "record",
"name": "Root",
"fields": [
{
"name": "unionTypeComplex",
"type": [
"null",
{"type": "record", "name": "Email","fields": [{"name":"inner","type":"string"}]},
{"type": "record", "name": "Fax","fields": [{"name":"inner","type":"int"}]},
{"type": "record", "name": "Sms","fields": [{"name":"inner","type":"int"}]}
]
}
]
}
"#,
)
.unwrap();

let bytes = hex::decode("060c").unwrap();
// Correct should be variant 3 (Sms)
let correct_value = from_avro_datum(&s, &mut bytes.as_slice(), None);
expect![[r#"
Ok(
Record(
[
(
"unionTypeComplex",
Union(
3,
Record(
[
(
"inner",
Int(
6,
),
),
],
),
),
),
],
),
)
"#]]
.assert_debug_eq(&correct_value);
// Bug: We got variant 2 (Fax) here, if we pass the reader schema.
let wrong_value = from_avro_datum(&s, &mut bytes.as_slice(), Some(&s));
expect![[r#"
Ok(
Record(
[
(
"unionTypeComplex",
Union(
2,
Record(
[
(
"inner",
Int(
6,
),
),
],
),
),
),
],
),
)
"#]]
.assert_debug_eq(&wrong_value);

// The bug below can explain what happened.
// The two records below are actually incompatible: https://avro.apache.org/docs/1.11.1/specification/_print/#schema-resolution
// > both schemas are records with the _same (unqualified) name_
// In from_avro_datum, it first reads the value with the writer schema, and then
// it just uses the reader schema to interpret the value.
// The value doesn't have record "name" information. So it wrongly passed the conversion.
// The correct way is that we need to use both the writer and reader schema in the second step to interpret the value.

let s = Schema::parse_str(
r#"
{
"type": "record",
"name": "Root",
"fields": [
{
"name": "a",
"type": "int"
}
]
}
"#,
)
.unwrap();
let s2 = Schema::parse_str(
r#"
{
"type": "record",
"name": "Root222",
"fields": [
{
"name": "a",
"type": "int"
}
]
}
"#,
)
.unwrap();

let bytes = hex::decode("0c").unwrap();
let value = from_avro_datum(&s, &mut bytes.as_slice(), Some(&s2));
expect![[r#"
Ok(
Record(
[
(
"a",
Int(
6,
),
),
],
),
)
"#]]
.assert_debug_eq(&value);
}

#[test]
fn test_convert_decimal() {
// 280
Expand Down
6 changes: 5 additions & 1 deletion src/connector/codec/src/decoder/avro/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,11 @@ pub(super) fn avro_schema_to_struct_field_name(schema: &Schema) -> Result<String
Schema::Map(_) => "map".to_string(),
// Named Complex types
Schema::Enum(_) | Schema::Ref { name: _ } | Schema::Fixed(_) | Schema::Record(_) => {
schema.name().unwrap().fullname(None)
// schema.name().unwrap().fullname(None)
// See test_avro_lib_union_record_bug
// https://github.com/risingwavelabs/risingwave/issues/17632
bail_not_implemented!(issue=17632, "Avro named type used in Union type: {:?}", schema)

}

// Logical types are currently banned. See https://github.com/risingwavelabs/risingwave/issues/17616
Expand Down
Loading

0 comments on commit 6a8c351

Please sign in to comment.