Skip to content

Commit

Permalink
feat(source): support Avro Union type (#17485)
Browse files Browse the repository at this point in the history
Signed-off-by: xxchan <[email protected]>
  • Loading branch information
xxchan authored Jul 9, 2024
1 parent b9ceb8d commit b3e1fad
Show file tree
Hide file tree
Showing 10 changed files with 1,161 additions and 56 deletions.
27 changes: 27 additions & 0 deletions e2e_test/commands/sr_register
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

set -euo pipefail

# Register a schema to schema registry
#
# Usage: sr_register <subject> <schema>
#
# Arguments:
#   subject  Subject to register the schema under (e.g. "<topic>-value").
#   schema   The schema itself, as a JSON document.
#
# Environment:
#   RISEDEV_SCHEMA_REGISTRY_URL  Base URL of the schema registry.
#
# Exit status:
#   0 when the registry accepted the request; non-zero on invalid arguments,
#   when jq cannot parse <schema>, or when the registry answers with an HTTP
#   error status.
#
# https://docs.confluent.io/platform/current/schema-registry/develop/api.html#post--subjects-(string-%20subject)-versions

# Validate arguments (diagnostics go to stderr so stdout only carries the registry response)
if [[ $# -ne 2 ]]; then
    echo "Usage: sr_register <subject> <schema>" >&2
    exit 1
fi

subject="$1"
schema="$2"

if [[ -z $subject || -z $schema ]]; then
    echo "Error: Arguments cannot be empty" >&2
    exit 1
fi

# Fail early with a readable message instead of letting curl hit a malformed URL.
: "${RISEDEV_SCHEMA_REGISTRY_URL:?RISEDEV_SCHEMA_REGISTRY_URL must be set}"

# The registry expects {"schema": "<schema as a JSON-encoded string>"}; jq's `tojson`
# does the string-encoding. printf is used instead of echo so the schema text is
# passed through verbatim.
#
# --fail makes curl exit non-zero on HTTP >= 400; without it a rejected schema would
# still look like a success to callers that only check the exit status. Together with
# `pipefail`, a jq parse error also fails the whole pipeline.
printf '%s\n' "$schema" | jq '{"schema": tojson}' \
    | curl --fail -X POST -H 'content-type:application/vnd.schemaregistry.v1+json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/${subject}/versions"
6 changes: 2 additions & 4 deletions e2e_test/source_inline/kafka/avro/alter_source.slt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ system ok
rpk topic create 'avro_alter_source_test'

system ok
echo '{"type":"record","name":"Root","fields":[{"name":"foo","type":"string"}]}' | jq '{"schema": tojson}' \
| curl -X POST -H 'content-type:application/json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/avro_alter_source_test-value/versions"
sr_register avro_alter_source_test-value '{"type":"record","name":"Root","fields":[{"name":"foo","type":"string"}]}'

statement ok
create source s
Expand All @@ -27,8 +26,7 @@ FORMAT PLAIN ENCODE AVRO (

# create a new version of schema and produce a message
system ok
echo '{"type":"record","name":"Root","fields":[{"name":"bar","type":"int","default":0},{"name":"foo","type":"string"}]}' | jq '{"schema": tojson}' \
| curl -X POST -H 'content-type:application/json' -d @- "${RISEDEV_SCHEMA_REGISTRY_URL}/subjects/avro_alter_source_test-value/versions"
sr_register avro_alter_source_test-value '{"type":"record","name":"Root","fields":[{"name":"bar","type":"int","default":0},{"name":"foo","type":"string"}]}'

system ok
echo '{"foo":"ABC", "bar":1}' | rpk topic produce --schema-id=topic avro_alter_source_test
Expand Down
175 changes: 175 additions & 0 deletions e2e_test/source_inline/kafka/avro/union.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# End-to-end test for reading Avro union types from a Kafka topic whose schema
# lives in a schema registry. This first part covers unions that contain named
# types (records, enums).
#
# Substitution is enabled for the ${...} placeholders used in the statements below.
control substitution on

# Start from a clean state: delete any topic/subject left over from a previous
# run (failures are ignored via `|| true`), then create the topic.
system ok
rpk topic delete 'avro-union' || true; \
(rpk sr subject delete 'avro-union-value' && rpk sr subject delete 'avro-union-value' --permanent) || true;
rpk topic create avro-union

# Register a record schema with three union fields:
# - unionType: a union of two primitives
# - unionTypeComplex: a nullable union of three named records
# - enumField: a nullable union of int and a namespaced enum, defaulting to null
system ok
sr_register avro-union-value '
{
"type": "record",
"name": "Root",
"fields": [
{
"name": "unionType",
"type": ["int", "string"]
},
{
"name": "unionTypeComplex",
"type": [
"null",
{"type": "record", "name": "Email","fields": [{"name":"inner","type":"string"}]},
{"type": "record", "name": "Fax","fields": [{"name":"inner","type":"int"}]},
{"type": "record", "name": "Sms","fields": [{"name":"inner","type":"int"}]}
]
},
{
"name": "enumField",
"type": ["null", "int", {
"type": "enum",
"name": "myEnum",
"namespace": "my.namespace",
"symbols": ["A", "B", "C", "D"]
}],
"default": null
}
]
}
'

# Produce three messages. Each union value is written as {"<branch name>": <value>};
# the second message omits enumField, which has a null default in the schema.
system ok
cat<<EOF | rpk topic produce avro-union --schema-id=topic
{"unionType": {"int":1}, "unionTypeComplex": {"Sms": {"inner":6}}, "enumField": {"my.namespace.myEnum": "A"}}
{"unionType": {"string":"2"}, "unionTypeComplex": {"Fax": {"inner":6}}}
{"unionType": {"int":3}, "unionTypeComplex": {"Email": {"inner":"[email protected]"}}, "enumField": {"int":66}}
EOF

# Creating the source is expected to fail: per the error asserted below, named
# types (here the `Email` record) inside a union are not yet implemented.
statement error
create source avro_union
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro-union'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: connector error
2: failed to convert Avro union to struct
3: Feature is not yet implemented: Avro named type used in Union type: Record(RecordSchema { name: Name { name: "Email", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "inner", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 0, custom_attributes: {} }], lookup: {"inner": 0}, attributes: {} })
Tracking issue: https://github.com/risingwavelabs/risingwave/issues/17632


# FIXME: The queries below are commented out; the results they record are the
# current buggy output.
# NOTE(review): presumably they stay disabled while source creation fails as
# asserted above -- confirm against the tracking issue before re-enabling.


# query ? rowsort
# select * from avro_union
# ----
# ("([email protected])",,)
# (,"(6)",)
# (,"(6)",)

# # Demonstrate how to access union variants (struct fields) below:
# # Note that we need to use quotes.

# query ? rowsort
# select ("enumField")."my.namespace.myEnum" from avro_union;
# ----
# A
# NULL
# NULL

# # To output the union’s tag (i.e. case in protobuf), a case-when can be used.
# query ? rowsort
# select
# case
# when ("unionTypeComplex")."Sms" is not null then 'Sms'
# when ("unionTypeComplex")."Fax" is not null then 'Fax'
# when ("unionTypeComplex")."Email" is not null then 'Email'
# else null -- optional
# end
# from avro_union;
# ----
# Email
# Fax
# Fax



# Simple case: a union made only of primitive types (plus null).
#
# Start from a clean state: delete any topic/subject left over from a previous
# run (failures are ignored via `|| true`), then create the topic.
system ok
rpk topic delete 'avro-union-simple' || true; \
(rpk sr subject delete 'avro-union-simple-value' && rpk sr subject delete 'avro-union-simple-value' --permanent) || true;
rpk topic create avro-union-simple

# Register a record schema with a single field whose type is a union of
# int, string, null and boolean.
system ok
sr_register avro-union-simple-value '
{
"type": "record",
"name": "Root",
"fields": [
{
"name": "unionType",
"type": ["int", "string", "null", "boolean"]
}
]
}
'

# Produce one message per union branch, including the null branch.
system ok
cat<<EOF | rpk topic produce avro-union-simple --schema-id=topic
{"unionType": {"int":1}}
{"unionType": {"string":"2"}}
{"unionType": {"boolean": true}}
{"unionType": null}
EOF

# A union of primitives is supported, so creating the source succeeds.
statement ok
create source avro_union
WITH (
${RISEDEV_KAFKA_WITH_OPTIONS_COMMON},
topic = 'avro-union-simple'
)
FORMAT PLAIN ENCODE AVRO (
schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}'
);


# Each non-null union value is shown as a struct with one field per non-null
# branch (int, string, boolean) in which only the matching branch is set;
# the null message is shown as NULL.
query ? rowsort
select * from avro_union
----
(,,t)
(,2,)
(1,,)
NULL

# Demonstrate how to access union variants (struct fields) below:
# Note that we need to use quotes.

query ? rowsort
select ("unionType")."string" from avro_union;
----
2
NULL
NULL
NULL

# To output the union’s tag (i.e. case in protobuf), a case-when can be used.
query ? rowsort
select
case
when ("unionType")."int" is not null then 'int'
when ("unionType")."string" is not null then 'string'
when ("unionType")."boolean" is not null then 'boolean'
else null -- optional
end
from avro_union;
----
NULL
boolean
int
string

# Clean up: drop the source so the name `avro_union` is free again. Without this,
# re-running this file against the same cluster (or any later test reusing the
# name) fails at `create source avro_union`.
statement ok
drop source avro_union;
2 changes: 2 additions & 0 deletions src/common/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,8 @@ pub trait ScalarRef<'a>: ScalarBounds<ScalarRefImpl<'a>> + 'a + Copy {
macro_rules! scalar_impl_enum {
($( { $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty } ),*) => {
/// `ScalarImpl` embeds all possible scalars in the evaluation framework.
///
/// See `for_all_variants` for the definition.
#[derive(Debug, Clone, PartialEq, Eq, EstimateSize)]
pub enum ScalarImpl {
$( $variant_name($scalar) ),*
Expand Down
Loading

0 comments on commit b3e1fad

Please sign in to comment.