Skip to content

Commit

Permalink
feat(source): Avro with AWS Glue Schema Registry (#17605)
Browse files Browse the repository at this point in the history
  • Loading branch information
xiangjinwu committed Jul 12, 2024
1 parent 3765be3 commit a1c735a
Show file tree
Hide file tree
Showing 13 changed files with 652 additions and 145 deletions.
23 changes: 23 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ aws-config = { version = "1", default-features = false, features = [
aws-credential-types = { version = "1", default-features = false, features = [
"hardcoded-credentials",
] }
aws-sdk-glue = "1"
aws-sdk-kinesis = { version = "1", default-features = false, features = [
"rt-tokio",
"rustls",
Expand Down
121 changes: 121 additions & 0 deletions e2e_test/source_inline/kafka/avro/glue.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
control substitution on

system ok
rpk topic delete 'glue-sample-my-event'

system ok
rpk topic create 'glue-sample-my-event'

system ok
rpk topic produce -f '%v{hex}\n' 'glue-sample-my-event' <<EOF
03005af405ef11b5444281a2e0563e5a734606666f6f80868dc8ebd98404
EOF

statement ok
create source t with (
connector = 'kafka',
properties.bootstrap.server='${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}',
topic = 'glue-sample-my-event')
format plain encode avro (
aws.glue.schema_arn = 'arn:aws:glue:ap-southeast-1:123456123456:schema/default-registry/MyEvent',
aws.glue.mock_config = '{
"by_id":{
"5af405ef-11b5-4442-81a2-e0563e5a7346": {
"type": "record",
"name": "MyEvent",
"fields": [
{
"name": "f1",
"type": "string"
},
{
"name": "f2",
"type": {
"type": "long",
"logicalType": "timestamp-micros"
}
}
]
}
},
"arn_to_latest_id":{
"arn:aws:glue:ap-southeast-1:123456123456:schema/default-registry/MyEvent": "5af405ef-11b5-4442-81a2-e0563e5a7346"
}
}');

query TT
select * from t;
----
foo 2006-01-02 22:04:05.123456+00:00

statement ok
alter source t format plain encode avro (
aws.glue.schema_arn = 'arn:aws:glue:ap-southeast-1:123456123456:schema/default-registry/MyEvent',
aws.glue.mock_config = '{
"by_id":{
"5af405ef-11b5-4442-81a2-e0563e5a7346": {
"type": "record",
"name": "MyEvent",
"fields": [
{
"name": "f1",
"type": "string"
},
{
"name": "f2",
"type": {
"type": "long",
"logicalType": "timestamp-micros"
}
}
]
},
"4516411b-b1e7-4e67-839f-3ef1b8c29280": {
"type": "record",
"name": "MyEvent",
"fields": [
{
"name": "f1",
"type": "string"
},
{
"name": "f2",
"type": {
"type": "long",
"logicalType": "timestamp-micros"
}
},
{
"name": "f3",
"type": ["null", "bytes"],
"default": null
}
]
}
},
"arn_to_latest_id":{
"arn:aws:glue:ap-southeast-1:123456123456:schema/default-registry/MyEvent": "4516411b-b1e7-4e67-839f-3ef1b8c29280"
}
}');

query TTT
select * from t;
----
foo 2006-01-02 22:04:05.123456+00:00 NULL

system ok
rpk topic produce -f '%v{hex}\n' 'glue-sample-my-event' <<EOF
03004516411bb1e74e67839f3ef1b8c292800441428089b5e9a886ee050208deadbeef
EOF

query TTT
select * from t order by 2;
----
foo 2006-01-02 22:04:05.123456+00:00 NULL
AB 2022-04-08 00:00:00.123456+00:00 \xdeadbeef

statement ok
drop source t;

system ok
rpk topic delete 'glue-sample-my-event'
5 changes: 5 additions & 0 deletions proto/catalog.proto
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ message StreamSourceInfo {
// deprecated
plan_common.RowFormatType row_format = 1;
string row_schema_location = 2;
// This means *use **confluent** schema registry* and is `false` for **aws glue** schema registry.
// Eventually we will deprecate it and rely on `enum SchemaLocation` derived from `format_encode_options` below.
// * schema.location false
// * schema.registry true
// * aws.glue.schema_arn false
bool use_schema_registry = 3;
string proto_message_name = 4;
int32 csv_delimiter = 5;
Expand Down
1 change: 1 addition & 0 deletions src/connector/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ aws-config = { workspace = true }
aws-credential-types = { workspace = true }
aws-msk-iam-sasl-signer = "1.0.0"
aws-sdk-dynamodb = "1"
aws-sdk-glue = { workspace = true }
aws-sdk-kinesis = { workspace = true }
aws-sdk-s3 = { workspace = true }
aws-smithy-http = { workspace = true }
Expand Down
Loading

0 comments on commit a1c735a

Please sign in to comment.