feat: allow configure other additional columns for connectors #14215

Merged (24 commits) · Jan 10, 2024
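For context: this PR extends the CREATE TABLE / CREATE SOURCE syntax with INCLUDE <field> AS <column> clauses that surface connector metadata (key, partition, offset, timestamp, headers) as table columns. A minimal sketch of the syntax, assembled from the tests added in this diff (the connector properties are illustrative):

```sql
CREATE TABLE additional_columns (a int)
INCLUDE key AS key_col             -- raw message key
INCLUDE partition AS partition_col -- Kafka partition id
INCLUDE offset AS offset_col       -- message offset
INCLUDE timestamp AS timestamp_col -- message timestamp
INCLUDE header AS header_col       -- headers as a list of (key, value) pairs
WITH (
    connector = 'kafka',
    properties.bootstrap.server = 'message_queue:29092',
    topic = 'kafka_additional_columns'
) FORMAT PLAIN ENCODE JSON;
```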
19 changes: 15 additions & 4 deletions Cargo.lock (generated file; diff not rendered)

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -107,7 +107,7 @@ aws-types = "1"
etcd-client = { package = "madsim-etcd-client", version = "0.4" }
futures-async-stream = "0.2.9"
hytra = "0.1"
-rdkafka = { package = "madsim-rdkafka", version = "0.3.0", features = [
+rdkafka = { package = "madsim-rdkafka", version = "0.3.1", features = [
Member: Why change this?

Contributor (author): I think @wangrunji0408 made this change to support some APIs in madsim.

"cmake-build",
] }
hashbrown = { version = "0.14.0", features = [
10 changes: 8 additions & 2 deletions e2e_test/s3/json_file.py
@@ -25,7 +25,9 @@ def do_test(client, config, N, prefix):
name TEXT,
sex int,
mark int,
-) WITH (
+)
+include file as file_col
+WITH (
connector = 's3',
match_pattern = '{prefix}*.json',
s3.region_name = '{config['S3_REGION']}',
@@ -60,14 +62,18 @@ def do_test(client, config, N, prefix):
cur.execute(
'select count(*), sum(id), sum(sex), sum(mark) from s3_test_jsonfile')
result = cur.fetchone()

print(result)

assert result[0] == total_row
assert result[1] == int(((N - 1) * N / 2))
assert result[2] == int(N / 2)
assert result[3] == 0

cur.execute('select file_col from s3_test_jsonfile')
result = cur.fetchone()
file_col = result[0]
print(file_col)

cur.execute('drop table s3_test_jsonfile')

cur.close()
Expand Down
42 changes: 42 additions & 0 deletions e2e_test/source/basic/inlcude_key_as.slt
@@ -48,9 +48,25 @@
topic = 'upsert_json')
FORMAT PLAIN ENCODE JSON

statement ok
create table additional_columns (a int)
include key as key_col
include partition as partition_col
include offset as offset_col
include timestamp as timestamp_col
include header as header_col
WITH (
connector = 'kafka',
properties.bootstrap.server = 'message_queue:29092',
topic = 'kafka_additional_columns')
FORMAT PLAIN ENCODE JSON

statement ok
select * from upsert_students_default_key;

statement ok
select * from additional_columns;

# Wait enough time to ensure SourceExecutor consumes all Kafka data.
sleep 3s

@@ -59,5 +75,31 @@
query I
select count(rw_key) from upsert_students_default_key
----
15

query I
SELECT count(*)
FROM additional_columns
WHERE key_col IS NOT NULL
AND partition_col IS NOT NULL
AND offset_col IS NOT NULL
AND timestamp_col IS NOT NULL
AND header_col IS NOT NULL
----
101
Contributor: How do we know what count to expect here? 🤔 Where can I find the input data?

Contributor (author): The data comes from this producer loop:

for i in {0..100}; do echo "key$i:{\"a\": $i}" | ${KCAT_BIN} -P -b message_queue:29092 -t ${ADDI_COLUMN_TOPIC} -K : -H "header1=v1" -H "header2=v2"; done

It generates messages like:

key     payload     header
key1    {"a": 1}    [(header1, v1), (header2, v2)]
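To see the shape of the ingested rows, one could run a query like the following (a sketch; the commented output values are assumptions based on the producer loop above, with the key surfacing as bytea under FORMAT PLAIN):

```sql
SELECT a, key_col, partition_col, offset_col
FROM additional_columns
ORDER BY offset_col
LIMIT 1;
-- Roughly: a = 0, key_col = '\x6b657930' (the bytes of "key0"),
-- partition_col = 0, offset_col = 0
```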

Member: You might mention this in a comment to avoid confusion.


# the input data is from scripts/source/prepare_ci_kafka.sh
# ```
# for i in {0..100}; do echo "key$i:{\"a\": $i}" | ${KCAT_BIN} -P -b message_queue:29092 -t ${ADDI_COLUMN_TOPIC} -K : -H "header1=v1" -H "header2=v2"; done
# ```
# The command generates 101 messages with key `key0` to `key100` and value `{"a": 0}` to `{"a": 100}`, with fixed headers `header1=v1` and `header2=v2`.

query TT
SELECT (header_col[1]).key AS key, (header_col[1]).value::text AS value
FROM additional_columns limit 1;
----
header1 \x7631
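(Note: `\x7631` is the bytea hex rendering of the ASCII string `v1`. As a side illustration, not part of the PR, the headers could be decoded back to text with a query like the one below, assuming PostgreSQL-style `unnest` and `convert_from` are available in this dialect:)

```sql
-- Flatten the (key, value) header structs and decode the bytea values.
SELECT (h).key AS header_key,
       convert_from((h).value, 'utf8') AS header_value
FROM additional_columns
CROSS JOIN unnest(header_col) AS h
LIMIT 2;
-- Expected: (header1, v1) and (header2, v2)
```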

statement ok
drop table upsert_students_default_key

statement ok
drop table additional_columns
4 changes: 4 additions & 0 deletions scripts/source/prepare_ci_kafka.sh
@@ -74,6 +74,10 @@ for filename in $kafka_data_files; do
) &
done

# test additional columns: produce messages with headers
ADDI_COLUMN_TOPIC="kafka_additional_columns"
for i in {0..100}; do echo "key$i:{\"a\": $i}" | ${KCAT_BIN} -P -b message_queue:29092 -t ${ADDI_COLUMN_TOPIC} -K : -H "header1=v1" -H "header2=v2"; done

# write schema with name strategy

## topic: upsert_avro_json-record, key subject: string, value subject: CPLM.OBJ_ATTRIBUTE_VALUE
1 change: 1 addition & 0 deletions src/connector/Cargo.toml
@@ -34,6 +34,7 @@ aws-sdk-s3 = { workspace = true }
aws-smithy-http = { workspace = true }
aws-smithy-runtime-api = { workspace = true }
aws-smithy-types = { workspace = true }
+aws-smithy-types-convert = { version = "0.60.1", features = ["convert-chrono"] }
aws-types = { workspace = true }
base64 = "0.21"
byteorder = "1"