Merge branch 'main' into zp/deprecate-etcd
yezizp2012 committed Oct 15, 2024
2 parents a7ed229 + cfc3f84 commit cdf2395
Showing 55 changed files with 233 additions and 126 deletions.
2 changes: 1 addition & 1 deletion .gitattributes
@@ -1,6 +1,6 @@
# regression test data
src/tests/regress/data/** linguist-vendored
# source test data
- scripts/source/test_data/** linguist-vendored
+ e2e_test/source_legacy/basic/scripts/test_data/** linguist-vendored
# generated grafana dashboard
grafana/risingwave-dashboard.json linguist-generated
77 changes: 39 additions & 38 deletions Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions ci/scripts/deterministic-e2e-test.sh
@@ -11,7 +11,7 @@ download-and-decompress-artifact risingwave_simulation .
chmod +x ./risingwave_simulation

echo "--- Extract data for Kafka"
- pushd ./scripts/source/
+ pushd ./e2e_test/source_legacy/basic/scripts/
mkdir -p ./test_data
unzip -o test_data.zip -d .
popd
@@ -39,7 +39,7 @@ echo "--- deterministic simulation e2e, ci-3cn-2fe, batch"
seq "$TEST_NUM" | parallel './risingwave_simulation ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/batch-{}.log && rm $LOGDIR/batch-{}.log'

echo "--- deterministic simulation e2e, ci-3cn-2fe, kafka source"
seq "$TEST_NUM" | parallel './risingwave_simulation --kafka-datadir=./scripts/source/test_data ./e2e_test/source_legacy/basic/kafka\*.slt 2> $LOGDIR/source-{}.log && rm $LOGDIR/source-{}.log'
seq "$TEST_NUM" | parallel './risingwave_simulation --kafka-datadir=./e2e_test/source_legacy/basic/scripts/test_data ./e2e_test/source_legacy/basic/kafka\*.slt 2> $LOGDIR/source-{}.log && rm $LOGDIR/source-{}.log'

echo "--- deterministic simulation e2e, ci-3cn-2fe, parallel, streaming"
seq "$TEST_NUM" | parallel './risingwave_simulation -j 16 ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/parallel-streaming-{}.log && rm $LOGDIR/parallel-streaming-{}.log'
2 changes: 1 addition & 1 deletion ci/scripts/deterministic-recovery-test.sh
@@ -93,6 +93,6 @@ echo "--- deterministic simulation e2e, ci-3cn-2fe-1meta, recovery, kafka source
seq "$TEST_NUM" | parallel './risingwave_simulation \
--kill \
--kill-rate=${KILL_RATE} \
- --kafka-datadir=./scripts/source/test_data \
+ --kafka-datadir=./e2e_test/source_legacy/basic/scripts/test_data \
${EXTRA_ARGS:-} \
./e2e_test/source_legacy/basic/kafka\*.slt 2> $LOGDIR/recovery-source-{}.log && rm $LOGDIR/recovery-source-{}.log'
4 changes: 2 additions & 2 deletions ci/scripts/e2e-source-test.sh
@@ -148,11 +148,11 @@ echo "--- Kill cluster"
risedev ci-kill
export RISINGWAVE_CI=true

echo "--- e2e, ci-kafka-plus-pubsub, kafka and pubsub source"
echo "--- e2e, ci-kafka-plus-pubsub, legacy kafka tests"
export RUST_MIN_STACK=4194304
RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \
risedev ci-start ci-kafka
- ./scripts/source/prepare_ci_kafka.sh
+ ./e2e_test/source_legacy/basic/scripts/prepare_ci_kafka.sh
risedev slt './e2e_test/source_legacy/basic/*.slt'
risedev slt './e2e_test/source_legacy/basic/old_row_format_syntax/*.slt'

6 changes: 3 additions & 3 deletions e2e_test/source_inline/kafka/avro/name_strategy.slt
@@ -25,7 +25,7 @@ create source s1 () with (
# Currently we are abusing this test case to also test data types.

system ok
- python3 scripts/source/schema_registry_producer.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" e2e_test/source_inline/kafka/avro/upsert_avro_json "topic" "avro"
+ python3 e2e_test/source_legacy/basic/scripts/schema_registry_producer.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" e2e_test/source_inline/kafka/avro/upsert_avro_json "topic" "avro"

statement ok
CREATE TABLE t_topic ( primary key (rw_key) )
@@ -44,7 +44,7 @@ FORMAT UPSERT ENCODE AVRO (schema.registry = '${RISEDEV_SCHEMA_REGISTRY_URL}');

## topic: upsert_avro_json-record, key subject: string, value subject: CPLM.OBJ_ATTRIBUTE_VALUE
system ok
- python3 scripts/source/schema_registry_producer.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" e2e_test/source_inline/kafka/avro/upsert_avro_json "record" "avro"
+ python3 e2e_test/source_legacy/basic/scripts/schema_registry_producer.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" e2e_test/source_inline/kafka/avro/upsert_avro_json "record" "avro"


statement error key\.message
@@ -80,7 +80,7 @@ create table t_record_format_plain () with (
## key subject: upsert_avro_json-topic-record-string
## value subject: upsert_avro_json-topic-record-CPLM.OBJ_ATTRIBUTE_VALUE
system ok
- python3 scripts/source/schema_registry_producer.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" e2e_test/source_inline/kafka/avro/upsert_avro_json "topic-record" "avro"
+ python3 e2e_test/source_legacy/basic/scripts/schema_registry_producer.py "${RISEDEV_KAFKA_BOOTSTRAP_SERVERS}" "${RISEDEV_SCHEMA_REGISTRY_URL}" e2e_test/source_inline/kafka/avro/upsert_avro_json "topic-record" "avro"



4 changes: 2 additions & 2 deletions e2e_test/source_legacy/README.md
@@ -7,8 +7,8 @@
Test in this directory needs some prior setup.

- See also `ci/scripts/e2e-source-test.sh`, and `scripts/source`
+ See also `ci/scripts/e2e-source-test.sh`, and `e2e_test/source_legacy/basic/scripts`

## Kafka

- `scripts/source/test_data` contains the data. Filename's convention is `<topic_name>.<n_partitions>`.
+ `e2e_test/source_legacy/basic/scripts/test_data` contains the data. Filename's convention is `<topic_name>.<n_partitions>`.
12 changes: 12 additions & 0 deletions e2e_test/source_legacy/basic/scripts/README.md
@@ -0,0 +1,12 @@
This folder contains scripts to prepare data for testing sources.

## Kafka

`e2e_test/source_legacy/basic/scripts/test_data` contains the data. Filename's convention is `<topic_name>.<n_partitions>`.

- If `<topic_name>` ends with `bin`, the whole file is a message with binary data.
- If `<topic_name>` ends with `avro_json` or `json_schema`:
- The first line is the schema. Key and value are separated by `^`.
- The rest of the lines are messages in JSON format. Key and value are separated by `^`.
- Produced to Kafka with `schema_registry_producer.py` (serialized to Avro or JSON)
- Otherwise, each line is a message, and key/value is separated by `^`.
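
For illustration only (not part of this commit): a minimal Python sketch of how a plain `^`-separated test data file following the convention above could be read. The file name `orders.3` and the helper `parse_test_data_file` are assumptions for the example; the actual producer scripts live in `e2e_test/source_legacy/basic/scripts/`.

```python
from pathlib import Path


def parse_test_data_file(path: str):
    """Parse a plain test data file in which each line is one Kafka message,
    with key and value separated by '^' (the last case in the list above)."""
    p = Path(path)
    # Filename convention: <topic_name>.<n_partitions>, e.g. "orders.3"
    topic, _, n_partitions = p.name.rpartition(".")
    messages = []
    for line in p.read_text().splitlines():
        if not line:
            continue
        if "^" in line:
            key, _, value = line.partition("^")
        else:
            # Assumption: a line without '^' is a value-only message.
            key, value = None, line
        messages.append((key, value))
    return topic, int(n_partitions), messages


if __name__ == "__main__":
    # "test_data/orders.3" is a hypothetical file used only for illustration.
    topic, partitions, messages = parse_test_data_file("test_data/orders.3")
    print(f"topic={topic}, partitions={partitions}, messages={len(messages)}")
```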