diff --git a/.gitignore b/.gitignore index 19fb6643dd8a6..375738f67093e 100644 --- a/.gitignore +++ b/.gitignore @@ -74,4 +74,7 @@ simulation-it-test.tar.zst # hummock-trace .trace +# spark binary +e2e_test/iceberg/spark-*-bin* + **/poetry.lock \ No newline at end of file diff --git a/ci/scripts/e2e-iceberg-cdc.sh b/ci/scripts/e2e-iceberg-cdc.sh new file mode 100755 index 0000000000000..081f5bbd2afcb --- /dev/null +++ b/ci/scripts/e2e-iceberg-cdc.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +# prepare environment +export CONNECTOR_RPC_ENDPOINT="localhost:50051" +export CONNECTOR_LIBS_PATH="./connector-node/libs" + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node + +echo "--- e2e, ci-1cn-1fe, iceberg cdc" + +node_port=50051 +node_timeout=10 + +wait_for_connector_node_start() { + start_time=$(date +%s) + while : + do + if nc -z localhost $node_port; then + echo "Port $node_port is listened! Connector Node is up!" + break + fi + + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + if [ $elapsed_time -ge $node_timeout ]; then + echo "Timeout waiting for port $node_port to be listened!" + exit 1 + fi + sleep 0.1 + done + sleep 2 +} + +echo "--- starting risingwave cluster with connector node" + +RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ +cargo make ci-start ci-1cn-1fe-with-recovery +./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-node.log 2>&1 & +echo "waiting for connector node to start" +wait_for_connector_node_start + +# prepare minio iceberg sink +echo "--- preparing iceberg" +.risingwave/bin/mcli -C .risingwave/config/mcli mb hummock-minio/icebergdata + +cd e2e_test/iceberg +bash ./start_spark_connect_server.sh + +# Don't remove the `--quiet` option since poetry has a bug when printing output, see +# https://github.com/python-poetry/poetry/issues/3412 +"$HOME"/.local/bin/poetry update --quiet + +# 1. import data to mysql +mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc.sql + +# 2. create table and sink +"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc_init.toml + +# 3. insert new data to mysql +mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc_insert.sql + +sleep 20 + +# 4. check change +"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc.toml \ No newline at end of file diff --git a/ci/workflows/integration-tests.yml b/ci/workflows/integration-tests.yml index 4bd0ec1a000b1..455f29b210ec1 100644 --- a/ci/workflows/integration-tests.yml +++ b/ci/workflows/integration-tests.yml @@ -29,6 +29,7 @@ steps: - "postgres-cdc" - "mysql-sink" - "postgres-sink" + - "iceberg-cdc" # - "iceberg-sink" - "debezium-mysql" format: @@ -79,6 +80,10 @@ steps: # testcase: "iceberg-sink" # format: "protobuf" # skip: true + - with: + testcase: "iceberg-cdc" + format: "protobuf" + skip: true - with: testcase: "debezium-mysql" format: "protobuf" diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml index 985bd0be4b822..3aaa09f0d7716 100644 --- a/ci/workflows/pull-request.yml +++ b/ci/workflows/pull-request.yml @@ -209,6 +209,21 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end iceberg cdc test" + if: build.pull_request.labels includes "ci/run-e2e-iceberg-sink-tests" + command: "ci/scripts/e2e-iceberg-cdc.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "end-to-end pulsar sink test" if: build.pull_request.labels includes "ci/run-e2e-pulsar-sink-tests" command: "ci/scripts/e2e-pulsar-sink-test.sh -p ci-dev" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index d25c94daf2670..4dbd5fe5bb28d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -260,6 +260,7 @@ services: MINIO_PROMETHEUS_URL: "http://prometheus-0:9500" MINIO_ROOT_PASSWORD: hummockadmin MINIO_ROOT_USER: hummockadmin + MINIO_DOMAIN: "minio-0" container_name: minio-0 healthcheck: test: diff --git a/e2e_test/iceberg/main.py b/e2e_test/iceberg/main.py index fa07aa367a9b3..3f3120227e6e7 100644 --- a/e2e_test/iceberg/main.py +++ b/e2e_test/iceberg/main.py @@ -42,14 +42,16 @@ def init_iceberg_table(args,init_sqls): spark.sql(sql) -def init_risingwave_mv(args,slt): +def execute_slt(args,slt): + if slt is None or slt == "": + return rw_config = args['risingwave'] cmd = f"sqllogictest -p {rw_config['port']} -d {rw_config['db']} {slt}" print(f"Command line is [{cmd}]") subprocess.run(cmd, shell=True, check=True) - time.sleep(10) + time.sleep(30) def verify_result(args,verify_sql,verify_schema,verify_data): @@ -110,6 +112,6 @@ def drop_table(args,drop_sqls): print({section: dict(config[section]) for section in config.sections()}) init_iceberg_table(config,init_sqls) - init_risingwave_mv(config,slt) + execute_slt(config,slt) verify_result(config,verify_sql,verify_schema,verify_data) drop_table(config,drop_sqls) diff --git a/e2e_test/iceberg/test_case/cdc/load.slt b/e2e_test/iceberg/test_case/cdc/load.slt new file mode 100644 index 0000000000000..caefd1326bbda --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/load.slt @@ -0,0 +1,46 @@ +# CDC source basic test + +# enable cdc backfill in ci +statement ok +set cdc_backfill='true'; + +statement ok +create table products ( id INT, + name STRING, + description STRING, + PRIMARY KEY (id) +) with ( + connector = 'mysql-cdc', + hostname = 'mysql', + port = '3306', + username = 'root', + password = '123456', + database.name = 'my@db', + table.name = 'products', + server.id = '5085' +); + + +statement ok +CREATE SINK s1 AS select * from products WITH ( + connector = 'iceberg', + type = 'upsert', + force_append_only = 'false', + database.name = 'demo', + table.name = 'demo_db.demo_table', + catalog.type = 'storage', + warehouse.path = 's3://icebergdata/demo', + s3.endpoint = 'http://127.0.0.1:9301', + s3.region = 'us-east-1', + s3.access.key = 'hummockadmin', + s3.secret.key = 'hummockadmin', + primary_key = 'id' +); + +query I +select count(*) from products; +---- +8 + +statement ok +flush; diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql new file mode 100644 index 0000000000000..b7b6f13af83cf --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql @@ -0,0 +1,21 @@ +DROP DATABASE IF EXISTS `my@db`; +CREATE DATABASE `my@db`; + +USE `my@db`; + +CREATE TABLE products ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description VARCHAR(512) +); + +ALTER TABLE products AUTO_INCREMENT = 101; + +INSERT INTO products VALUES (default,"101","101"), +(default,"102","102"), +(default,"103","103"), +(default,"104","104"), +(default,"105","105"), +(default,"106","106"), +(default,"107","107"), +(default,"108","108") diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql new file mode 100644 index 0000000000000..641d6220ea8dc --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql @@ -0,0 +1,7 @@ +USE `my@db`; + +INSERT INTO products VALUES (default,"109","109"), +(default,"110","110"), +(default,"111","111"), +(default,"112","112"), +(default,"113","113"); diff --git a/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml b/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml new file mode 100644 index 0000000000000..5ab9647b12eb0 --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml @@ -0,0 +1,25 @@ +init_sqls = [] + +slt = '' + +verify_schema = ['int','string','string'] + +verify_sql = 'SELECT * FROM demo_db.demo_table ORDER BY id ASC' + +verify_data = """ +101,101,101 +102,102,102 +103,103,103 +104,104,104 +105,105,105 +106,106,106 +107,107,107 +108,108,108 +109,109,109 +110,110,110 +111,111,111 +112,112,112 +113,113,113 +""" + +drop_sqls = [] diff --git a/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml b/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml new file mode 100644 index 0000000000000..17e5f7497aae5 --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml @@ -0,0 +1,31 @@ +init_sqls = [ + 'CREATE SCHEMA IF NOT EXISTS demo_db', + 'DROP TABLE IF EXISTS demo_db.demo_table', + ''' + CREATE TABLE demo_db.demo_table ( + id int, + name string, + description string + ) USING iceberg + TBLPROPERTIES ('format-version'='2'); + ''' +] + +slt = 'test_case/cdc/load.slt' + +verify_schema = ['int','string','string'] + +verify_sql = 'SELECT * FROM demo_db.demo_table ORDER BY id ASC' + +verify_data = """ +101,101,101 +102,102,102 +103,103,103 +104,104,104 +105,105,105 +106,106,106 +107,107,107 +108,108,108 +""" + +drop_sqls = [] diff --git a/integration_tests/iceberg-cdc/README.md b/integration_tests/iceberg-cdc/README.md new file mode 100644 index 0000000000000..56f40172c3dfa --- /dev/null +++ b/integration_tests/iceberg-cdc/README.md @@ -0,0 +1,5 @@ +# Iceberg CDC Integration Tests +`mysql -> rw -> iceberg` + +# How to run +./run_test.sh \ No newline at end of file diff --git a/integration_tests/iceberg-cdc/docker-compose.yaml b/integration_tests/iceberg-cdc/docker-compose.yaml new file mode 100644 index 0000000000000..8e9ad1062ef38 --- /dev/null +++ b/integration_tests/iceberg-cdc/docker-compose.yaml @@ -0,0 +1,142 @@ +version: '3.8' + +services: + compactor-0: + extends: + file: ../../docker/docker-compose.yml + service: compactor-0 + compute-node-0: + extends: + file: ../../docker/docker-compose.yml + service: compute-node-0 + etcd-0: + extends: + file: ../../docker/docker-compose.yml + service: etcd-0 + frontend-node-0: + extends: + file: ../../docker/docker-compose.yml + service: frontend-node-0 + meta-node-0: + extends: + file: ../../docker/docker-compose.yml + service: meta-node-0 + grafana-0: + extends: + file: ../../docker/docker-compose.yml + service: grafana-0 + prometheus-0: + extends: + file: ../../docker/docker-compose.yml + service: prometheus-0 + minio-0: + extends: + file: ../../docker/docker-compose.yml + service: minio-0 + mc: + depends_on: + - minio-0 + image: minio/mc + environment: + - AWS_ACCESS_KEY_ID=hummockadmin + - AWS_SECRET_ACCESS_KEY=hummockadmin + - AWS_REGION=us-east-1 + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc config host add minio http://minio-0:9301 hummockadmin hummockadmin) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc rm -r --force minio/icebergdata; + /usr/bin/mc mb minio/icebergdata; + /usr/bin/mc anonymous set public minio/icebergdata; + tail -f /dev/null + " + + mysql: + image: mysql:8.0 + expose: + - 3306 + ports: + - "3306:3306" + environment: + - MYSQL_ROOT_PASSWORD=123456 + - MYSQL_USER=mysqluser + - MYSQL_PASSWORD=mysqlpw + - MYSQL_DATABASE=mydb + healthcheck: + test: [ "CMD-SHELL", "mysqladmin ping -h 127.0.0.1 -u root -p123456" ] + interval: 5s + timeout: 5s + retries: 5 + container_name: mysql + prepare_mysql: + image: mysql:8.0 + depends_on: + - mysql + command: + - /bin/sh + - -c + - "mysql -p123456 -h mysql mydb < mysql_prepare.sql" + volumes: + - "./mysql_prepare.sql:/mysql_prepare.sql" + container_name: prepare_mysql + restart: on-failure + + rest: + image: tabulario/iceberg-rest:0.6.0 + environment: + - AWS_ACCESS_KEY_ID=hummockadmin + - AWS_SECRET_ACCESS_KEY=hummockadmin + - AWS_REGION=us-east-1 + - CATALOG_CATOLOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog + - CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory + - CATALOG_WAREHOUSE=s3://icebergdata/demo + - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO + - CATALOG_S3_ENDPOINT=http://minio-0:9301 + depends_on: + - minio-0 + # let the rest access minio through: hummock001.minio-0 + links: + - minio-0:icebergdata.minio-0 + expose: + - 8181 + ports: + - "8181:8181" + + spark: + depends_on: + - minio-0 + - rest + image: ghcr.io/icelake-io/icelake-spark:latest + environment: + - AWS_ACCESS_KEY_ID=hummockadmin + - AWS_SECRET_ACCESS_KEY=hummockadmin + - AWS_REGION=us-east-1 + - SPARK_HOME=/opt/spark + - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin:/opt/spark/sbin + user: root + links: + - minio-0:icebergdata.minio-0 + expose: + - 15002 + ports: + - "15002:15002" + healthcheck: + test: netstat -ltn | grep -c 15002 + interval: 1s + retries: 1200 + volumes: + - ./spark:/spark + command: [ "bash", "/spark/spark-connect-server.sh" ] + +volumes: + compute-node-0: + external: false + etcd-0: + external: false + grafana-0: + external: false + minio-0: + external: false + prometheus-0: + external: false + spark: + external: false diff --git a/integration_tests/iceberg-cdc/mysql_prepare.sql b/integration_tests/iceberg-cdc/mysql_prepare.sql new file mode 100644 index 0000000000000..3e5a236a41205 --- /dev/null +++ b/integration_tests/iceberg-cdc/mysql_prepare.sql @@ -0,0 +1,15 @@ +-- mysql -p123456 -uroot -h 127.0.0.1 mydb < mysql_prepare.sql +-- +-- Mysql +USE mydb; + +CREATE TABLE user_behaviors ( + user_id VARCHAR(60), + target_id VARCHAR(60), + target_type VARCHAR(60), + event_timestamp VARCHAR(100), + behavior_type VARCHAR(60), + parent_target_type VARCHAR(60), + parent_target_id VARCHAR(60), + PRIMARY KEY(user_id, target_id, event_timestamp) +); diff --git a/integration_tests/iceberg-cdc/python/check.py b/integration_tests/iceberg-cdc/python/check.py new file mode 100644 index 0000000000000..699fa4df29c30 --- /dev/null +++ b/integration_tests/iceberg-cdc/python/check.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession +import configparser +import psycopg2 + +def check_spark_table(args): + expect_row_count = 0 + rw_config = args['risingwave'] + with psycopg2.connect(database=rw_config['db'], user=rw_config['user'], host=rw_config['host'], + port=rw_config['port']) as conn: + with conn.cursor() as cursor: + cursor.execute("SELECT COUNT(*) FROM user_behaviors") + expect_row_count = cursor.fetchone()[0] + print(f"expect_row_count is {expect_row_count}") + spark_config = args['spark'] + spark = SparkSession.builder.remote(spark_config['url']).getOrCreate() + actual_row_count = spark.sql("SELECT COUNT(*) FROM s1.t1").collect()[0][0] + print(f"actual_row_count is {actual_row_count}") + assert actual_row_count==expect_row_count + + +if __name__ == "__main__": + config = configparser.ConfigParser() + config.read("config.ini") + print({section: dict(config[section]) for section in config.sections()}) + check_spark_table(config) diff --git a/integration_tests/iceberg-cdc/python/config.ini b/integration_tests/iceberg-cdc/python/config.ini new file mode 100644 index 0000000000000..bd95eddc5b80e --- /dev/null +++ b/integration_tests/iceberg-cdc/python/config.ini @@ -0,0 +1,8 @@ +[spark] +url=sc://localhost:15002 + +[risingwave] +db=dev +user=root +host=127.0.0.1 +port=4566 diff --git a/integration_tests/iceberg-cdc/python/init.py b/integration_tests/iceberg-cdc/python/init.py new file mode 100644 index 0000000000000..289fa2f161889 --- /dev/null +++ b/integration_tests/iceberg-cdc/python/init.py @@ -0,0 +1,103 @@ +from pyspark.sql import SparkSession +import configparser +import psycopg2 + + +def init_spark_table(args): + spark_config = args['spark'] + spark = SparkSession.builder.remote(spark_config['url']).getOrCreate() + + init_table_sqls = [ + "CREATE SCHEMA IF NOT EXISTS s1", + "DROP TABLE IF EXISTS s1.t1", + """ + CREATE TABLE s1.t1 + ( + user_id string, + target_id string, + target_type string, + event_timestamp string, + behavior_type string, + parent_target_type string, + parent_target_id string + ) USING iceberg + TBLPROPERTIES ('format-version'='2'); + """, + ] + + for sql in init_table_sqls: + print(f"Executing sql: {sql}") + spark.sql(sql) + + +def init_risingwave_mv(args): + rw_config = args['risingwave'] + sqls = [ + "set streaming_parallelism = 4", + """ + CREATE TABLE user_behaviors ( + user_id VARCHAR, + target_id VARCHAR, + target_type VARCHAR, + event_timestamp VARCHAR, + behavior_type VARCHAR, + parent_target_type VARCHAR, + parent_target_id VARCHAR, + PRIMARY KEY(user_id, target_id, event_timestamp) + ) with ( + connector = 'mysql-cdc', + hostname = 'mysql', + port = '3306', + username = 'root', + password = '123456', + database.name = 'mydb', + table.name = 'user_behaviors', + server.id = '1' + ); + """, + # f""" + # CREATE SINK s1 + # AS SELECT * FROM user_behaviors + # WITH ( + # connector='iceberg', + # type='upsert', + # primary_key = 'user_id, target_id, event_timestamp', + # catalog.type = 'storage', + # s3.endpoint = 'http://minio-0:9301', + # s3.access.key = 'hummockadmin', + # s3.secret.key = 'hummockadmin', + # database.name='demo', + # table.name='s1.t1',warehouse.path = 's3://hummock001/icebergdata/demo',s3.region = 'us-east-1' + # ); + # """ + f""" + CREATE SINK s1 + AS SELECT * FROM user_behaviors + WITH ( + connector='iceberg', + type='upsert', + primary_key = 'user_id, target_id, event_timestamp', + catalog.type = 'rest', + catalog.uri = 'http://rest:8181', + s3.endpoint = 'http://minio-0:9301', + s3.access.key = 'hummockadmin', + s3.secret.key = 'hummockadmin', + database.name='demo', + table.name='s1.t1',warehouse.path = 's3://icebergdata/demo/s1/t1',s3.region = 'us-east-1' + ); + """ + ] + with psycopg2.connect(database=rw_config['db'], user=rw_config['user'], host=rw_config['host'], + port=rw_config['port']) as conn: + with conn.cursor() as cursor: + for sql in sqls: + print(f"Executing sql {sql}") + cursor.execute(sql) + + +if __name__ == "__main__": + config = configparser.ConfigParser() + config.read("config.ini") + print({section: dict(config[section]) for section in config.sections()}) + init_spark_table(config) + init_risingwave_mv(config) diff --git a/integration_tests/iceberg-cdc/python/pyproject.toml b/integration_tests/iceberg-cdc/python/pyproject.toml new file mode 100644 index 0000000000000..4c7bce1165796 --- /dev/null +++ b/integration_tests/iceberg-cdc/python/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "icelake-integration-tests" +version = "0.0.9" +description = "" +authors = ["Renjie Liu "] +readme = "README.md" +packages = [{include = "icelake_integration_tests"}] + +[tool.poetry.dependencies] +python = "^3.11" +pyspark = { version = "3.4.1", extras = ["sql", "connect"] } +psycopg2-binary = "^2.9" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/integration_tests/iceberg-cdc/run_test.sh b/integration_tests/iceberg-cdc/run_test.sh new file mode 100755 index 0000000000000..2d8b691bc7284 --- /dev/null +++ b/integration_tests/iceberg-cdc/run_test.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Start test environment. +docker-compose up -d --wait + +# To avoid exiting by unhealth, set it after start environment. +set -ex + +# Generate data +docker build -t iceberg-cdc-datagen ../datagen +timeout 20 docker run --network=iceberg-cdc_default iceberg-cdc-datagen /datagen --mode clickstream --qps 1 mysql --user mysqluser --password mysqlpw --host mysql --port 3306 --db mydb & + +cd python +poetry update --quiet +# Init source, mv, and sink. +poetry run python init.py +# Wait for sink to be finished. +sleep 40; +poetry run python check.py diff --git a/integration_tests/iceberg-cdc/spark/.gitignore b/integration_tests/iceberg-cdc/spark/.gitignore new file mode 100644 index 0000000000000..51dcf07222856 --- /dev/null +++ b/integration_tests/iceberg-cdc/spark/.gitignore @@ -0,0 +1,3 @@ +derby.log +metastore_db +.ivy \ No newline at end of file diff --git a/integration_tests/iceberg-cdc/spark/spark-connect-server.sh b/integration_tests/iceberg-cdc/spark/spark-connect-server.sh new file mode 100755 index 0000000000000..7c1cd64f1a2f2 --- /dev/null +++ b/integration_tests/iceberg-cdc/spark/spark-connect-server.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -ex + +JARS=$(find /opt/spark/deps -type f -name "*.jar" | tr '\n' ':') + +/opt/spark/sbin/start-connect-server.sh \ + --master local[3] \ + --driver-class-path $JARS \ + --conf spark.driver.bindAddress=0.0.0.0 \ + --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.demo.catalog-impl=org.apache.iceberg.rest.RESTCatalog \ + --conf spark.sql.catalog.demo.uri=http://rest:8181 \ + --conf spark.sql.catalog.demo.s3.endpoint=http://minio-0:9301 \ + --conf spark.sql.catalog.demo.s3.path.style.access=true \ + --conf spark.sql.catalog.demo.s3.access.key=hummockadmin \ + --conf spark.sql.catalog.demo.s3.secret.key=hummockadmin \ + --conf spark.sql.defaultCatalog=demo + +tail -f /opt/spark/logs/spark*.out diff --git a/integration_tests/scripts/run_demos.py b/integration_tests/scripts/run_demos.py index 28623f7ddc4a7..da2519e18db44 100644 --- a/integration_tests/scripts/run_demos.py +++ b/integration_tests/scripts/run_demos.py @@ -42,6 +42,13 @@ def run_demo(demo: str, format: str, wait_time = 40): run_sql_file(sql_file, demo_dir) sleep(10) +def iceberg_cdc_demo(): + demo = "iceberg-cdc" + file_dir = dirname(abspath(__file__)) + project_dir = dirname(file_dir) + demo_dir = os.path.join(project_dir, demo) + print("Running demo: iceberg-cdc") + subprocess.run(["bash","./run_test.sh"], cwd=demo_dir, check=True) def run_iceberg_demo(): demo = "iceberg-sink" @@ -149,5 +156,7 @@ def run_clickhouse_demo(): run_iceberg_demo() elif args.case == "clickhouse-sink": run_clickhouse_demo() +elif args.case == "iceberg-cdc": + iceberg_cdc_demo() else: run_demo(args.case, args.format)