diff --git a/Cargo.lock b/Cargo.lock
index ec284f3ba4778..1c074e276553a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -685,19 +685,6 @@ dependencies = [
"futures-core",
]
-[[package]]
-name = "async-compat"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b48b4ff0c2026db683dea961cd8ea874737f56cffca86fa84415eaddc51c00d"
-dependencies = [
- "futures-core",
- "futures-io",
- "once_cell",
- "pin-project-lite",
- "tokio",
-]
-
[[package]]
name = "async-compression"
version = "0.4.5"
@@ -6658,12 +6645,11 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opendal"
-version = "0.44.0"
+version = "0.44.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c32736a48ef08a5d2212864e2295c8e54f4d6b352b7f49aa0c29a12fc410ff66"
+checksum = "4af824652d4d2ffabf606d337a071677ae621b05622adf35df9562f69d9b4498"
dependencies = [
"anyhow",
- "async-compat",
"async-trait",
"backon",
"base64 0.21.4",
@@ -6676,9 +6662,7 @@ dependencies = [
"log",
"md-5",
"once_cell",
- "parking_lot 0.12.1",
"percent-encoding",
- "pin-project",
"prometheus",
"quick-xml 0.30.0",
"reqsign",
@@ -7516,6 +7500,7 @@ dependencies = [
"postgres-protocol",
"serde",
"serde_json",
+ "uuid",
]
[[package]]
@@ -9058,6 +9043,7 @@ dependencies = [
"tracing-test",
"url",
"urlencoding",
+ "uuid",
"walkdir",
"with_options",
"workspace-hack",
diff --git a/Makefile.toml b/Makefile.toml
index 983b304d74e51..8820acf67c7bd 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -757,10 +757,10 @@ tmux list-windows -t risedev -F "#{window_name} #{pane_id}" \
if [[ -n $(tmux list-windows -t risedev | grep kafka) ]];
then
echo "kill kafka"
- kill_kafka
+ kill_kafka || true
echo "kill zookeeper"
- kill_zookeeper
+ kill_zookeeper || true
# Kill their tmux sessions
tmux list-windows -t risedev -F "#{pane_id}" | xargs -I {} tmux send-keys -t {} C-c C-d
diff --git a/README.md b/README.md
index 44443cfab8282..1611af1815175 100644
--- a/README.md
+++ b/README.md
@@ -72,7 +72,7 @@ Don’t have Docker? Learn how to install RisingWave on Mac, Ubuntu, and other e
## Production deployments
-For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-trial/?method=docker-compose).
+For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-docker-compose/).
For **distributed deployment**, please refer to [Kubernetes with Helm](https://docs.risingwave.com/docs/current/risingwave-k8s-helm/) or [Kubernetes with Operator](https://docs.risingwave.com/docs/current/risingwave-kubernetes/).
diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml
index 4a9f2970b84c7..db017be647376 100644
--- a/ci/docker-compose.yml
+++ b/ci/docker-compose.yml
@@ -88,10 +88,27 @@ services:
- message_queue
- elasticsearch
- clickhouse-server
- - pulsar
+ - redis-server
+ - pulsar-server
+ - cassandra-server
+ - starrocks-fe-server
+ - starrocks-be-server
volumes:
- ..:/risingwave
+ sink-doris-env:
+ image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231109
+ depends_on:
+ - doris-fe-server
+ - doris-be-server
+ volumes:
+ - ..:/risingwave
+ command: >
+ sh -c "sudo sysctl -w vm.max_map_count=2000000"
+ networks:
+ mynetwork:
+ ipv4_address: 172.121.0.4
+
rw-build-env:
image: public.ecr.aws/x5u3w5h6/rw-build-env:v20240213
volumes:
@@ -159,10 +176,96 @@ services:
expose:
- 9009
-# Temporary workaround for json schema registry test since redpanda only supports
-# protobuf/avro schema registry. Should be removed after the support.
-# Related tracking issue:
-# https://github.com/redpanda-data/redpanda/issues/1878
+ redis-server:
+ container_name: redis-server
+ image: 'redis:latest'
+ expose:
+ - 6379
+ ports:
+ - 6378:6379
+ healthcheck:
+ test: ["CMD", "redis-cli", "ping"]
+ interval: 5s
+ timeout: 30s
+ retries: 50
+
+ doris-fe-server:
+ platform: linux/amd64
+ image: apache/doris:2.0.0_alpha-fe-x86_64
+ hostname: doris-fe-server
+ command: >
+ sh -c "sudo sysctl -w vm.max_map_count=2000000"
+ environment:
+ - FE_SERVERS=fe1:172.121.0.2:9010
+ - FE_ID=1
+ ports:
+ - "8030:8030"
+ - "9030:9030"
+ networks:
+ mynetwork:
+ ipv4_address: 172.121.0.2
+
+ doris-be-server:
+ platform: linux/amd64
+ image: apache/doris:2.0.0_alpha-be-x86_64
+ hostname: doris-be-server
+ command: >
+ sh -c "sudo sysctl -w vm.max_map_count=2000000"
+ environment:
+ - FE_SERVERS=fe1:172.121.0.2:9010
+ - BE_ADDR=172.121.0.3:9050
+ depends_on:
+ - doris-fe-server
+ ports:
+ - "9050:9050"
+ networks:
+ mynetwork:
+ ipv4_address: 172.121.0.3
+
+ cassandra-server:
+ container_name: cassandra-server
+ image: cassandra:4.0
+ ports:
+ - 9042:9042
+ environment:
+ - CASSANDRA_CLUSTER_NAME=cloudinfra
+
+ starrocks-fe-server:
+ container_name: starrocks-fe-server
+ image: starrocks/fe-ubuntu:3.1.7
+ hostname: starrocks-fe-server
+ command:
+ /opt/starrocks/fe/bin/start_fe.sh
+ ports:
+ - 28030:8030
+ - 29020:9020
+ - 29030:9030
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:9030"]
+ interval: 5s
+ timeout: 5s
+ retries: 30
+
+ starrocks-be-server:
+ image: starrocks/be-ubuntu:3.1.7
+ command:
+ - /bin/bash
+ - -c
+ - |
+ sleep 15s; mysql --connect-timeout 2 -h starrocks-fe-server -P9030 -uroot -e "alter system add backend \"starrocks-be-server:9050\";"
+ /opt/starrocks/be/bin/start_be.sh
+ ports:
+ - 28040:8040
+ - 29050:9050
+ hostname: starrocks-be-server
+ container_name: starrocks-be-server
+ depends_on:
+ - starrocks-fe-server
+
+# # Temporary workaround for json schema registry test since redpanda only supports
+# # protobuf/avro schema registry. Should be removed after the support.
+# # Related tracking issue:
+# # https://github.com/redpanda-data/redpanda/issues/1878
zookeeper:
container_name: zookeeper
image: confluentinc/cp-zookeeper:latest
@@ -201,8 +304,8 @@ services:
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9093,PLAINTEXT_INTERNAL://localhost:29093
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
- pulsar:
- container_name: pulsar
+ pulsar-server:
+ container_name: pulsar-server
image: apachepulsar/pulsar:latest
command: bin/pulsar standalone
ports:
@@ -216,3 +319,9 @@ services:
interval: 5s
timeout: 5s
retries: 5
+networks:
+ mynetwork:
+ ipam:
+ config:
+ - subnet: 172.121.80.0/16
+ default:
diff --git a/ci/scripts/e2e-cassandra-sink-test.sh b/ci/scripts/e2e-cassandra-sink-test.sh
new file mode 100755
index 0000000000000..c393d510d19a2
--- /dev/null
+++ b/ci/scripts/e2e-cassandra-sink-test.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# Exits as soon as any line fails.
+set -euo pipefail
+
+source ci/scripts/common.sh
+
+# prepare environment
+export CONNECTOR_LIBS_PATH="./connector-node/libs"
+
+while getopts 'p:' opt; do
+ case ${opt} in
+ p )
+ profile=$OPTARG
+ ;;
+ \? )
+ echo "Invalid Option: -$OPTARG" 1>&2
+ exit 1
+ ;;
+ : )
+ echo "Invalid option: $OPTARG requires an argument" 1>&2
+ ;;
+ esac
+done
+shift $((OPTIND -1))
+
+download_and_prepare_rw "$profile" source
+
+echo "--- Download connector node package"
+buildkite-agent artifact download risingwave-connector.tar.gz ./
+mkdir ./connector-node
+tar xf ./risingwave-connector.tar.gz -C ./connector-node
+
+echo "--- starting risingwave cluster"
+cargo make ci-start ci-sink-test
+sleep 1
+
+echo "--- create cassandra table"
+curl https://downloads.apache.org/cassandra/4.1.3/apache-cassandra-4.1.3-bin.tar.gz --output apache-cassandra-4.1.3-bin.tar.gz
+tar xfvz apache-cassandra-4.1.3-bin.tar.gz
+cd apache-cassandra-4.1.3/bin
+export CQLSH_HOST=cassandra-server
+export CQLSH_PORT=9042
+./cqlsh -e "CREATE KEYSPACE demo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};use demo;
+CREATE table demo_bhv_table(v1 int primary key,v2 smallint,v3 bigint,v4 float,v5 double,v6 text,v7 date,v8 timestamp,v9 boolean);"
+
+echo "--- testing sinks"
+cd ../../
+sqllogictest -p 4566 -d dev './e2e_test/sink/cassandra_sink.slt'
+sleep 1
+cd apache-cassandra-4.1.3/bin
+./cqlsh -e "COPY demo.demo_bhv_table TO './query_result.csv' WITH HEADER = false AND ENCODING = 'UTF-8';"
+
+if cat ./query_result.csv | awk -F "," '{
+ exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01.000+0000" && $9 == "False\r"); }'; then
+ echo "Cassandra sink check passed"
+else
+ cat ./query_result.csv
+ echo "The output is not as expected."
+ exit 1
+fi
+
+echo "--- Kill cluster"
+cd ../../
+cargo make ci-kill
\ No newline at end of file
diff --git a/ci/scripts/e2e-clickhouse-sink-test.sh b/ci/scripts/e2e-clickhouse-sink-test.sh
index 3464bd3c3c14d..c14d83e8c4281 100755
--- a/ci/scripts/e2e-clickhouse-sink-test.sh
+++ b/ci/scripts/e2e-clickhouse-sink-test.sh
@@ -24,7 +24,7 @@ shift $((OPTIND -1))
download_and_prepare_rw "$profile" source
echo "--- starting risingwave cluster"
-cargo make ci-start ci-clickhouse-test
+cargo make ci-start ci-sink-test
sleep 1
diff --git a/ci/scripts/e2e-deltalake-sink-rust-test.sh b/ci/scripts/e2e-deltalake-sink-rust-test.sh
index 71ff1eede8e4d..cc0c287e8b572 100755
--- a/ci/scripts/e2e-deltalake-sink-rust-test.sh
+++ b/ci/scripts/e2e-deltalake-sink-rust-test.sh
@@ -32,8 +32,7 @@ mkdir ./connector-node
tar xf ./risingwave-connector.tar.gz -C ./connector-node
echo "--- starting risingwave cluster"
-mkdir -p .risingwave/log
-cargo make ci-start ci-deltalake-test
+cargo make ci-start ci-sink-test
sleep 1
# prepare minio deltalake sink
diff --git a/ci/scripts/e2e-doris-sink-test.sh b/ci/scripts/e2e-doris-sink-test.sh
new file mode 100755
index 0000000000000..30bfdaf129e26
--- /dev/null
+++ b/ci/scripts/e2e-doris-sink-test.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+# Exits as soon as any line fails.
+set -euo pipefail
+
+source ci/scripts/common.sh
+
+while getopts 'p:' opt; do
+ case ${opt} in
+ p )
+ profile=$OPTARG
+ ;;
+ \? )
+ echo "Invalid Option: -$OPTARG" 1>&2
+ exit 1
+ ;;
+ : )
+ echo "Invalid option: $OPTARG requires an argument" 1>&2
+ ;;
+ esac
+done
+shift $((OPTIND -1))
+
+download_and_prepare_rw "$profile" source
+
+echo "--- starting risingwave cluster"
+cargo make ci-start ci-sink-test
+sleep 1
+
+echo "--- create doris table"
+apt-get update -y && apt-get install -y mysql-client
+sleep 2
+mysql -uroot -P 9030 -h doris-fe-server -e "CREATE database demo;use demo;
+CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 datev2,v8 datetime,v9 boolean) UNIQUE KEY(\`v1\`)
+DISTRIBUTED BY HASH(\`v1\`) BUCKETS 1
+PROPERTIES (
+ \"replication_allocation\" = \"tag.location.default: 1\"
+);
+CREATE USER 'users'@'%' IDENTIFIED BY '123456';
+GRANT ALL ON *.* TO 'users'@'%';"
+sleep 2
+
+echo "--- testing sinks"
+sqllogictest -p 4566 -d dev './e2e_test/sink/doris_sink.slt'
+sleep 1
+mysql -uroot -P 9030 -h doris-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv
+
+
+if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{
+ exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0); }'; then
+ echo "Doris sink check passed"
+else
+ cat ./query_result.csv
+ echo "The output is not as expected."
+ exit 1
+fi
+
+echo "--- Kill cluster"
+cargo make ci-kill
\ No newline at end of file
diff --git a/ci/scripts/e2e-pulsar-sink-test.sh b/ci/scripts/e2e-pulsar-sink-test.sh
index ee8848832f940..f942ad945b3e9 100755
--- a/ci/scripts/e2e-pulsar-sink-test.sh
+++ b/ci/scripts/e2e-pulsar-sink-test.sh
@@ -21,7 +21,7 @@ shift $((OPTIND -1))
download_and_prepare_rw "$profile" source
echo "--- starting risingwave cluster"
-cargo make ci-start ci-pulsar-test
+cargo make ci-start ci-sink-test
sleep 1
echo "--- waiting until pulsar is healthy"
diff --git a/ci/scripts/e2e-redis-sink-test.sh b/ci/scripts/e2e-redis-sink-test.sh
new file mode 100755
index 0000000000000..cf64662db4051
--- /dev/null
+++ b/ci/scripts/e2e-redis-sink-test.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Exits as soon as any line fails.
+set -euo pipefail
+
+source ci/scripts/common.sh
+
+while getopts 'p:' opt; do
+ case ${opt} in
+ p )
+ profile=$OPTARG
+ ;;
+ \? )
+ echo "Invalid Option: -$OPTARG" 1>&2
+ exit 1
+ ;;
+ : )
+ echo "Invalid option: $OPTARG requires an argument" 1>&2
+ ;;
+ esac
+done
+shift $((OPTIND -1))
+
+download_and_prepare_rw "$profile" source
+
+echo "--- starting risingwave cluster"
+cargo make ci-start ci-sink-test
+apt-get update -y && apt-get install -y redis-server
+sleep 1
+
+echo "--- testing sinks"
+sqllogictest -p 4566 -d dev './e2e_test/sink/redis_sink.slt'
+sleep 1
+
+redis-cli -h redis-server -p 6379 get {\"v1\":1} >> ./query_result.txt
+redis-cli -h redis-server -p 6379 get V1:1 >> ./query_result.txt
+
+# check sink destination using shell
+if cat ./query_result.txt | tr '\n' '\0' | xargs -0 -n1 bash -c '[[ "$0" == "{\"v1\":1,\"v2\":1,\"v3\":1,\"v4\":1.100000023841858,\"v5\":1.2,\"v6\":\"test\",\"v7\":734869,\"v8\":\"2013-01-01T01:01:01.000000Z\",\"v9\":false}" || "$0" == "V2:1,V3:1" ]]'; then
+ echo "Redis sink check passed"
+else
+ cat ./query_result.txt
+ echo "The output is not as expected."
+ exit 1
+fi
+
+echo "--- Kill cluster"
+cargo make ci-kill
\ No newline at end of file
diff --git a/ci/scripts/e2e-starrocks-sink-test.sh b/ci/scripts/e2e-starrocks-sink-test.sh
new file mode 100755
index 0000000000000..256f4448f9198
--- /dev/null
+++ b/ci/scripts/e2e-starrocks-sink-test.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+# Exits as soon as any line fails.
+set -euo pipefail
+
+source ci/scripts/common.sh
+
+while getopts 'p:' opt; do
+ case ${opt} in
+ p )
+ profile=$OPTARG
+ ;;
+ \? )
+ echo "Invalid Option: -$OPTARG" 1>&2
+ exit 1
+ ;;
+ : )
+ echo "Invalid option: $OPTARG requires an argument" 1>&2
+ ;;
+ esac
+done
+shift $((OPTIND -1))
+
+download_and_prepare_rw "$profile" source
+
+echo "--- starting risingwave cluster"
+cargo make ci-start ci-sink-test
+sleep 1
+
+
+echo "--- create starrocks table"
+apt-get update -y && apt-get install -y mysql-client
+sleep 2
+mysql -uroot -P 9030 -h starrocks-fe-server -e "CREATE database demo;use demo;
+CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 date,v8 datetime,v9 boolean,v10 json) ENGINE=OLAP
+PRIMARY KEY(\`v1\`)
+DISTRIBUTED BY HASH(\`v1\`) properties(\"replication_num\" = \"1\");
+CREATE USER 'users'@'%' IDENTIFIED BY '123456';
+GRANT ALL ON *.* TO 'users'@'%';"
+sleep 2
+
+echo "--- testing sinks"
+sqllogictest -p 4566 -d dev './e2e_test/sink/starrocks_sink.slt'
+sleep 1
+mysql -uroot -P 9030 -h starrocks-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv
+
+
+if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{
+  exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0 && $10 == "{\"v101\": 100}"); }'; then
+ echo "Starrocks sink check passed"
+else
+ cat ./query_result.csv
+ echo "The output is not as expected."
+ exit 1
+fi
+
+echo "--- Kill cluster"
+cargo make ci-kill
\ No newline at end of file
diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml
index 835c46fb01e60..934458bcca1bc 100644
--- a/ci/workflows/main-cron.yml
+++ b/ci/workflows/main-cron.yml
@@ -815,6 +815,94 @@ steps:
timeout_in_minutes: 10
retry: *auto-retry
+ - label: "end-to-end redis sink test"
+ key: "e2e-redis-sink-tests"
+ command: "ci/scripts/e2e-redis-sink-test.sh -p ci-release"
+ if: |
+ !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null
+ || build.pull_request.labels includes "ci/run-e2e-redis-sink-tests"
+ || build.env("CI_STEPS") =~ /(^|,)e2e-redis-sink-tests?(,|$$)/
+ depends_on:
+ - "build"
+ - "build-other"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-test-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
+ - label: "set vm_max_map_count_2000000"
+ key: "set-vm_max_map_count"
+ if: |
+ !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null
+ || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests"
+ || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/
+ command: "sudo sysctl -w vm.max_map_count=2000000"
+ depends_on:
+ - "build"
+ - "build-other"
+
+ - label: "end-to-end doris sink test"
+ key: "e2e-doris-sink-tests"
+ command: "ci/scripts/e2e-doris-sink-test.sh -p ci-release"
+ if: |
+ !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null
+ || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests"
+ || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/
+ depends_on:
+ - "build"
+ - "build-other"
+ - "set-vm_max_map_count"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-doris-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
+ - label: "end-to-end starrocks sink test"
+ key: "e2e-starrocks-sink-tests"
+ command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-release"
+ if: |
+ !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null
+ || build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests"
+ || build.env("CI_STEPS") =~ /(^|,)e2e-starrocks-sink-tests?(,|$$)/
+ depends_on:
+ - "build"
+ - "build-other"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-test-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
+ - label: "end-to-end cassandra sink test"
+ key: "e2e-cassandra-sink-tests"
+ command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-release"
+ if: |
+ !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null
+ || build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests"
+ || build.env("CI_STEPS") =~ /(^|,)e2e-cassandra-sink-tests?(,|$$)/
+ depends_on:
+ - "build"
+ - "build-other"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-test-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
- label: "end-to-end clickhouse sink test"
key: "e2e-clickhouse-sink-tests"
command: "ci/scripts/e2e-clickhouse-sink-test.sh -p ci-release"
diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml
index c48de6df64f1c..a67f915d943cc 100644
--- a/ci/workflows/pull-request.yml
+++ b/ci/workflows/pull-request.yml
@@ -292,6 +292,75 @@ steps:
timeout_in_minutes: 10
retry: *auto-retry
+ - label: "end-to-end redis sink test"
+ if: build.pull_request.labels includes "ci/run-e2e-redis-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-redis-sink-tests?(,|$$)/
+ command: "ci/scripts/e2e-redis-sink-test.sh -p ci-dev"
+ depends_on:
+ - "build"
+ - "build-other"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-test-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
+ - label: "set vm_max_map_count_2000000"
+ key: "set-vm_max_map_count"
+ if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-doris-sink-tests?(,|$$)/
+ command: "sudo sysctl -w vm.max_map_count=2000000"
+ depends_on:
+ - "build"
+ - "build-other"
+
+ - label: "end-to-end doris sink test"
+ if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-doris-sink-tests?(,|$$)/
+ command: "ci/scripts/e2e-doris-sink-test.sh -p ci-dev"
+ depends_on:
+ - "build"
+ - "build-other"
+ - "set-vm_max_map_count"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-doris-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
+ - label: "end-to-end starrocks sink test"
+ if: build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-starrocks-sink-tests?(,|$$)/
+ command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-dev"
+ depends_on:
+ - "build"
+ - "build-other"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-test-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
+ - label: "end-to-end cassandra sink test"
+ if: build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests" || build.env("CI_STEPS") =~ /(^|,) e2e-cassandra-sink-tests?(,|$$)/
+ command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-dev"
+ depends_on:
+ - "build"
+ - "build-other"
+ plugins:
+ - docker-compose#v4.9.0:
+ run: sink-test-env
+ config: ci/docker-compose.yml
+ mount-buildkite-agent: true
+ - ./ci/plugins/upload-failure-logs
+ timeout_in_minutes: 10
+ retry: *auto-retry
+
- label: "e2e java-binding test"
if: build.pull_request.labels includes "ci/run-java-binding-tests" || build.env("CI_STEPS") =~ /(^|,)java-binding-tests?(,|$$)/
command: "ci/scripts/java-binding-test.sh -p ci-dev"
diff --git a/e2e_test/batch/catalog/pg_cast.slt.part b/e2e_test/batch/catalog/pg_cast.slt.part
index b8ab68a5ed5cd..b1558d1e144c4 100644
--- a/e2e_test/batch/catalog/pg_cast.slt.part
+++ b/e2e_test/batch/catalog/pg_cast.slt.part
@@ -82,8 +82,9 @@ SELECT * FROM pg_catalog.pg_cast;
78 3802 701 e
79 3802 1700 e
80 3802 1043 a
-81 1301 701 e
-82 1301 1043 a
+81 20 20 e
+82 1301 701 e
+83 1301 1043 a
query TT rowsort
SELECT s.typname, t.typname
diff --git a/e2e_test/batch/catalog/pg_settings.slt.part b/e2e_test/batch/catalog/pg_settings.slt.part
index 09e2546a856d8..c8e927ba72b9f 100644
--- a/e2e_test/batch/catalog/pg_settings.slt.part
+++ b/e2e_test/batch/catalog/pg_settings.slt.part
@@ -1,50 +1,102 @@
query TT
-SELECT name FROM pg_catalog.pg_settings order by name;
+SELECT context, name FROM pg_catalog.pg_settings ORDER BY (context, name);
----
-application_name
-background_ddl
-batch_enable_distributed_dml
-batch_parallelism
-bytea_output
-client_encoding
-client_min_messages
-create_compaction_group_for_mv
-datestyle
-extra_float_digits
-idle_in_transaction_session_timeout
-intervalstyle
-lock_timeout
-max_split_range_gap
-query_epoch
-query_mode
-row_security
-rw_batch_enable_lookup_join
-rw_batch_enable_sort_agg
-rw_enable_join_ordering
-rw_enable_share_plan
-rw_enable_two_phase_agg
-rw_force_split_distinct_agg
-rw_force_two_phase_agg
-rw_implicit_flush
-rw_streaming_allow_jsonb_in_stream_key
-rw_streaming_enable_bushy_join
-rw_streaming_enable_delta_join
-rw_streaming_over_window_cache_policy
-search_path
-server_encoding
-server_version
-server_version_num
-sink_decouple
-standard_conforming_strings
-statement_timeout
-streaming_enable_arrangement_backfill
-streaming_parallelism
-streaming_rate_limit
-synchronize_seqscans
-timezone
-transaction_isolation
-visibility_mode
+internal block_size_kb
+internal bloom_false_positive
+internal data_directory
+internal parallel_compact_size_mb
+internal sstable_size_mb
+internal state_store
+internal wasm_storage_url
+postmaster backup_storage_directory
+postmaster backup_storage_url
+postmaster barrier_interval_ms
+postmaster checkpoint_frequency
+postmaster enable_tracing
+postmaster max_concurrent_creating_streaming_jobs
+postmaster pause_on_next_bootstrap
+user application_name
+user background_ddl
+user batch_enable_distributed_dml
+user batch_parallelism
+user bytea_output
+user client_encoding
+user client_min_messages
+user create_compaction_group_for_mv
+user datestyle
+user extra_float_digits
+user idle_in_transaction_session_timeout
+user intervalstyle
+user lock_timeout
+user max_split_range_gap
+user query_epoch
+user query_mode
+user row_security
+user rw_batch_enable_lookup_join
+user rw_batch_enable_sort_agg
+user rw_enable_join_ordering
+user rw_enable_share_plan
+user rw_enable_two_phase_agg
+user rw_force_split_distinct_agg
+user rw_force_two_phase_agg
+user rw_implicit_flush
+user rw_streaming_allow_jsonb_in_stream_key
+user rw_streaming_enable_bushy_join
+user rw_streaming_enable_delta_join
+user rw_streaming_over_window_cache_policy
+user search_path
+user server_encoding
+user server_version
+user server_version_num
+user sink_decouple
+user standard_conforming_strings
+user statement_timeout
+user streaming_enable_arrangement_backfill
+user streaming_parallelism
+user streaming_rate_limit
+user synchronize_seqscans
+user timezone
+user transaction_isolation
+user visibility_mode
query TT
SELECT * FROM pg_catalog.pg_settings where name='dummy';
-----
\ No newline at end of file
+----
+
+# https://github.com/risingwavelabs/risingwave/issues/15125
+query TT
+SELECT min(name) name, context FROM pg_catalog.pg_settings GROUP BY context;
+----
+application_name user
+backup_storage_directory postmaster
+block_size_kb internal
+
+# Tab-completion of `SET` command
+query T
+SELECT name
+FROM
+ (SELECT pg_catalog.lower(name) AS name
+ FROM pg_catalog.pg_settings
+ WHERE context IN ('user',
+ 'superuser')
+ UNION ALL SELECT 'constraints'
+ UNION ALL SELECT 'transaction'
+ UNION ALL SELECT 'session'
+ UNION ALL SELECT 'role'
+ UNION ALL SELECT 'tablespace'
+ UNION ALL SELECT 'all') ss
+WHERE substring(name, 1, 8)='search_p';
+----
+search_path
+
+# Tab-completion of `ALTER SYSTEM SET` command
+query T
+SELECT name
+FROM
+ (SELECT pg_catalog.lower(name) AS name
+ FROM pg_catalog.pg_settings
+ WHERE context != 'internal'
+ UNION ALL SELECT 'all') ss
+WHERE substring(name, 1, 7)='checkpo';
+----
+checkpoint_frequency
diff --git a/e2e_test/batch/catalog/version.slt.part b/e2e_test/batch/catalog/version.slt.part
index b2ba9e2a877c5..dc3e0399b1e6a 100644
--- a/e2e_test/batch/catalog/version.slt.part
+++ b/e2e_test/batch/catalog/version.slt.part
@@ -1,4 +1,4 @@
query T
-select substring(version() from 1 for 14);
+select substring(version() from 1 for 16);
----
-PostgreSQL 9.5
+PostgreSQL 13.14
diff --git a/e2e_test/batch/functions/setting.slt.part b/e2e_test/batch/functions/setting.slt.part
index 77d1d80e46590..233399d80a025 100644
--- a/e2e_test/batch/functions/setting.slt.part
+++ b/e2e_test/batch/functions/setting.slt.part
@@ -1,12 +1,12 @@
query T
SELECT current_setting('server_version');
----
-9.5.0
+13.14.0
query I
-SELECT CAST(current_setting('server_version_num') AS INT) / 100 AS version;
+SELECT current_setting('server_version_num') AS version;
----
-905
+130014
query T
SELECT set_config('client_min_messages', 'warning', false);
diff --git a/e2e_test/error_ui/simple/main.slt b/e2e_test/error_ui/simple/main.slt
index b4cebbdfeff70..3197544b45d75 100644
--- a/e2e_test/error_ui/simple/main.slt
+++ b/e2e_test/error_ui/simple/main.slt
@@ -27,7 +27,7 @@ db error: ERROR: Failed to run the query
Caused by these errors (recent errors listed first):
1: gRPC request to meta service failed: Internal error
- 2: SystemParams error: unrecognized system param "not_exist_key"
+ 2: SystemParams error: unrecognized system parameter "not_exist_key"
query error
diff --git a/e2e_test/sink/cassandra_sink.slt b/e2e_test/sink/cassandra_sink.slt
new file mode 100644
index 0000000000000..7091e8da70783
--- /dev/null
+++ b/e2e_test/sink/cassandra_sink.slt
@@ -0,0 +1,33 @@
+statement ok
+CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean);
+
+statement ok
+CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6;
+
+statement ok
+CREATE SINK s6
+FROM
+ mv6 WITH (
+ connector = 'cassandra',
+ type = 'append-only',
+ force_append_only='true',
+ cassandra.url = 'cassandra-server:9042',
+ cassandra.keyspace = 'demo',
+ cassandra.table = 'demo_bhv_table',
+ cassandra.datacenter = 'datacenter1',
+);
+
+statement ok
+INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false);
+
+statement ok
+FLUSH;
+
+statement ok
+DROP SINK s6;
+
+statement ok
+DROP MATERIALIZED VIEW mv6;
+
+statement ok
+DROP TABLE t6;
\ No newline at end of file
diff --git a/e2e_test/sink/doris_sink.slt b/e2e_test/sink/doris_sink.slt
new file mode 100644
index 0000000000000..2c552bbb26143
--- /dev/null
+++ b/e2e_test/sink/doris_sink.slt
@@ -0,0 +1,34 @@
+statement ok
+CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean);
+
+statement ok
+CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6;
+
+statement ok
+CREATE SINK s6
+FROM
+ mv6 WITH (
+ connector = 'doris',
+ type = 'append-only',
+ doris.url = 'http://doris-fe-server:8030',
+ doris.user = 'users',
+ doris.password = '123456',
+ doris.database = 'demo',
+ doris.table='demo_bhv_table',
+ force_append_only='true'
+);
+
+statement ok
+INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01' , false);
+
+statement ok
+FLUSH;
+
+statement ok
+DROP SINK s6;
+
+statement ok
+DROP MATERIALIZED VIEW mv6;
+
+statement ok
+DROP TABLE t6;
\ No newline at end of file
diff --git a/e2e_test/sink/redis_sink.slt b/e2e_test/sink/redis_sink.slt
new file mode 100644
index 0000000000000..7475a80ae696e
--- /dev/null
+++ b/e2e_test/sink/redis_sink.slt
@@ -0,0 +1,41 @@
+statement ok
+CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean);
+
+statement ok
+CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6;
+
+statement ok
+CREATE SINK s61
+FROM
+ mv6 WITH (
+ primary_key = 'v1',
+ connector = 'redis',
+ redis.url= 'redis://redis-server:6379/',
+)FORMAT PLAIN ENCODE JSON(force_append_only='true');
+
+statement ok
+CREATE SINK s62
+FROM
+ mv6 WITH (
+ primary_key = 'v1',
+ connector = 'redis',
+ redis.url= 'redis://redis-server:6379/',
+)FORMAT PLAIN ENCODE TEMPLATE(force_append_only='true', key_format = 'V1:{v1}', value_format = 'V2:{v2},V3:{v3}');
+
+statement ok
+INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false);
+
+statement ok
+FLUSH;
+
+statement ok
+DROP SINK s61;
+
+statement ok
+DROP SINK s62;
+
+statement ok
+DROP MATERIALIZED VIEW mv6;
+
+statement ok
+DROP TABLE t6;
\ No newline at end of file
diff --git a/e2e_test/sink/sink_into_table/basic.slt b/e2e_test/sink/sink_into_table/basic.slt
index 1bc5a47907077..890087e207fd0 100644
--- a/e2e_test/sink/sink_into_table/basic.slt
+++ b/e2e_test/sink/sink_into_table/basic.slt
@@ -362,6 +362,35 @@ drop table t_b;
statement ok
drop table t_c;
+# cycle check (with materialize view)
+
+statement ok
+create table t_a(v int primary key);
+
+statement ok
+create materialized view m_a as select v from t_a;
+
+statement ok
+create table t_b(v int primary key);
+
+statement ok
+create sink s_a into t_b as select v from m_a;
+
+statement error Creating such a sink will result in circular dependency
+create sink s_b into t_a as select v from t_b;
+
+statement ok
+drop sink s_a;
+
+statement ok
+drop table t_b;
+
+statement ok
+drop materialized view m_a;
+
+statement ok
+drop table t_a;
+
# multi sinks
statement ok
diff --git a/e2e_test/sink/starrocks_sink.slt b/e2e_test/sink/starrocks_sink.slt
new file mode 100644
index 0000000000000..a1ee1b0ffe039
--- /dev/null
+++ b/e2e_test/sink/starrocks_sink.slt
@@ -0,0 +1,36 @@
+statement ok
+CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean, v10 jsonb);
+
+statement ok
+CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6;
+
+statement ok
+CREATE SINK s6
+FROM
+ mv6 WITH (
+ connector = 'starrocks',
+ type = 'upsert',
+ starrocks.host = 'starrocks-fe-server',
+ starrocks.mysqlport = '9030',
+ starrocks.httpport = '8030',
+ starrocks.user = 'users',
+ starrocks.password = '123456',
+ starrocks.database = 'demo',
+ starrocks.table = 'demo_bhv_table',
+ primary_key = 'v1'
+);
+
+statement ok
+INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01' , false, '{"v101":100}');
+
+statement ok
+FLUSH;
+
+statement ok
+DROP SINK s6;
+
+statement ok
+DROP MATERIALIZED VIEW mv6;
+
+statement ok
+DROP TABLE t6;
\ No newline at end of file
diff --git a/e2e_test/source/cdc/cdc.share_stream.slt b/e2e_test/source/cdc/cdc.share_stream.slt
index d4b50ed4db6d6..7739d3f1ad6ea 100644
--- a/e2e_test/source/cdc/cdc.share_stream.slt
+++ b/e2e_test/source/cdc/cdc.share_stream.slt
@@ -205,6 +205,7 @@ CREATE TABLE IF NOT EXISTS postgres_all_types(
c_timestamptz_array timestamptz[],
c_interval_array interval[],
c_jsonb_array jsonb[],
+ c_uuid varchar,
PRIMARY KEY (c_boolean,c_bigint,c_date)
) from pg_source table 'public.postgres_all_types';
@@ -234,9 +235,9 @@ CREATE MATERIALIZED VIEW person_new_cnt AS SELECT COUNT(*) AS cnt FROM person_ne
sleep 3s
query TTTTTTT
-SELECT c_boolean,c_date,c_time,c_timestamp,c_jsonb,c_smallint_array,c_timestamp_array FROM postgres_all_types where c_bigint=-9223372036854775807
+SELECT c_boolean,c_date,c_time,c_timestamp,c_jsonb,c_smallint_array,c_timestamp_array,c_uuid FROM postgres_all_types where c_bigint=-9223372036854775807
----
-f 0001-01-01 00:00:00 0001-01-01 00:00:00 {} {-32767} {"0001-01-01 00:00:00"}
+f 0001-01-01 00:00:00 0001-01-01 00:00:00 {} {-32767} {"0001-01-01 00:00:00"} bb488f9b-330d-4012-b849-12adeb49e57e
# postgres streaming test
diff --git a/e2e_test/source/cdc/postgres_cdc.sql b/e2e_test/source/cdc/postgres_cdc.sql
index 43dba14950b36..a4de0e447a0cc 100644
--- a/e2e_test/source/cdc/postgres_cdc.sql
+++ b/e2e_test/source/cdc/postgres_cdc.sql
@@ -67,7 +67,8 @@ CREATE TABLE IF NOT EXISTS postgres_all_types(
c_timestamptz_array timestamptz[],
c_interval_array interval[],
c_jsonb_array jsonb[],
+ c_uuid uuid,
PRIMARY KEY (c_boolean,c_bigint,c_date)
);
-INSERT INTO postgres_all_types VALUES ( False, 0, 0, 0, 0, 0, 0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[]::boolean[], array[]::smallint[], array[]::integer[], array[]::bigint[], array[]::decimal[], array[]::real[], array[]::double precision[], array[]::varchar[], array[]::bytea[], array[]::date[], array[]::time[], array[]::timestamp[], array[]::timestamptz[], array[]::interval[], array[]::jsonb[]);
-INSERT INTO postgres_all_types VALUES ( False, -32767, -2147483647, -9223372036854775807, -10.0, -9999.999999, -10000.0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[False::boolean]::boolean[], array[-32767::smallint]::smallint[], array[-2147483647::integer]::integer[], array[-9223372036854775807::bigint]::bigint[], array[-10.0::decimal]::decimal[], array[-9999.999999::real]::real[], array[-10000.0::double precision]::double precision[], array[''::varchar]::varchar[], array['\x00'::bytea]::bytea[], array['0001-01-01'::date]::date[], array['00:00:00'::time]::time[], array['0001-01-01 00:00:00'::timestamp::timestamp]::timestamp[], array['0001-01-01 00:00:00'::timestamptz::timestamptz]::timestamptz[], array[interval '0 second'::interval]::interval[], array['{}'::jsonb]::jsonb[]);
+INSERT INTO postgres_all_types VALUES ( False, 0, 0, 0, 0, 0, 0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[]::boolean[], array[]::smallint[], array[]::integer[], array[]::bigint[], array[]::decimal[], array[]::real[], array[]::double precision[], array[]::varchar[], array[]::bytea[], array[]::date[], array[]::time[], array[]::timestamp[], array[]::timestamptz[], array[]::interval[], array[]::jsonb[], null);
+INSERT INTO postgres_all_types VALUES ( False, -32767, -2147483647, -9223372036854775807, -10.0, -9999.999999, -10000.0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[False::boolean]::boolean[], array[-32767::smallint]::smallint[], array[-2147483647::integer]::integer[], array[-9223372036854775807::bigint]::bigint[], array[-10.0::decimal]::decimal[], array[-9999.999999::real]::real[], array[-10000.0::double precision]::double precision[], array[''::varchar]::varchar[], array['\x00'::bytea]::bytea[], array['0001-01-01'::date]::date[], array['00:00:00'::time]::time[], array['0001-01-01 00:00:00'::timestamp::timestamp]::timestamp[], array['0001-01-01 00:00:00'::timestamptz::timestamptz]::timestamptz[], array[interval '0 second'::interval]::interval[], array['{}'::jsonb]::jsonb[], 'bb488f9b-330d-4012-b849-12adeb49e57e');
diff --git a/e2e_test/streaming/bug_fixes/issue_15198.slt b/e2e_test/streaming/bug_fixes/issue_15198.slt
new file mode 100644
index 0000000000000..a69aede18c2c9
--- /dev/null
+++ b/e2e_test/streaming/bug_fixes/issue_15198.slt
@@ -0,0 +1,23 @@
+# https://github.com/risingwavelabs/risingwave/issues/15198
+
+statement ok
+SET RW_IMPLICIT_FLUSH TO TRUE;
+
+statement ok
+create materialized view "tumble_with_offset"
+as (
+ with
+ input as (
+ select 1 as id, TO_TIMESTAMP('2024-01-01 01:30:02', 'YYYY-MM-DD HH24:MI:SS') as timestamps
+ )
+ select *
+ from tumble(input, timestamps, interval '1 DAY', '+6 HOURS')
+);
+
+query ITTT
+select * from tumble_with_offset;
+----
+1 2024-01-01 01:30:02+00:00 2023-12-31 06:00:00+00:00 2024-01-01 06:00:00+00:00
+
+statement ok
+drop materialized view tumble_with_offset;
diff --git a/integration_tests/http-sink/README.md b/integration_tests/http-sink/README.md
new file mode 100644
index 0000000000000..d956cb4ea95a4
--- /dev/null
+++ b/integration_tests/http-sink/README.md
@@ -0,0 +1,34 @@
+# Demo: Sinking to HTTP
+
+In this demo, we want to showcase how RisingWave is able to sink data to HTTP. This feature depends on https://github.com/getindata/flink-http-connector.
+
+It has a few limitations:
+1. It offers only two options for the HTTP method, i.e., PUT and POST.
+2. It can only execute one request-reply round to the service (session-less).
+3. It cannot handle status codes in the SQL API.
+
+Therefore, we suggest trying a Python UDF first.
+
+### Demo:
+1. Launch the cluster:
+
+```sh
+docker-compose up -d
+```
+
+The cluster contains a RisingWave cluster with its necessary dependencies, plus a datagen service that generates the data.
+
+2. Build and start an HTTP server of your own to receive the requests from the sink (a minimal sketch is given at the end of this README)
+
+3. Execute the SQL queries in sequence:
+
+- create_source.sql
+- create_mv.sql
+- create_sink.sql
+
+4. Check the contents received by the HTTP server:
+The HTTP server will receive JSON strings such as:
+```
+{"user_id":5,"target_id":"siFqrkdlCn"}
+```
+A total of 1000 JSON records is expected.
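+
+For reference, below is a minimal sketch (Python, standard library only) of such a throwaway endpoint. It assumes the sink posts to `http://localhost:8080/endpoint` as configured in `create_sink.sql`; the file name `sink_receiver.py` and the handler details are only illustrative:
+
+```python
+# sink_receiver.py -- throwaway endpoint that accepts the sink's POST/PUT
+# requests, prints each JSON payload, and counts the requests received.
+import json
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+received = 0
+
+class SinkHandler(BaseHTTPRequestHandler):
+    def _handle(self):
+        global received
+        length = int(self.headers.get("Content-Length", 0))
+        body = self.rfile.read(length)
+        try:
+            print(json.loads(body))
+        except json.JSONDecodeError:
+            print("non-JSON payload:", body)
+        received += 1
+        print("requests received so far:", received)
+        self.send_response(200)
+        self.end_headers()
+
+    # The connector only issues PUT and POST, so both map to the same handler.
+    do_POST = _handle
+    do_PUT = _handle
+
+if __name__ == "__main__":
+    HTTPServer(("0.0.0.0", 8080), SinkHandler).serve_forever()
+```
+
+Note that the printed request count may differ from the row count if the connector batches several records into one request.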
diff --git a/integration_tests/http-sink/create_mv.sql b/integration_tests/http-sink/create_mv.sql
new file mode 100644
index 0000000000000..8a291a3c95ea7
--- /dev/null
+++ b/integration_tests/http-sink/create_mv.sql
@@ -0,0 +1,6 @@
+CREATE MATERIALIZED VIEW bhv_mv AS
+SELECT
+ user_id,
+ target_id
+FROM
+ user_behaviors;
diff --git a/integration_tests/http-sink/create_sink.sql b/integration_tests/http-sink/create_sink.sql
new file mode 100644
index 0000000000000..0644d1d51934b
--- /dev/null
+++ b/integration_tests/http-sink/create_sink.sql
@@ -0,0 +1,11 @@
+CREATE sink bhv_http_sink FROM bhv_mv WITH (
+ connector = 'http',
+ url = 'http://localhost:8080/endpoint',
+ format = 'json',
+ type = 'append-only',
+ force_append_only='true',
+ primary_key = 'user_id',
+ gid.connector.http.sink.header.Origin = '*',
+ "gid.connector.http.sink.header.X-Content-Type-Options" = 'nosniff',
+ "gid.connector.http.sink.header.Content-Type" = 'application/json'
+);
\ No newline at end of file
diff --git a/integration_tests/http-sink/create_source.sql b/integration_tests/http-sink/create_source.sql
new file mode 100644
index 0000000000000..c28c10f3616da
--- /dev/null
+++ b/integration_tests/http-sink/create_source.sql
@@ -0,0 +1,18 @@
+CREATE table user_behaviors (
+ user_id int,
+ target_id VARCHAR,
+ target_type VARCHAR,
+ event_timestamp TIMESTAMP,
+ behavior_type VARCHAR,
+ parent_target_type VARCHAR,
+ parent_target_id VARCHAR,
+ PRIMARY KEY(user_id)
+) WITH (
+ connector = 'datagen',
+ fields.user_id.kind = 'sequence',
+ fields.user_id.start = '1',
+ fields.user_id.end = '1000',
+ fields.user_name.kind = 'random',
+ fields.user_name.length = '10',
+ datagen.rows.per.second = '10'
+) FORMAT PLAIN ENCODE JSON;
\ No newline at end of file
diff --git a/integration_tests/http-sink/docker-compose.yml b/integration_tests/http-sink/docker-compose.yml
new file mode 100644
index 0000000000000..8fba5ff352dc0
--- /dev/null
+++ b/integration_tests/http-sink/docker-compose.yml
@@ -0,0 +1,37 @@
+---
+version: "3"
+services:
+ risingwave-standalone:
+ extends:
+ file: ../../docker/docker-compose.yml
+ service: risingwave-standalone
+ etcd-0:
+ extends:
+ file: ../../docker/docker-compose.yml
+ service: etcd-0
+ grafana-0:
+ extends:
+ file: ../../docker/docker-compose.yml
+ service: grafana-0
+ minio-0:
+ extends:
+ file: ../../docker/docker-compose.yml
+ service: minio-0
+ prometheus-0:
+ extends:
+ file: ../../docker/docker-compose.yml
+ service: prometheus-0
+volumes:
+ risingwave-standalone:
+ external: false
+ etcd-0:
+ external: false
+ grafana-0:
+ external: false
+ minio-0:
+ external: false
+ prometheus-0:
+ external: false
+ message_queue:
+ external: false
+name: risingwave-compose
diff --git a/java/connector-node/risingwave-connector-service/pom.xml b/java/connector-node/risingwave-connector-service/pom.xml
index 047c523c1c7db..d51d67497ce05 100644
--- a/java/connector-node/risingwave-connector-service/pom.xml
+++ b/java/connector-node/risingwave-connector-service/pom.xml
@@ -99,7 +99,6 @@
<groupId>com.risingwave</groupId>
<artifactId>risingwave-sink-mock-flink-http-sink</artifactId>
- <scope>provided</scope>
diff --git a/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties b/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties
index 8b1d571082f6e..a361dcf71cefe 100644
--- a/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties
+++ b/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties
@@ -22,5 +22,5 @@ heartbeat.interval.ms=${debezium.heartbeat.interval.ms:-60000}
# In sharing cdc mode, we will subscribe to multiple tables in the given database,
# so here we set ${table.name} to a default value `RW_CDC_Sharing` just for display.
name=${hostname}:${port}:${database.name}.${table.name:-RW_CDC_Sharing}
-# Enable transaction metadata by default
-provide.transaction.metadata=${transactional:-true}
+# In sharing cdc mode, transaction metadata will be enabled in frontend
+provide.transaction.metadata=${transactional:-false}
diff --git a/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties b/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties
index 8d0284d03892e..326138403d3b2 100644
--- a/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties
+++ b/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties
@@ -20,5 +20,5 @@ heartbeat.interval.ms=${debezium.heartbeat.interval.ms:-300000}
# In sharing cdc source mode, we will subscribe to multiple tables in the given database,
# so here we set ${table.name} to a default value `RW_CDC_Sharing` just for display.
name=${hostname}:${port}:${database.name}.${schema.name}.${table.name:-RW_CDC_Sharing}
-# Enable transaction metadata by default
-provide.transaction.metadata=${transactional:-true}
+# In sharing cdc mode, transaction metadata will be enabled in frontend
+provide.transaction.metadata=${transactional:-false}
diff --git a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java
index 9ac3d257b2bad..7c883335cfc23 100644
--- a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java
+++ b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java
@@ -23,6 +23,7 @@
public class CassandraConfig extends CommonSinkConfig {
/** Required */
private String type;
+
/** Required */
private String url;
diff --git a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java
index a969dddd620f7..d316eeae74bed 100644
--- a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java
+++ b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java
@@ -26,6 +26,8 @@
/**
* The `FlinkMockSinkFactory` implementation of the http sink is responsible for creating the http
* counterpart of the `DynamicTableSinkFactory`. And `validate` don't need to do anything.
+ *
+ * This feature depends on https://github.com/getindata/flink-http-connector
*/
public class HttpFlinkMockSinkFactory implements FlinkMockSinkFactory {
@Override
diff --git a/java/pom.xml b/java/pom.xml
index 5f168c48bd9ef..c6e39b34cfc0b 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -69,7 +69,7 @@
1.53.0
2.10
0.1.0-SNAPSHOT
- 2.27.1
+ 2.43.0
2.20.0
2.0.9
1.5.0
@@ -391,7 +391,7 @@
- 1.7
+ 1.20.0
diff --git a/proto/buf.yaml b/proto/buf.yaml
index 1aa31816ce0af..abad30f04506c 100644
--- a/proto/buf.yaml
+++ b/proto/buf.yaml
@@ -1,7 +1,8 @@
version: v1
breaking:
use:
- - WIRE # https://docs.buf.build/breaking/rules
+ - WIRE_JSON # https://docs.buf.build/breaking/rules
+ # https://github.com/risingwavelabs/risingwave/issues/15030
lint:
use:
- DEFAULT
diff --git a/proto/expr.proto b/proto/expr.proto
index 4c9be4d15ea24..48bf9b55227ef 100644
--- a/proto/expr.proto
+++ b/proto/expr.proto
@@ -58,12 +58,12 @@ message ExprNode {
MAKE_TIMESTAMP = 115;
// From f64 to timestamp.
// e.g. `select to_timestamp(1672044740.0)`
- TO_TIMESTAMP = 104;
+ SEC_TO_TIMESTAMPTZ = 104;
AT_TIME_ZONE = 105;
DATE_TRUNC = 106;
// Parse text to timestamp by format string.
// e.g. `select to_timestamp('2022 08 21', 'YYYY MM DD')`
- TO_TIMESTAMP1 = 107;
+ CHAR_TO_TIMESTAMPTZ = 107;
CHAR_TO_DATE = 111;
// Performs a cast with additional timezone information.
CAST_WITH_TIME_ZONE = 108;
diff --git a/proto/plan_common.proto b/proto/plan_common.proto
index 82f9fbc63a0f8..1dd45ad08a6ef 100644
--- a/proto/plan_common.proto
+++ b/proto/plan_common.proto
@@ -136,6 +136,7 @@ enum FormatType {
FORMAT_TYPE_CANAL = 5;
FORMAT_TYPE_UPSERT = 6;
FORMAT_TYPE_PLAIN = 7;
+ FORMAT_TYPE_NONE = 8;
}
enum EncodeType {
@@ -147,6 +148,7 @@ enum EncodeType {
ENCODE_TYPE_JSON = 5;
ENCODE_TYPE_BYTES = 6;
ENCODE_TYPE_TEMPLATE = 7;
+ ENCODE_TYPE_NONE = 8;
}
enum RowFormatType {
diff --git a/risedev.yml b/risedev.yml
index 69b0c23b05dd3..cb352daab6cf9 100644
--- a/risedev.yml
+++ b/risedev.yml
@@ -164,6 +164,17 @@ profile:
- use: compactor
# - use: prometheus
# - use: grafana
+ fs:
+ steps:
+ # - use: etcd
+ - use: meta-node
+ - use: compute-node
+ - use: frontend
+ - use: opendal
+ engine: fs
+ - use: compactor
+ # - use: prometheus
+ # - use: grafana
webhdfs:
steps:
# - use: etcd
@@ -872,27 +883,7 @@ profile:
- use: frontend
- use: compactor
- ci-deltalake-test:
- config-path: src/config/ci.toml
- steps:
- - use: minio
- - use: meta-node
- - use: compute-node
- enable-tiered-cache: true
- - use: frontend
- - use: compactor
-
- ci-clickhouse-test:
- config-path: src/config/ci.toml
- steps:
- - use: minio
- - use: meta-node
- - use: compute-node
- enable-tiered-cache: true
- - use: frontend
- - use: compactor
-
- ci-pulsar-test:
+ ci-sink-test:
config-path: src/config/ci.toml
steps:
- use: minio
diff --git a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs
index 2c167fc1bdc20..e9173abefe1df 100644
--- a/src/cmd_all/src/bin/risingwave.rs
+++ b/src/cmd_all/src/bin/risingwave.rs
@@ -239,6 +239,7 @@ fn standalone(opts: StandaloneOpts) {
/// high level options to standalone mode node-level options.
/// We will start a standalone instance, with all nodes in the same process.
fn single_node(opts: SingleNodeOpts) {
+ opts.create_store_directories().unwrap();
let opts = risingwave_cmd_all::map_single_node_opts_to_standalone_opts(&opts);
let settings = risingwave_rt::LoggerSettings::from_opts(&opts)
.with_target("risingwave_storage", Level::WARN)
diff --git a/src/cmd_all/src/single_node.rs b/src/cmd_all/src/single_node.rs
index b89f861f6e4fd..042a0feee9863 100644
--- a/src/cmd_all/src/single_node.rs
+++ b/src/cmd_all/src/single_node.rs
@@ -14,6 +14,7 @@
use std::sync::LazyLock;
+use anyhow::Result;
use clap::Parser;
use home::home_dir;
use risingwave_common::config::{AsyncStackTraceOption, MetaBackend};
@@ -64,7 +65,7 @@ pub struct SingleNodeOpts {
/// The store directory used by meta store and object store.
#[clap(long, env = "RW_SINGLE_NODE_STORE_DIRECTORY")]
- store_directory: Option<String>,
+ pub store_directory: Option<String>,
/// The address of the meta node.
#[clap(long, env = "RW_SINGLE_NODE_META_ADDR")]
@@ -142,6 +143,7 @@ pub fn map_single_node_opts_to_standalone_opts(opts: &SingleNodeOpts) -> ParsedS
}
}
+// Defaults
impl SingleNodeOpts {
fn default_frontend_opts() -> FrontendOpts {
FrontendOpts {
@@ -227,3 +229,15 @@ impl SingleNodeOpts {
}
}
}
+
+impl SingleNodeOpts {
+ pub fn create_store_directories(&self) -> Result<()> {
+ let store_directory = self
+ .store_directory
+ .as_ref()
+ .unwrap_or_else(|| &*DEFAULT_STORE_DIRECTORY);
+ std::fs::create_dir_all(format!("{}/meta_store", store_directory))?;
+ std::fs::create_dir_all(format!("{}/state_store", store_directory))?;
+ Ok(())
+ }
+}
diff --git a/src/common/fields-derive/src/gen/test_empty_pk.rs b/src/common/fields-derive/src/gen/test_empty_pk.rs
new file mode 100644
index 0000000000000..ffb5ff268bed1
--- /dev/null
+++ b/src/common/fields-derive/src/gen/test_empty_pk.rs
@@ -0,0 +1,29 @@
+impl ::risingwave_common::types::Fields for Data {
+ const PRIMARY_KEY: Option<&'static [usize]> = Some(&[]);
+ fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> {
+ vec![
+ ("v1", < i16 as ::risingwave_common::types::WithDataType >
+ ::default_data_type()), ("v2", < String as
+ ::risingwave_common::types::WithDataType > ::default_data_type())
+ ]
+ }
+ fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow {
+ ::risingwave_common::row::OwnedRow::new(
+ vec![
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1),
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2)
+ ],
+ )
+ }
+}
+impl From<Data> for ::risingwave_common::types::ScalarImpl {
+ fn from(v: Data) -> Self {
+ ::risingwave_common::types::StructValue::new(
+ vec![
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1),
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2)
+ ],
+ )
+ .into()
+ }
+}
diff --git a/src/common/fields-derive/src/gen/test_no_pk.rs b/src/common/fields-derive/src/gen/test_no_pk.rs
new file mode 100644
index 0000000000000..9e1b3e7892969
--- /dev/null
+++ b/src/common/fields-derive/src/gen/test_no_pk.rs
@@ -0,0 +1,29 @@
+impl ::risingwave_common::types::Fields for Data {
+ const PRIMARY_KEY: Option<&'static [usize]> = None;
+ fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> {
+ vec![
+ ("v1", < i16 as ::risingwave_common::types::WithDataType >
+ ::default_data_type()), ("v2", < String as
+ ::risingwave_common::types::WithDataType > ::default_data_type())
+ ]
+ }
+ fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow {
+ ::risingwave_common::row::OwnedRow::new(
+ vec![
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1),
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2)
+ ],
+ )
+ }
+}
+impl From<Data> for ::risingwave_common::types::ScalarImpl {
+ fn from(v: Data) -> Self {
+ ::risingwave_common::types::StructValue::new(
+ vec![
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1),
+ ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2)
+ ],
+ )
+ .into()
+ }
+}
diff --git a/src/common/fields-derive/src/gen/test_output.rs b/src/common/fields-derive/src/gen/test_output.rs
index 517dcdefc7a8c..a804a379bfd4a 100644
--- a/src/common/fields-derive/src/gen/test_output.rs
+++ b/src/common/fields-derive/src/gen/test_output.rs
@@ -1,4 +1,5 @@
impl ::risingwave_common::types::Fields for Data {
+ const PRIMARY_KEY: Option<&'static [usize]> = Some(&[1usize, 0usize]);
fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> {
vec![
("v1", < i16 as ::risingwave_common::types::WithDataType >
@@ -21,9 +22,6 @@ impl ::risingwave_common::types::Fields for Data {
],
)
}
- fn primary_key() -> &'static [usize] {
- &[1usize, 0usize]
- }
}
impl From<Data> for ::risingwave_common::types::ScalarImpl {
fn from(v: Data) -> Self {
diff --git a/src/common/fields-derive/src/lib.rs b/src/common/fields-derive/src/lib.rs
index 86fa229a5adcd..dae648d1dc343 100644
--- a/src/common/fields-derive/src/lib.rs
+++ b/src/common/fields-derive/src/lib.rs
@@ -16,7 +16,7 @@ use proc_macro2::TokenStream;
use quote::quote;
use syn::{Data, DeriveInput, Result};
-#[proc_macro_derive(Fields, attributes(primary_key))]
+#[proc_macro_derive(Fields, attributes(primary_key, fields))]
pub fn fields(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream {
inner(tokens.into()).into()
}
@@ -46,6 +46,16 @@ fn gen(tokens: TokenStream) -> Result<TokenStream> {
));
};
+ let style = get_style(&input);
+ if let Some(style) = &style {
+ if !["Title Case", "TITLE CASE", "snake_case"].contains(&style.value().as_str()) {
+ return Err(syn::Error::new_spanned(
+ style,
+ "only `Title Case`, `TITLE CASE`, and `snake_case` are supported",
+ ));
+ }
+ }
+
let fields_rw: Vec<TokenStream> = struct_
.fields
.iter()
@@ -55,6 +65,12 @@ fn gen(tokens: TokenStream) -> Result {
if name.starts_with("r#") {
name = name[2..].to_string();
}
+ // cast style
+ match style.as_ref().map_or(String::new(), |f| f.value()).as_str() {
+ "Title Case" => name = to_title_case(&name),
+ "TITLE CASE" => name = to_title_case(&name).to_uppercase(),
+ _ => {}
+ }
let ty = &field.ty;
quote! {
(#name, <#ty as ::risingwave_common::types::WithDataType>::default_data_type())
@@ -66,16 +82,17 @@ fn gen(tokens: TokenStream) -> Result {
.iter()
.map(|field| field.ident.as_ref().expect("field no name"))
.collect::<Vec<_>>();
- let primary_key = get_primary_key(&input).map(|indices| {
- quote! {
- fn primary_key() -> &'static [usize] {
- &[#(#indices),*]
- }
- }
- });
+ let primary_key = get_primary_key(&input).map_or_else(
+ || quote! { None },
+ |indices| {
+ quote! { Some(&[#(#indices),*]) }
+ },
+ );
Ok(quote! {
impl ::risingwave_common::types::Fields for #ident {
+ const PRIMARY_KEY: Option<&'static [usize]> = #primary_key;
+
fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> {
vec![#(#fields_rw),*]
}
@@ -84,7 +101,6 @@ fn gen(tokens: TokenStream) -> Result {
::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.#names)
),*])
}
- #primary_key
}
impl From<#ident> for ::risingwave_common::types::ScalarImpl {
fn from(v: #ident) -> Self {
@@ -117,7 +133,9 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option<Vec<usize>> {
return Some(
keys.to_string()
.split(',')
- .map(|s| index(s.trim()))
+ .map(|s| s.trim())
+ .filter(|s| !s.is_empty())
+ .map(index)
.collect(),
);
}
@@ -132,6 +150,46 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option> {
None
}
+/// Get name style from `#[fields(style = "xxx")]` attribute.
+fn get_style(input: &syn::DeriveInput) -> Option<syn::LitStr> {
+ let style = input.attrs.iter().find_map(|attr| match &attr.meta {
+ syn::Meta::List(list) if list.path.is_ident("fields") => {
+ let name_value: syn::MetaNameValue = syn::parse2(list.tokens.clone()).ok()?;
+ if name_value.path.is_ident("style") {
+ Some(name_value.value)
+ } else {
+ None
+ }
+ }
+ _ => None,
+ })?;
+ match style {
+ syn::Expr::Lit(lit) => match lit.lit {
+ syn::Lit::Str(s) => Some(s),
+ _ => None,
+ },
+ _ => None,
+ }
+}
+
+/// Convert `snake_case` to `Title Case`.
+fn to_title_case(s: &str) -> String {
+ let mut title = String::new();
+ let mut next_upper = true;
+ for c in s.chars() {
+ if c == '_' {
+ title.push(' ');
+ next_upper = true;
+ } else if next_upper {
+ title.push(c.to_uppercase().next().unwrap());
+ next_upper = false;
+ } else {
+ title.push(c);
+ }
+ }
+ title
+}
+
#[cfg(test)]
mod tests {
use indoc::indoc;
@@ -143,6 +201,18 @@ mod tests {
prettyplease::unparse(&output)
}
+ fn do_test(code: &str, expected_path: &str) {
+ let input: TokenStream = str::parse(code).unwrap();
+
+ let output = super::gen(input).unwrap();
+
+ let output = pretty_print(output);
+
+ let expected = expect_test::expect_file![expected_path];
+
+ expected.assert_eq(&output);
+ }
+
#[test]
fn test_gen() {
let code = indoc! {r#"
@@ -157,14 +227,33 @@ mod tests {
}
"#};
- let input: TokenStream = str::parse(code).unwrap();
+ do_test(code, "gen/test_output.rs");
+ }
- let output = super::gen(input).unwrap();
+ #[test]
+ fn test_no_pk() {
+ let code = indoc! {r#"
+ #[derive(Fields)]
+ struct Data {
+ v1: i16,
+ v2: String,
+ }
+ "#};
- let output = pretty_print(output);
+ do_test(code, "gen/test_no_pk.rs");
+ }
- let expected = expect_test::expect_file!["gen/test_output.rs"];
+ #[test]
+ fn test_empty_pk() {
+ let code = indoc! {r#"
+ #[derive(Fields)]
+ #[primary_key()]
+ struct Data {
+ v1: i16,
+ v2: String,
+ }
+ "#};
- expected.assert_eq(&output);
+ do_test(code, "gen/test_empty_pk.rs");
}
}
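Note on the derive changes above: `#[fields(style = ...)]` renames the generated column names, and the primary key becomes an associated const instead of a method. A minimal usage sketch; the struct and field names here are illustrative only:

    use risingwave_common::types::Fields;

    #[derive(Fields)]
    #[fields(style = "Title Case")]
    #[primary_key(worker_id)]
    struct ExampleRow {
        worker_id: i32,    // reported by `fields()` as "Worker Id"
        node_name: String, // reported by `fields()` as "Node Name"
    }

    // `ExampleRow::PRIMARY_KEY` is `Some(&[0])`; omitting the `#[primary_key]`
    // attribute yields `None`, and `#[primary_key()]` yields `Some(&[])`.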
diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs
index 980897d5636e7..313c0bada6616 100644
--- a/src/common/src/lib.rs
+++ b/src/common/src/lib.rs
@@ -92,9 +92,9 @@ pub const UNKNOWN_GIT_SHA: &str = "unknown";
// The single source of truth of the pg parameters, Used in ConfigMap and current_cluster_version.
// The version of PostgreSQL that Risingwave claims to be.
-pub const PG_VERSION: &str = "9.5.0";
+pub const PG_VERSION: &str = "13.14.0";
/// The version of PostgreSQL that Risingwave claims to be.
-pub const SERVER_VERSION_NUM: i32 = 90500;
+pub const SERVER_VERSION_NUM: i32 = 130014;
/// Shows the server-side character set encoding. At present, this parameter can be shown but not set, because the encoding is determined at database creation time. It is also the default value of `client_encoding`.
pub const SERVER_ENCODING: &str = "UTF8";
/// see
diff --git a/src/common/src/system_param/mod.rs b/src/common/src/system_param/mod.rs
index 278390887dd51..82677e57e9753 100644
--- a/src/common/src/system_param/mod.rs
+++ b/src/common/src/system_param/mod.rs
@@ -340,7 +340,7 @@ macro_rules! impl_set_system_param {
)*
_ => {
Err(format!(
- "unrecognized system param {:?}",
+ "unrecognized system parameter {:?}",
key
))
}
diff --git a/src/common/src/system_param/reader.rs b/src/common/src/system_param/reader.rs
index c6b8d8c5af6aa..cf17c7bb43dd5 100644
--- a/src/common/src/system_param/reader.rs
+++ b/src/common/src/system_param/reader.rs
@@ -16,9 +16,17 @@ use std::borrow::Borrow;
use risingwave_pb::meta::PbSystemParams;
-use super::{default, system_params_to_kv, ParamValue};
+use super::{default, ParamValue};
use crate::for_all_params;
+/// Information about a system parameter.
+pub struct ParameterInfo {
+ pub name: &'static str,
+ pub mutable: bool,
+ pub value: String,
+ pub description: &'static str,
+}
+
macro_rules! define_system_params_read_trait {
($({ $field:ident, $type:ty, $default:expr, $is_mutable:expr, $doc:literal, $($rest:tt)* },)*) => {
/// The trait delegating reads on [`risingwave_pb::meta::SystemParams`].
@@ -32,6 +40,20 @@ macro_rules! define_system_params_read_trait {
#[doc = $doc]
fn $field(&self) -> <$type as ParamValue>::Borrowed<'_>;
)*
+
+ /// Return the information of all parameters.
+ fn get_all(&self) -> Vec<ParameterInfo> {
+ vec![
+ $(
+ ParameterInfo {
+ name: stringify!($field),
+ mutable: $is_mutable,
+ value: self.$field().to_string(),
+ description: $doc,
+ },
+ )*
+ ]
+ }
}
};
}
@@ -70,10 +92,6 @@ where
}
}
- pub fn to_kv(&self) -> Vec<(String, String)> {
- system_params_to_kv(self.inner()).unwrap()
- }
-
fn inner(&self) -> &PbSystemParams {
self.inner.borrow()
}
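A rough sketch of how the new `get_all` can be consumed (reader construction elided; `SystemParamsRead` is the trait generated by the macro above):

    use risingwave_common::system_param::reader::{ParameterInfo, SystemParamsRead};

    // List every system parameter with its value and mutability, e.g. to back `pg_settings`.
    fn dump_params(reader: &impl SystemParamsRead) -> Vec<String> {
        reader
            .get_all()
            .into_iter()
            .map(|p: ParameterInfo| format!("{} = {} (mutable: {})", p.name, p.value, p.mutable))
            .collect()
    }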
diff --git a/src/common/src/types/fields.rs b/src/common/src/types/fields.rs
index f52717297792e..df1795804af00 100644
--- a/src/common/src/types/fields.rs
+++ b/src/common/src/types/fields.rs
@@ -58,17 +58,18 @@ use crate::util::chunk_coalesce::DataChunkBuilder;
/// }
/// ```
pub trait Fields {
+ /// The primary key of the table.
+ ///
+ /// - `None` if the primary key is not applicable.
+ /// - `Some(&[])` if the primary key is empty, i.e., there'll be at most one row in the table.
+ const PRIMARY_KEY: Option<&'static [usize]>;
+
/// Return the schema of the struct.
fn fields() -> Vec<(&'static str, DataType)>;
/// Convert the struct to an `OwnedRow`.
fn into_owned_row(self) -> OwnedRow;
- /// The primary key of the table.
- fn primary_key() -> &'static [usize] {
- &[]
- }
-
/// Create a [`DataChunkBuilder`](crate::util::chunk_coalesce::DataChunkBuilder) with the schema of the struct.
fn data_chunk_builder(capacity: usize) -> DataChunkBuilder {
DataChunkBuilder::new(
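A small sketch of how callers can branch on the new constant; the only assumption is a `T: Fields` bound:

    use risingwave_common::types::Fields;

    fn describe_pk<T: Fields>() -> String {
        match T::PRIMARY_KEY {
            None => "no primary key".to_owned(),
            Some([]) => "empty primary key (at most one row)".to_owned(),
            Some(indices) => format!("primary key on column indices {indices:?}"),
        }
    }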
diff --git a/src/common/src/types/serial.rs b/src/common/src/types/serial.rs
index 9bfbf5e4fcac7..5c84c95fa0f7a 100644
--- a/src/common/src/types/serial.rs
+++ b/src/common/src/types/serial.rs
@@ -26,6 +26,12 @@ use crate::util::row_id::RowId;
#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Default, Hash)]
pub struct Serial(i64);
+impl From<Serial> for i64 {
+ fn from(value: Serial) -> i64 {
+ value.0
+ }
+}
+
impl From<i64> for Serial {
fn from(value: i64) -> Self {
Self(value)
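The new impl makes the conversion symmetric; a trivial sketch, which is presumably what the `cast(serial) -> int8` function added further below relies on:

    use risingwave_common::types::Serial;

    fn serial_roundtrip() {
        let s = Serial::from(42_i64); // i64 -> Serial (already existed)
        let v: i64 = s.into();        // Serial -> i64 (added here)
        assert_eq!(v, 42);
    }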
diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml
index f73bd5f51c3e0..e1f7ea97812c5 100644
--- a/src/connector/Cargo.toml
+++ b/src/connector/Cargo.toml
@@ -135,7 +135,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [
"signal",
"fs",
] }
-tokio-postgres = "0.7"
+tokio-postgres = { version = "0.7", features = ["with-uuid-1"] }
tokio-retry = "0.3"
tokio-stream = "0.1"
tokio-util = { version = "0.7", features = ["codec", "io"] }
@@ -143,6 +143,7 @@ tonic = { workspace = true }
tracing = "0.1"
url = "2"
urlencoding = "2"
+uuid = { version = "1", features = ["v4", "fast-rng"] }
with_options = { path = "./with_options" }
yup-oauth2 = "8.3"
diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs
index 9a2383dbb4a96..e34171717ae6c 100644
--- a/src/connector/src/macros.rs
+++ b/src/connector/src/macros.rs
@@ -36,7 +36,8 @@ macro_rules! for_all_classified_sources {
{ Gcs, $crate::source::filesystem::opendal_source::GcsProperties , $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalGcs> },
{ OpendalS3, $crate::source::filesystem::opendal_source::OpendalS3Properties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalS3> },
{ PosixFs, $crate::source::filesystem::opendal_source::PosixFsProperties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalPosixFs> },
- { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit}
+ { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit},
+ { Iceberg, $crate::source::iceberg::IcebergProperties, $crate::source::iceberg::IcebergSplit}
}
$(
,$extra_args
diff --git a/src/connector/src/parser/postgres.rs b/src/connector/src/parser/postgres.rs
index fe1906614698c..acfbe5e4ae435 100644
--- a/src/connector/src/parser/postgres.rs
+++ b/src/connector/src/parser/postgres.rs
@@ -24,6 +24,7 @@ use risingwave_common::types::{
};
use rust_decimal::Decimal as RustDecimal;
use thiserror_ext::AsReport;
+use tokio_postgres::types::Type;
static LOG_SUPPERSSER: LazyLock<LogSuppresser> = LazyLock::new(LogSuppresser::default);
@@ -139,7 +140,29 @@ pub fn postgres_row_to_owned_row(row: tokio_postgres::Row, schema: &Schema) -> OwnedRow {
handle_data_type!(row, i, name, RustDecimal, Decimal)
}
DataType::Varchar => {
- handle_data_type!(row, i, name, String)
+ match row.columns()[i].type_() {
+ // Since we don't support UUID natively, adapt it to a VARCHAR column
+ &Type::UUID => {
+ let res = row.try_get::<_, Option<uuid::Uuid>>(i);
+ match res {
+ Ok(val) => val.map(|v| ScalarImpl::from(v.to_string())),
+ Err(err) => {
+ if let Ok(sc) = LOG_SUPPERSSER.check() {
+ tracing::error!(
+ suppressed_count = sc,
+ column_name = name,
+ error = %err.as_report(),
+ "parse uuid column failed",
+ );
+ }
+ None
+ }
+ }
+ }
+ _ => {
+ handle_data_type!(row, i, name, String)
+ }
+ }
}
DataType::Date => {
handle_data_type!(row, i, name, NaiveDate, Date)
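Stripped of the surrounding macros and log suppression, the UUID adaptation boils down to the following sketch (it assumes the `with-uuid-1` feature enabled above):

    use tokio_postgres::types::Type;

    // Read a UUID column and surface it as its string form, so it fits a VARCHAR column.
    fn uuid_column_as_string(row: &tokio_postgres::Row, i: usize) -> Option<String> {
        match row.columns()[i].type_() {
            &Type::UUID => row
                .try_get::<_, Option<uuid::Uuid>>(i)
                .ok()
                .flatten()
                .map(|v| v.to_string()),
            _ => None, // non-UUID types are handled by the generic path
        }
    }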
diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs
index d4e38cac4d1c9..e6a654f75a5fd 100644
--- a/src/connector/src/sink/catalog/mod.rs
+++ b/src/connector/src/sink/catalog/mod.rs
@@ -205,7 +205,12 @@ impl TryFrom<PbSinkFormatDesc> for SinkFormatDesc {
F::Plain => SinkFormat::AppendOnly,
F::Upsert => SinkFormat::Upsert,
F::Debezium => SinkFormat::Debezium,
- f @ (F::Unspecified | F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => {
+ f @ (F::Unspecified
+ | F::Native
+ | F::DebeziumMongo
+ | F::Maxwell
+ | F::Canal
+ | F::None) => {
return Err(SinkError::Config(anyhow!(
"sink format unsupported: {}",
f.as_str_name()
@@ -217,7 +222,7 @@ impl TryFrom for SinkFormatDesc {
E::Protobuf => SinkEncode::Protobuf,
E::Template => SinkEncode::Template,
E::Avro => SinkEncode::Avro,
- e @ (E::Unspecified | E::Native | E::Csv | E::Bytes) => {
+ e @ (E::Unspecified | E::Native | E::Csv | E::Bytes | E::None) => {
return Err(SinkError::Config(anyhow!(
"sink encode unsupported: {}",
e.as_str_name()
diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs
index 68c5654533a64..326f8586d76eb 100644
--- a/src/connector/src/sink/iceberg/mod.rs
+++ b/src/connector/src/sink/iceberg/mod.rs
@@ -927,7 +927,7 @@ impl SinkCommitCoordinator for IcebergSinkCommitter {
}
/// Try to match our schema with iceberg schema.
-fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> {
+pub fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> {
if rw_schema.fields.len() != arrow_schema.fields().len() {
return Err(SinkError::Iceberg(anyhow!(
"Schema length not match, ours is {}, and iceberg is {}",
diff --git a/src/connector/src/sink/starrocks.rs b/src/connector/src/sink/starrocks.rs
index 11594133695d4..c5a0740b0736f 100644
--- a/src/connector/src/sink/starrocks.rs
+++ b/src/connector/src/sink/starrocks.rs
@@ -52,10 +52,10 @@ pub struct StarrocksCommon {
#[serde(rename = "starrocks.host")]
pub host: String,
/// The port to the MySQL server of StarRocks FE.
- #[serde(rename = "starrocks.mysqlport")]
+ #[serde(rename = "starrocks.mysqlport", alias = "starrocks.query_port")]
pub mysql_port: String,
/// The port to the HTTP server of StarRocks FE.
- #[serde(rename = "starrocks.httpport")]
+ #[serde(rename = "starrocks.httpport", alias = "starrocks.http_port")]
pub http_port: String,
/// The user name used to access the StarRocks database.
#[serde(rename = "starrocks.user")]
@@ -175,7 +175,7 @@ impl StarrocksSink {
Ok(starrocks_data_type.contains("varchar"))
}
risingwave_common::types::DataType::Time => Err(SinkError::Starrocks(
- "starrocks can not support Time".to_string(),
+ "TIME is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(),
)),
risingwave_common::types::DataType::Timestamp => {
Ok(starrocks_data_type.contains("datetime"))
@@ -184,24 +184,24 @@ impl StarrocksSink {
"TIMESTAMP WITH TIMEZONE is not supported for Starrocks sink as Starrocks doesn't store time values with timezone information. Please convert to TIMESTAMP first.".to_string(),
)),
risingwave_common::types::DataType::Interval => Err(SinkError::Starrocks(
- "starrocks can not support Interval".to_string(),
+ "INTERVAL is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(),
)),
// todo! Validate the type struct and list
risingwave_common::types::DataType::Struct(_) => Err(SinkError::Starrocks(
- "starrocks can not support import struct".to_string(),
+ "STRUCT is not supported for Starrocks sink.".to_string(),
)),
risingwave_common::types::DataType::List(_) => {
Ok(starrocks_data_type.contains("unknown"))
}
risingwave_common::types::DataType::Bytea => Err(SinkError::Starrocks(
- "starrocks can not support Bytea".to_string(),
+ "BYTEA is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(),
)),
risingwave_common::types::DataType::Jsonb => Ok(starrocks_data_type.contains("json")),
risingwave_common::types::DataType::Serial => {
Ok(starrocks_data_type.contains("bigint"))
}
risingwave_common::types::DataType::Int256 => Err(SinkError::Starrocks(
- "starrocks can not support Int256".to_string(),
+ "INT256 is not supported for Starrocks sink.".to_string(),
)),
}
}
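The `alias` attribute means both the old and the new option keys deserialize into the same field. A self-contained sketch of the mechanism with a toy struct (serde_json stands in for the real WITH-options handling):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct PortConfig {
        #[serde(rename = "starrocks.mysqlport", alias = "starrocks.query_port")]
        mysql_port: String,
    }

    fn accepts_both_keys() -> serde_json::Result<()> {
        let old: PortConfig = serde_json::from_str(r#"{"starrocks.mysqlport": "9030"}"#)?;
        let new: PortConfig = serde_json::from_str(r#"{"starrocks.query_port": "9030"}"#)?;
        assert_eq!(old.mysql_port, new.mysql_port);
        Ok(())
    }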
diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs
index 5b909a2738f3c..fed8e0263aac4 100644
--- a/src/connector/src/source/base.rs
+++ b/src/connector/src/source/base.rs
@@ -150,7 +150,7 @@ pub struct SourceEnumeratorContext {
pub connector_client: Option<ConnectorClient>,
}
-#[derive(Clone, Copy, Debug, Default)]
+#[derive(Clone, Debug, Default)]
pub struct SourceEnumeratorInfo {
pub source_id: u32,
}
diff --git a/src/connector/src/source/cdc/mod.rs b/src/connector/src/source/cdc/mod.rs
index ae9490bca3c56..5fc6aefdfefdd 100644
--- a/src/connector/src/source/cdc/mod.rs
+++ b/src/connector/src/source/cdc/mod.rs
@@ -37,6 +37,8 @@ pub const CDC_SNAPSHOT_BACKFILL: &str = "rw_cdc_backfill";
pub const CDC_SHARING_MODE_KEY: &str = "rw.sharing.mode.enable";
// User can set snapshot='false' to disable cdc backfill
pub const CDC_BACKFILL_ENABLE_KEY: &str = "snapshot";
+// We enable transactions for shared CDC sources by default
+pub const CDC_TRANSACTIONAL_KEY: &str = "transactional";
pub const MYSQL_CDC_CONNECTOR: &str = Mysql::CDC_CONNECTOR_NAME;
pub const POSTGRES_CDC_CONNECTOR: &str = Postgres::CDC_CONNECTOR_NAME;
diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs
new file mode 100644
index 0000000000000..e274f639f15b2
--- /dev/null
+++ b/src/connector/src/source/iceberg/mod.rs
@@ -0,0 +1,128 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use async_trait::async_trait;
+use risingwave_common::types::JsonbVal;
+use serde::{Deserialize, Serialize};
+
+use crate::parser::ParserConfig;
+use crate::source::{
+ BoxChunkSourceStream, Column, SourceContextRef, SourceEnumeratorContextRef, SourceProperties,
+ SplitEnumerator, SplitId, SplitMetaData, SplitReader, UnknownFields,
+};
+
+pub const ICEBERG_CONNECTOR: &str = "iceberg";
+
+#[derive(Clone, Debug, Deserialize, PartialEq, with_options::WithOptions)]
+pub struct IcebergProperties {
+ #[serde(rename = "catalog.type")]
+ pub catalog_type: String,
+ #[serde(rename = "s3.region")]
+ pub region_name: String,
+ #[serde(rename = "s3.endpoint", default)]
+ pub endpoint: String,
+ #[serde(rename = "s3.access.key", default)]
+ pub s3_access: String,
+ #[serde(rename = "s3.secret.key", default)]
+ pub s3_secret: String,
+ #[serde(rename = "warehouse.path")]
+ pub warehouse_path: String,
+ #[serde(rename = "database.name")]
+ pub database_name: String,
+ #[serde(rename = "table.name")]
+ pub table_name: String,
+
+ #[serde(flatten)]
+ pub unknown_fields: HashMap<String, String>,
+}
+
+impl SourceProperties for IcebergProperties {
+ type Split = IcebergSplit;
+ type SplitEnumerator = IcebergSplitEnumerator;
+ type SplitReader = IcebergFileReader;
+
+ const SOURCE_NAME: &'static str = ICEBERG_CONNECTOR;
+}
+
+impl UnknownFields for IcebergProperties {
+ fn unknown_fields(&self) -> HashMap<String, String> {
+ self.unknown_fields.clone()
+ }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
+pub struct IcebergSplit {}
+
+impl SplitMetaData for IcebergSplit {
+ fn id(&self) -> SplitId {
+ unimplemented!()
+ }
+
+ fn restore_from_json(_value: JsonbVal) -> anyhow::Result<Self> {
+ unimplemented!()
+ }
+
+ fn encode_to_json(&self) -> JsonbVal {
+ unimplemented!()
+ }
+
+ fn update_with_offset(&mut self, _start_offset: String) -> anyhow::Result<()> {
+ unimplemented!()
+ }
+}
+
+#[derive(Debug, Clone)]
+pub struct IcebergSplitEnumerator {}
+
+#[async_trait]
+impl SplitEnumerator for IcebergSplitEnumerator {
+ type Properties = IcebergProperties;
+ type Split = IcebergSplit;
+
+ async fn new(
+ _properties: Self::Properties,
+ _context: SourceEnumeratorContextRef,
+ ) -> anyhow::Result<Self> {
+ Ok(Self {})
+ }
+
+ async fn list_splits(&mut self) -> anyhow::Result<Vec<IcebergSplit>> {
+ Ok(vec![])
+ }
+}
+
+#[derive(Debug)]
+pub struct IcebergFileReader {}
+
+#[async_trait]
+impl SplitReader for IcebergFileReader {
+ type Properties = IcebergProperties;
+ type Split = IcebergSplit;
+
+ async fn new(
+ _props: IcebergProperties,
+ _splits: Vec<IcebergSplit>,
+ _parser_config: ParserConfig,
+ _source_ctx: SourceContextRef,
+ _columns: Option<Vec<Column>>,
+ ) -> anyhow::Result<Self> {
+ unimplemented!()
+ }
+
+ fn into_stream(self) -> BoxChunkSourceStream {
+ unimplemented!()
+ }
+}
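A rough sketch of how WITH options are expected to bind to `IcebergProperties`; the map contents are made up, and serde_json is used here only to illustrate the `rename`/`flatten` behavior (the real path goes through `ConnectorProperties::extract`):

    use std::collections::HashMap;

    use risingwave_connector::source::iceberg::IcebergProperties;

    fn parse_props() -> serde_json::Result<()> {
        let with_options: HashMap<String, String> = [
            ("catalog.type", "storage"),
            ("s3.region", "us-east-1"),
            ("warehouse.path", "s3://bucket/path"),
            ("database.name", "demo_db"),
            ("table.name", "demo_table"),
            ("some.unknown.key", "collected, not rejected"),
        ]
        .into_iter()
        .map(|(k, v)| (k.to_owned(), v.to_owned()))
        .collect();

        let props: IcebergProperties =
            serde_json::from_value(serde_json::to_value(&with_options)?)?;
        // Keys not declared on the struct land in `unknown_fields` via `#[serde(flatten)]`.
        assert!(props.unknown_fields.contains_key("some.unknown.key"));
        Ok(())
    }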
diff --git a/src/connector/src/source/mod.rs b/src/connector/src/source/mod.rs
index cba63b3005c1a..3656820ed95b0 100644
--- a/src/connector/src/source/mod.rs
+++ b/src/connector/src/source/mod.rs
@@ -31,6 +31,7 @@ pub use kafka::KAFKA_CONNECTOR;
pub use kinesis::KINESIS_CONNECTOR;
pub use nats::NATS_CONNECTOR;
mod common;
+pub mod iceberg;
mod manager;
pub mod reader;
pub mod test_source;
diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml
index 74cb5c21e9c7f..2b23913a1fc32 100644
--- a/src/connector/with_options_sink.yaml
+++ b/src/connector/with_options_sink.yaml
@@ -466,10 +466,12 @@ StarrocksConfig:
field_type: String
comments: The port to the MySQL server of StarRocks FE.
required: true
+ alias: starrocks.query_port
- name: starrocks.httpport
field_type: String
comments: The port to the HTTP server of StarRocks FE.
required: true
+ alias: starrocks.http_port
- name: starrocks.user
field_type: String
comments: The user name used to access the StarRocks database.
diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml
index 2d811ce639c96..dec3cf6a8941a 100644
--- a/src/connector/with_options_source.yaml
+++ b/src/connector/with_options_source.yaml
@@ -33,6 +33,35 @@ GcsProperties:
field_type: String
required: false
default: Default::default
+IcebergProperties:
+ fields:
+ - name: catalog.type
+ field_type: String
+ required: true
+ - name: s3.region
+ field_type: String
+ required: true
+ - name: s3.endpoint
+ field_type: String
+ required: false
+ default: Default::default
+ - name: s3.access.key
+ field_type: String
+ required: false
+ default: Default::default
+ - name: s3.secret.key
+ field_type: String
+ required: false
+ default: Default::default
+ - name: warehouse.path
+ field_type: String
+ required: true
+ - name: database.name
+ field_type: String
+ required: true
+ - name: table.name
+ field_type: String
+ required: true
KafkaProperties:
fields:
- name: bytes.per.second
diff --git a/src/expr/impl/benches/expr.rs b/src/expr/impl/benches/expr.rs
index fc2ad441cfb96..8468ae86e241b 100644
--- a/src/expr/impl/benches/expr.rs
+++ b/src/expr/impl/benches/expr.rs
@@ -304,7 +304,7 @@ fn bench_expr(c: &mut Criterion) {
}
if [
"date_trunc(character varying, timestamp with time zone) -> timestamp with time zone",
- "to_timestamp1(character varying, character varying) -> timestamp with time zone",
+ "char_to_timestamptz(character varying, character varying) -> timestamp with time zone",
"to_char(timestamp with time zone, character varying) -> character varying",
]
.contains(&format!("{sig:?}").as_str())
@@ -321,12 +321,12 @@ fn bench_expr(c: &mut Criterion) {
for (i, t) in sig.inputs_type.iter().enumerate() {
use DataType::*;
let idx = match (sig.name.as_scalar(), i) {
- (PbType::ToTimestamp1, 0) => TIMESTAMP_FORMATTED_STRING,
- (PbType::ToChar | PbType::ToTimestamp1, 1) => {
+ (PbType::CharToTimestamptz, 0) => TIMESTAMP_FORMATTED_STRING,
+ (PbType::ToChar | PbType::CharToTimestamptz, 1) => {
children.push(string_literal("YYYY/MM/DD HH:MM:SS"));
continue;
}
- (PbType::ToChar | PbType::ToTimestamp1, 2) => {
+ (PbType::ToChar | PbType::CharToTimestamptz, 2) => {
children.push(string_literal("Australia/Sydney"));
continue;
}
diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs
index dc81e3ab77bac..bf8afc7712f93 100644
--- a/src/expr/impl/src/scalar/cast.rs
+++ b/src/expr/impl/src/scalar/cast.rs
@@ -87,6 +87,7 @@ pub fn jsonb_to_number>(v: JsonbRef<'_>) -> Result {
#[function("cast(int4) -> int2")]
#[function("cast(int8) -> int2")]
#[function("cast(int8) -> int4")]
+#[function("cast(serial) -> int8")]
#[function("cast(float4) -> int2")]
#[function("cast(float8) -> int2")]
#[function("cast(float4) -> int4")]
diff --git a/src/expr/impl/src/scalar/timestamptz.rs b/src/expr/impl/src/scalar/timestamptz.rs
index 06433d25f2892..83e77011ec6be 100644
--- a/src/expr/impl/src/scalar/timestamptz.rs
+++ b/src/expr/impl/src/scalar/timestamptz.rs
@@ -28,7 +28,7 @@ pub fn time_zone_err(inner_err: String) -> ExprError {
}
}
-#[function("to_timestamp(float8) -> timestamptz")]
+#[function("sec_to_timestamptz(float8) -> timestamptz")]
pub fn f64_sec_to_timestamptz(elem: F64) -> Result<Timestamptz> {
// TODO(#4515): handle +/- infinity
let micros = (elem.0 * 1e6)
diff --git a/src/expr/impl/src/scalar/to_timestamp.rs b/src/expr/impl/src/scalar/to_timestamp.rs
index 3b3e4eaa90db6..d15703dbb78aa 100644
--- a/src/expr/impl/src/scalar/to_timestamp.rs
+++ b/src/expr/impl/src/scalar/to_timestamp.rs
@@ -66,7 +66,7 @@ fn parse(s: &str, tmpl: &ChronoPattern) -> Result {
}
#[function(
- "to_timestamp1(varchar, varchar) -> timestamp",
+ "char_to_timestamptz(varchar, varchar) -> timestamp",
prebuild = "ChronoPattern::compile($1)",
deprecated
)]
@@ -81,7 +81,7 @@ pub fn to_timestamp_legacy(s: &str, tmpl: &ChronoPattern) -> Result<Timestamp> {
}
#[function(
- "to_timestamp1(varchar, varchar, varchar) -> timestamptz",
+ "char_to_timestamptz(varchar, varchar, varchar) -> timestamptz",
prebuild = "ChronoPattern::compile($1)"
)]
pub fn to_timestamp(s: &str, timezone: &str, tmpl: &ChronoPattern) -> Result<Timestamptz> {
@@ -93,7 +93,7 @@ pub fn to_timestamp(s: &str, timezone: &str, tmpl: &ChronoPattern) -> Result<Timestamptz> {
-#[function("to_timestamp1(varchar, varchar) -> timestamptz", rewritten)]
+#[function("char_to_timestamptz(varchar, varchar) -> timestamptz", rewritten)]
fn _to_timestamp1() {}
#[function(
diff --git a/src/frontend/macro/src/lib.rs b/src/frontend/macro/src/lib.rs
index 8ba10a9f4454a..36b7f33eb99c0 100644
--- a/src/frontend/macro/src/lib.rs
+++ b/src/frontend/macro/src/lib.rs
@@ -117,11 +117,15 @@ fn gen_sys_table(attr: Attr, item_fn: ItemFn) -> Result<TokenStream2> {
#[linkme::distributed_slice(crate::catalog::system_catalog::SYS_CATALOGS_SLICE)]
#[no_mangle] // to prevent duplicate schema.table name
fn #gen_fn_name() -> crate::catalog::system_catalog::BuiltinCatalog {
+ const _: () = {
+ assert!(#struct_type::PRIMARY_KEY.is_some(), "primary key is required for system table");
+ };
+
crate::catalog::system_catalog::BuiltinCatalog::Table(crate::catalog::system_catalog::BuiltinTable {
name: #table_name,
schema: #schema_name,
columns: #struct_type::fields(),
- pk: #struct_type::primary_key(),
+ pk: #struct_type::PRIMARY_KEY.unwrap(),
function: |reader| std::boxed::Box::pin(async {
let rows = #user_fn_name(reader) #_await #handle_error;
let mut builder = #struct_type::data_chunk_builder(rows.len() + 1);
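The generated `const _: () = assert!(...)` turns a missing `#[primary_key]` into a compile error rather than a runtime panic. The same trick in isolation:

    // A compile-time guard: this fails to build if the invariant is broken,
    // so the later `.unwrap()` can never panic at runtime.
    const REQUIRED_PK: Option<&'static [usize]> = Some(&[0]);

    const _: () = {
        assert!(REQUIRED_PK.is_some(), "primary key is required for system table");
    };

    fn build_pk() -> &'static [usize] {
        REQUIRED_PK.unwrap()
    }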
diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs
index b787632846e98..22fc4ce99c45f 100644
--- a/src/frontend/src/binder/expr/function.rs
+++ b/src/frontend/src/binder/expr/function.rs
@@ -959,8 +959,8 @@ impl Binder {
(
"to_timestamp",
dispatch_by_len(vec![
- (1, raw_call(ExprType::ToTimestamp)),
- (2, raw_call(ExprType::ToTimestamp1)),
+ (1, raw_call(ExprType::SecToTimestamptz)),
+ (2, raw_call(ExprType::CharToTimestamptz)),
]),
),
("date_trunc", raw_call(ExprType::DateTrunc)),
diff --git a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs
index 074b772ca0bb8..a9a0d8fc4f1b5 100644
--- a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs
+++ b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs
@@ -34,13 +34,37 @@ use risingwave_frontend_macro::system_catalog;
NULL::integer AS numeric_scale,
c.position AS ordinal_position,
'YES' AS is_nullable,
- NULL AS collation_name,
- 'pg_catalog' AS udt_schema,
CASE
WHEN c.data_type = 'varchar' THEN 'character varying'
ELSE c.data_type
END AS data_type,
- c.udt_type AS udt_name
+ CURRENT_DATABASE() AS udt_catalog,
+ 'pg_catalog' AS udt_schema,
+ c.udt_type AS udt_name,
+ NULL AS character_set_catalog,
+ NULL AS character_set_schema,
+ NULL AS character_set_name,
+ NULL AS collation_catalog,
+ NULL AS collation_schema,
+ NULL AS collation_name,
+ NULL AS domain_catalog,
+ NULL AS domain_schema,
+ NULL AS domain_name,
+ NULL AS scope_catalog,
+ NULL AS scope_schema,
+ NULL AS scope_name,
+ 'NO' AS is_identity,
+ NULL AS identity_generation,
+ NULL AS identity_start,
+ NULL AS identity_increment,
+ NULL AS identity_maximum,
+ NULL AS identity_minimum,
+ NULL AS identity_cycle,
+ CASE
+ WHEN c.is_generated THEN 'ALWAYS'
+ ELSE 'NEVER'
+ END AS is_generated,
+ c.generation_expression
FROM rw_catalog.rw_columns c
LEFT JOIN rw_catalog.rw_relations r ON c.relation_id = r.id
JOIN rw_catalog.rw_schemas s ON s.id = r.schema_id
@@ -58,8 +82,29 @@ struct Column {
numeric_scale: i32,
ordinal_position: i32,
is_nullable: String,
- collation_name: String,
- udt_schema: String,
data_type: String,
+ udt_catalog: String,
+ udt_schema: String,
udt_name: String,
+ character_set_catalog: String,
+ character_set_schema: String,
+ character_set_name: String,
+ collation_catalog: String,
+ collation_schema: String,
+ collation_name: String,
+ domain_catalog: String,
+ domain_schema: String,
+ domain_name: String,
+ scope_catalog: String,
+ scope_schema: String,
+ scope_name: String,
+ is_identity: String,
+ identity_generation: String,
+ identity_start: String,
+ identity_increment: String,
+ identity_maximum: String,
+ identity_minimum: String,
+ identity_cycle: String,
+ is_generated: String,
+ generation_expression: String,
}
diff --git a/src/frontend/src/catalog/system_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/mod.rs
index 61ec69b77ae5a..18a4757601aed 100644
--- a/src/frontend/src/catalog/system_catalog/mod.rs
+++ b/src/frontend/src/catalog/system_catalog/mod.rs
@@ -31,6 +31,7 @@ use risingwave_common::catalog::{
};
use risingwave_common::error::BoxedError;
use risingwave_common::session_config::ConfigMap;
+use risingwave_common::system_param::local_manager::SystemParamsReaderRef;
use risingwave_common::types::DataType;
use risingwave_pb::meta::list_table_fragment_states_response::TableFragmentState;
use risingwave_pb::meta::table_parallelism::{PbFixedParallelism, PbParallelism};
@@ -110,6 +111,8 @@ pub struct SysCatalogReaderImpl {
auth_context: Arc<AuthContext>,
// Read config.
config: Arc<RwLock<ConfigMap>>,
+ // Read system params.
+ system_params: SystemParamsReaderRef,
}
impl SysCatalogReaderImpl {
@@ -120,6 +123,7 @@ impl SysCatalogReaderImpl {
meta_client: Arc<dyn FrontendMetaClient>,
auth_context: Arc<AuthContext>,
config: Arc<RwLock<ConfigMap>>,
+ system_params: SystemParamsReaderRef,
) -> Self {
Self {
catalog_reader,
@@ -128,6 +132,7 @@ impl SysCatalogReaderImpl {
meta_client,
auth_context,
config,
+ system_params,
}
}
}
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs
index c1a935803f9f4..ce97aeaac552c 100644
--- a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs
+++ b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs
@@ -35,6 +35,7 @@ mod pg_matviews;
mod pg_namespace;
mod pg_opclass;
mod pg_operator;
+mod pg_partitioned_table;
mod pg_proc;
mod pg_roles;
mod pg_settings;
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs
index c13e87f162afe..11bcabcde0f69 100644
--- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs
+++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs
@@ -22,6 +22,7 @@ use crate::expr::cast_map_array;
/// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-cast.html`]
#[derive(Fields)]
struct PgCast {
+ #[primary_key]
oid: i32,
castsource: i32,
casttarget: i32,
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs
index 196c36ec7f1af..2dfb15f9e527b 100644
--- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs
+++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs
@@ -28,7 +28,8 @@ use risingwave_frontend_macro::system_catalog;
ARRAY[]::smallint[] as indoption,
NULL AS indexprs,
NULL AS indpred,
- FALSE AS indisprimary
+ FALSE AS indisprimary,
+ ARRAY[]::int[] AS indclass
FROM rw_catalog.rw_indexes"
)]
#[derive(Fields)]
@@ -46,4 +47,6 @@ struct PgIndex {
indpred: Option<String>,
// TODO: we return false as the default value.
indisprimary: bool,
+ // Empty array, since we only have a dummy implementation of `pg_opclass` so far.
+ indclass: Vec,
}
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs
new file mode 100644
index 0000000000000..e11739e2609fd
--- /dev/null
+++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs
@@ -0,0 +1,30 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use risingwave_common::types::Fields;
+use risingwave_frontend_macro::system_catalog;
+
+/// The catalog `pg_partitioned_table` stores information about how tables are partitioned. Reference: [`https://www.postgresql.org/docs/current/catalog-pg-partitioned-table.html`]
+#[system_catalog(view, "pg_catalog.pg_partitioned_table")]
+#[derive(Fields)]
+struct PgPartitionedTable {
+ partrelid: i32,
+ partstrat: String,
+ partnatts: i16,
+ partdefid: i32,
+ partattrs: Vec<i16>,
+ partclass: Vec<i32>,
+ partcollation: Vec<i32>,
+ partexprs: Option<String>,
+}
diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs
index 4fc0fb057108f..58d44b1aef92b 100644
--- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs
+++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs
@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use risingwave_common::types::Fields;
+use risingwave_common::system_param::reader::SystemParamsRead;
+use risingwave_common::types::{DataType, Datum, Fields, ToOwnedDatum, WithDataType};
use risingwave_frontend_macro::system_catalog;
use crate::catalog::system_catalog::SysCatalogReaderImpl;
@@ -20,23 +21,75 @@ use crate::catalog::system_catalog::SysCatalogReaderImpl;
/// The catalog `pg_settings` stores settings.
/// Ref: [`https://www.postgresql.org/docs/current/view-pg-settings.html`]
#[derive(Fields)]
+#[primary_key(name, context)]
struct PgSetting {
name: String,
setting: String,
short_desc: String,
+ context: Context,
+}
+
+/// Context required to set the parameter's value.
+///
+/// Note that we do not strictly follow PostgreSQL's semantics for each variant,
+/// but only pick the minimum set of variants required for features like tab-completion.
+#[derive(Clone, Copy)]
+enum Context {
+ /// Used for immutable system parameters.
+ Internal,
+
+ /// Used for mutable system parameters.
+ // TODO: `postmaster` means that changes require a restart of the server. This is
+ // not accurate for all system parameters. Use lower contexts once we can
+ // guarantee the behavior of each parameter.
+ Postmaster,
+
+ /// Used for session variables.
+ // TODO: There might be variables that can only be set by superusers in the future.
+ // Should use `superuser` context then.
+ User,
+}
+
+impl WithDataType for Context {
+ fn default_data_type() -> DataType {
+ DataType::Varchar
+ }
+}
+
+impl ToOwnedDatum for Context {
+ fn to_owned_datum(self) -> Datum {
+ match self {
+ Context::Internal => "internal",
+ Context::Postmaster => "postmaster",
+ Context::User => "user",
+ }
+ .to_owned_datum()
+ }
}
#[system_catalog(table, "pg_catalog.pg_settings")]
fn read_pg_settings(reader: &SysCatalogReaderImpl) -> Vec<PgSetting> {
- let config_reader = reader.config.read();
- let all_variables = config_reader.show_all();
+ let variables = (reader.config.read().show_all())
+ .into_iter()
+ .map(|info| PgSetting {
+ name: info.name,
+ setting: info.setting,
+ short_desc: info.description,
+ context: Context::User,
+ });
- all_variables
- .iter()
+ let system_params = (reader.system_params.load().get_all())
+ .into_iter()
.map(|info| PgSetting {
- name: info.name.clone(),
- setting: info.setting.clone(),
- short_desc: info.description.clone(),
- })
- .collect()
+ name: info.name.to_owned(),
+ setting: info.value,
+ short_desc: info.description.to_owned(),
+ context: if info.mutable {
+ Context::Postmaster
+ } else {
+ Context::Internal
+ },
+ });
+
+ variables.chain(system_params).collect()
}
diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs
index 40760df81a492..8491da7062711 100644
--- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs
+++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs
@@ -17,6 +17,7 @@ use risingwave_frontend_macro::system_catalog;
use crate::catalog::system_catalog::SysCatalogReaderImpl;
use crate::error::Result;
+use crate::expr::{ExprDisplay, ExprImpl};
#[derive(Fields)]
#[primary_key(relation_id, name)]
@@ -27,6 +28,8 @@ struct RwColumn {
is_hidden: bool,
is_primary_key: bool,
is_distribution_key: bool,
+ is_generated: bool,
+ generation_expression: Option<String>,
data_type: String,
type_oid: i32,
type_len: i16,
@@ -51,6 +54,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result<Vec<RwColumn>> {
is_hidden: false,
is_primary_key: false,
is_distribution_key: false,
+ is_generated: false,
+ generation_expression: None,
data_type: column.data_type().to_string(),
type_oid: column.data_type().to_oid(),
type_len: column.data_type().type_len(),
@@ -71,6 +76,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result<Vec<RwColumn>> {
is_hidden: column.is_hidden,
is_primary_key: sink.downstream_pk.contains(&index),
is_distribution_key: sink.distribution_key.contains(&index),
+ is_generated: false,
+ generation_expression: None,
data_type: column.data_type().to_string(),
type_oid: column.data_type().to_oid(),
type_len: column.data_type().type_len(),
@@ -93,6 +100,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result<Vec<RwColumn>> {
is_hidden: column.is_hidden,
is_primary_key: table.pk.contains(&index),
is_distribution_key: false,
+ is_generated: false,
+ generation_expression: None,
data_type: column.data_type().to_string(),
type_oid: column.data_type().to_oid(),
type_len: column.data_type().type_len(),
@@ -104,6 +113,7 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result<Vec<RwColumn>> {
let table_rows = schema
.iter_valid_table()
.flat_map(|table| {
+ let schema = table.column_schema();
table
.columns
.iter()
@@ -115,6 +125,15 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> {
is_hidden: column.is_hidden,
is_primary_key: table.pk().iter().any(|idx| idx.column_index == index),
is_distribution_key: table.distribution_key.contains(&index),
+ is_generated: column.is_generated(),
+ generation_expression: column.generated_expr().map(|expr_node| {
+ let expr = ExprImpl::from_expr_proto(expr_node).unwrap();
+ let expr_display = ExprDisplay {
+ expr: &expr,
+ input_schema: &schema,
+ };
+ expr_display.to_string()
+ }),
data_type: column.data_type().to_string(),
type_oid: column.data_type().to_oid(),
type_len: column.data_type().type_len(),
@@ -138,6 +157,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result<Vec<RwColumn>> {
is_hidden: column.is_hidden,
is_primary_key: source.pk_col_ids.contains(&column.column_id()),
is_distribution_key: false,
+ is_generated: false,
+ generation_expression: None,
data_type: column.data_type().to_string(),
type_oid: column.data_type().to_oid(),
type_len: column.data_type().type_len(),
diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs
index 2699503a2fdd5..443fa255f4398 100644
--- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs
+++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs
@@ -19,6 +19,7 @@ use crate::catalog::system_catalog::SysCatalogReaderImpl;
use crate::error::Result;
#[derive(Fields)]
+#[primary_key(object_id, sst_id)] // TODO: is this correct?
struct RwHummockBranchedObject {
object_id: i64,
sst_id: i64,
diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs
index ac2b96bdc0023..e4f18c8fecaf3 100644
--- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs
+++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs
@@ -20,6 +20,7 @@ use crate::error::Result;
#[derive(Fields)]
struct RwHummockPinnedSnapshot {
+ #[primary_key]
worker_node_id: i32,
min_pinned_snapshot_id: i64,
}
diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs
index 45a8e23f0ecc5..c0a9dd9e7fc45 100644
--- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs
+++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs
@@ -20,6 +20,7 @@ use crate::error::Result;
#[derive(Fields)]
struct RwHummockPinnedVersion {
+ #[primary_key]
worker_node_id: i32,
min_pinned_version_id: i64,
}
diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs
index 5551170e57a6f..37d1ceb6486ea 100644
--- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs
+++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs
@@ -22,6 +22,7 @@ use crate::error::Result;
#[derive(Fields)]
struct RwHummockVersion {
+ #[primary_key]
version_id: i64,
max_committed_epoch: i64,
safe_epoch: i64,
diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs
index ebb969cac462f..f31b1f7c67c5c 100644
--- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs
+++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs
@@ -21,6 +21,7 @@ use crate::error::Result;
#[derive(Fields)]
struct RwMetaSnapshot {
+ #[primary_key]
meta_snapshot_id: i64,
hummock_version_id: i64,
// the smallest epoch this meta snapshot includes
diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs
index fbb77a0ca0bb5..edb458997e33f 100644
--- a/src/frontend/src/catalog/table_catalog.rs
+++ b/src/frontend/src/catalog/table_catalog.rs
@@ -17,7 +17,7 @@ use std::collections::{HashMap, HashSet};
use fixedbitset::FixedBitSet;
use itertools::Itertools;
use risingwave_common::catalog::{
- ColumnCatalog, ConflictBehavior, TableDesc, TableId, TableVersionId,
+ ColumnCatalog, ConflictBehavior, Field, Schema, TableDesc, TableId, TableVersionId,
};
use risingwave_common::util::epoch::Epoch;
use risingwave_common::util::sort_util::ColumnOrder;
@@ -74,6 +74,8 @@ pub struct TableCatalog {
pub name: String,
+ pub dependent_relations: Vec<TableId>,
+
/// All columns in this table.
pub columns: Vec<ColumnCatalog>,
@@ -492,6 +494,15 @@ impl TableCatalog {
pub fn has_generated_column(&self) -> bool {
self.columns.iter().any(|c| c.is_generated())
}
+
+ pub fn column_schema(&self) -> Schema {
+ Schema::new(
+ self.columns
+ .iter()
+ .map(|c| Field::from(&c.column_desc))
+ .collect(),
+ )
+ }
}
impl From<PbTable> for TableCatalog {
@@ -564,6 +575,11 @@ impl From<PbTable> for TableCatalog {
created_at_cluster_version: tb.created_at_cluster_version.clone(),
initialized_at_cluster_version: tb.initialized_at_cluster_version.clone(),
retention_seconds: tb.retention_seconds,
+ dependent_relations: tb
+ .dependent_relations
+ .into_iter()
+ .map(TableId::from)
+ .collect_vec(),
}
}
}
@@ -715,6 +731,7 @@ mod tests {
incoming_sinks: vec![],
created_at_cluster_version: None,
initialized_at_cluster_version: None,
+ dependent_relations: vec![],
}
);
assert_eq!(table, TableCatalog::from(table.to_prost(0, 0)));
diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs
index 5528b4614c355..4a7fc95cfd7ba 100644
--- a/src/frontend/src/expr/pure.rs
+++ b/src/frontend/src/expr/pure.rs
@@ -60,13 +60,13 @@ impl ExprVisitor for ImpureAnalyzer {
| expr_node::Type::Extract
| expr_node::Type::DatePart
| expr_node::Type::TumbleStart
- | expr_node::Type::ToTimestamp
+ | expr_node::Type::SecToTimestamptz
| expr_node::Type::AtTimeZone
| expr_node::Type::DateTrunc
| expr_node::Type::MakeDate
| expr_node::Type::MakeTime
| expr_node::Type::MakeTimestamp
- | expr_node::Type::ToTimestamp1
+ | expr_node::Type::CharToTimestamptz
| expr_node::Type::CharToDate
| expr_node::Type::CastWithTimeZone
| expr_node::Type::AddWithTimeZone
diff --git a/src/frontend/src/expr/session_timezone.rs b/src/frontend/src/expr/session_timezone.rs
index 8382e51398419..5ab35726c176b 100644
--- a/src/frontend/src/expr/session_timezone.rs
+++ b/src/frontend/src/expr/session_timezone.rs
@@ -216,9 +216,9 @@ impl SessionTimezone {
new_inputs.push(ExprImpl::literal_varchar(self.timezone()));
Some(FunctionCall::new(func_type, new_inputs).unwrap().into())
}
- // `to_timestamp1(input_string, format_string)`
- // => `to_timestamp1(input_string, format_string, zone_string)`
- ExprType::ToTimestamp1 => {
+ // `char_to_timestamptz(input_string, format_string)`
+ // => `char_to_timestamptz(input_string, format_string, zone_string)`
+ ExprType::CharToTimestamptz => {
if !(inputs.len() == 2
&& inputs[0].return_type() == DataType::Varchar
&& inputs[1].return_type() == DataType::Varchar)
diff --git a/src/frontend/src/expr/type_inference/cast.rs b/src/frontend/src/expr/type_inference/cast.rs
index aa7e1c8ee9192..1f1a96e92b826 100644
--- a/src/frontend/src/expr/type_inference/cast.rs
+++ b/src/frontend/src/expr/type_inference/cast.rs
@@ -216,22 +216,23 @@ pub static CAST_MAP: LazyLock<CastMap> = LazyLock::new(|| {
use DataTypeName::*;
const CAST_TABLE: &[(&str, DataTypeName)] = &[
// 123456789ABCDEF
- (". e a", Boolean), // 0
- (" .iiiiii a", Int16), // 1
- ("ea.iiiii a", Int32), // 2
- (" aa.iiii a", Int64), // 3
- (" aaa.ii a", Decimal), // 4
- (" aaaa.i a", Float32), // 5
- (" aaaaa. a", Float64), // 6
- (" e. a", Int256), // 7
- (" .ii a", Date), // 8
- (" a.ia a", Timestamp), // 9
- (" aa.a a", Timestamptz), // A
- (" .i a", Time), // B
- (" a. a", Interval), // C
- ("eeeeeee . a", Jsonb), // D
- (" .a", Bytea), // E
- ("eeeeeeeeeeeeeee.", Varchar), // F
+ (". e a ", Boolean), // 0
+ (" .iiiiii a ", Int16), // 1
+ ("ea.iiiii a ", Int32), // 2
+ (" aa.iiii a ", Int64), // 3
+ (" aaa.ii a ", Decimal), // 4
+ (" aaaa.i a ", Float32), // 5
+ (" aaaaa. a ", Float64), // 6
+ (" e. a ", Int256), // 7
+ (" .ii a ", Date), // 8
+ (" a.ia a ", Timestamp), // 9
+ (" aa.a a ", Timestamptz), // A
+ (" .i a ", Time), // B
+ (" a. a ", Interval), // C
+ ("eeeeeee . a ", Jsonb), // D
+ (" .a ", Bytea), // E
+ ("eeeeeeeeeeeeeee. ", Varchar), // F
+ (" e .", Serial),
];
let mut map = BTreeMap::new();
for (row, source) in CAST_TABLE {
diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs
index 259c7400dc996..9db25b3dc554e 100644
--- a/src/frontend/src/expr/utils.rs
+++ b/src/frontend/src/expr/utils.rs
@@ -498,11 +498,23 @@ impl WatermarkAnalyzer {
_ => WatermarkDerivation::None,
},
ExprType::Subtract | ExprType::TumbleStart => {
- match self.visit_binary_op(func_call.inputs()) {
- (Constant, Constant) => Constant,
- (Watermark(idx), Constant) => Watermark(idx),
- (Nondecreasing, Constant) => Nondecreasing,
- _ => WatermarkDerivation::None,
+ if func_call.inputs().len() == 3 {
+ // With `offset` specified
+ // e.g., select * from tumble(t1, start, interval, offset);
+ assert_eq!(ExprType::TumbleStart, func_call.func_type());
+ match self.visit_ternary_op(func_call.inputs()) {
+ (Constant, Constant, Constant) => Constant,
+ (Watermark(idx), Constant, Constant) => Watermark(idx),
+ (Nondecreasing, Constant, Constant) => Nondecreasing,
+ _ => WatermarkDerivation::None,
+ }
+ } else {
+ match self.visit_binary_op(func_call.inputs()) {
+ (Constant, Constant) => Constant,
+ (Watermark(idx), Constant) => Watermark(idx),
+ (Nondecreasing, Constant) => Nondecreasing,
+ _ => WatermarkDerivation::None,
+ }
}
}
ExprType::Multiply | ExprType::Divide | ExprType::Modulus => {
@@ -577,8 +589,8 @@ impl WatermarkAnalyzer {
},
_ => unreachable!(),
},
- ExprType::ToTimestamp => self.visit_unary_op(func_call.inputs()),
- ExprType::ToTimestamp1 => WatermarkDerivation::None,
+ ExprType::SecToTimestamptz => self.visit_unary_op(func_call.inputs()),
+ ExprType::CharToTimestamptz => WatermarkDerivation::None,
ExprType::Cast => {
// TODO: need more derivation
WatermarkDerivation::None
diff --git a/src/frontend/src/handler/alter_source_with_sr.rs b/src/frontend/src/handler/alter_source_with_sr.rs
index a8e6892e5a908..06bb2d0387479 100644
--- a/src/frontend/src/handler/alter_source_with_sr.rs
+++ b/src/frontend/src/handler/alter_source_with_sr.rs
@@ -42,6 +42,7 @@ fn format_type_to_format(from: FormatType) -> Option<Format> {
FormatType::Canal => Format::Canal,
FormatType::Upsert => Format::Upsert,
FormatType::Plain => Format::Plain,
+ FormatType::None => Format::None,
})
}
@@ -55,6 +56,7 @@ fn encode_type_to_encode(from: EncodeType) -> Option<Encode> {
EncodeType::Json => Encode::Json,
EncodeType::Bytes => Encode::Bytes,
EncodeType::Template => Encode::Template,
+ EncodeType::None => Encode::None,
})
}
diff --git a/src/frontend/src/handler/cancel_job.rs b/src/frontend/src/handler/cancel_job.rs
index f124a2a030bd1..278e01e3e1bc0 100644
--- a/src/frontend/src/handler/cancel_job.rs
+++ b/src/frontend/src/handler/cancel_job.rs
@@ -12,14 +12,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use itertools::Itertools;
-use pgwire::pg_field_descriptor::PgFieldDescriptor;
use pgwire::pg_response::{PgResponse, StatementType};
-use pgwire::types::Row;
-use risingwave_common::types::DataType;
+use risingwave_common::types::Fields;
use risingwave_pb::meta::cancel_creating_jobs_request::{CreatingJobIds, PbJobs};
use risingwave_sqlparser::ast::JobIdents;
+use super::RwPgResponseBuilderExt;
use crate::error::Result;
use crate::handler::{HandlerArgs, RwPgResponse};
@@ -36,16 +34,14 @@ pub(super) async fn handle_cancel(
.await?;
let rows = canceled_jobs
.into_iter()
- .map(|id| Row::new(vec![Some(id.to_string().into())]))
- .collect_vec();
+ .map(|id| CancelRow { id: id.to_string() });
Ok(PgResponse::builder(StatementType::CANCEL_COMMAND)
- .values(
- rows.into(),
- vec![PgFieldDescriptor::new(
- "Id".to_string(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- )],
- )
+ .rows(rows)
.into())
}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct CancelRow {
+ id: String,
+}
diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs
index de8e93e04a784..245976bd913b9 100644
--- a/src/frontend/src/handler/create_sink.rs
+++ b/src/frontend/src/handler/create_sink.rs
@@ -504,7 +504,7 @@ fn check_cycle_for_sink(
if let Ok(table) = reader.get_table_by_id(table_id) {
visit_table(session, reader, sink_index, table.as_ref(), visited_tables)?
} else {
- bail!("table not found: {:?}", table_id);
+ bail!("streaming job not found: {:?}", table_id);
}
}
@@ -533,6 +533,14 @@ fn check_cycle_for_sink(
}
}
+ for table_id in &table.dependent_relations {
+ if let Ok(table) = reader.get_table_by_id(table_id) {
+ visit_table(session, reader, sink_index, table.as_ref(), visited_tables)?
+ } else {
+ bail!("streaming job not found: {:?}", table_id);
+ }
+ }
+
Ok(())
}
@@ -693,7 +701,7 @@ fn bind_sink_format_desc(value: ConnectorSchema) -> Result<SinkFormatDesc> {
F::Plain => SinkFormat::AppendOnly,
F::Upsert => SinkFormat::Upsert,
F::Debezium => SinkFormat::Debezium,
- f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => {
+ f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal | F::None) => {
return Err(ErrorCode::BindError(format!("sink format unsupported: {f}")).into());
}
};
@@ -702,7 +710,7 @@ fn bind_sink_format_desc(value: ConnectorSchema) -> Result<SinkFormatDesc> {
E::Protobuf => SinkEncode::Protobuf,
E::Avro => SinkEncode::Avro,
E::Template => SinkEncode::Template,
- e @ (E::Native | E::Csv | E::Bytes) => {
+ e @ (E::Native | E::Csv | E::Bytes | E::None) => {
return Err(ErrorCode::BindError(format!("sink encode unsupported: {e}")).into());
}
};
diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs
index 0ab4d9ec3a416..bbb2d93b21790 100644
--- a/src/frontend/src/handler/create_source.rs
+++ b/src/frontend/src/handler/create_source.rs
@@ -16,13 +16,13 @@ use std::collections::{BTreeMap, HashMap};
use std::rc::Rc;
use std::sync::LazyLock;
-use anyhow::Context;
+use anyhow::{anyhow, Context};
use either::Either;
use itertools::Itertools;
use maplit::{convert_args, hashmap};
use pgwire::pg_response::{PgResponse, StatementType};
use risingwave_common::catalog::{
- is_column_ids_dedup, ColumnCatalog, ColumnDesc, TableId, INITIAL_SOURCE_VERSION_ID,
+ is_column_ids_dedup, ColumnCatalog, ColumnDesc, Schema, TableId, INITIAL_SOURCE_VERSION_ID,
KAFKA_TIMESTAMP_COLUMN_NAME,
};
use risingwave_common::types::DataType;
@@ -36,17 +36,20 @@ use risingwave_connector::parser::{
use risingwave_connector::schema::schema_registry::{
name_strategy_from_str, SchemaRegistryAuth, SCHEMA_REGISTRY_PASSWORD, SCHEMA_REGISTRY_USERNAME,
};
+use risingwave_connector::sink::iceberg::IcebergConfig;
use risingwave_connector::source::cdc::external::CdcTableType;
use risingwave_connector::source::cdc::{
- CDC_SHARING_MODE_KEY, CDC_SNAPSHOT_BACKFILL, CDC_SNAPSHOT_MODE_KEY, CITUS_CDC_CONNECTOR,
- MYSQL_CDC_CONNECTOR, POSTGRES_CDC_CONNECTOR,
+ CDC_SHARING_MODE_KEY, CDC_SNAPSHOT_BACKFILL, CDC_SNAPSHOT_MODE_KEY, CDC_TRANSACTIONAL_KEY,
+ CITUS_CDC_CONNECTOR, MYSQL_CDC_CONNECTOR, POSTGRES_CDC_CONNECTOR,
};
use risingwave_connector::source::datagen::DATAGEN_CONNECTOR;
+use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR;
use risingwave_connector::source::nexmark::source::{get_event_data_types_with_names, EventType};
use risingwave_connector::source::test_source::TEST_CONNECTOR;
use risingwave_connector::source::{
- GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR, KINESIS_CONNECTOR, NATS_CONNECTOR,
- NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR,
+ ConnectorProperties, GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR,
+ KINESIS_CONNECTOR, NATS_CONNECTOR, NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR,
+ PULSAR_CONNECTOR, S3_CONNECTOR,
};
use risingwave_pb::catalog::{
PbSchemaRegistryNameStrategy, PbSource, StreamSourceInfo, WatermarkDesc,
@@ -72,7 +75,7 @@ use crate::handler::create_table::{
ensure_table_constraints_supported, ColumnIdGenerator,
};
use crate::handler::util::{
- get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt,
+ connector_need_pk, get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt,
};
use crate::handler::HandlerArgs;
use crate::optimizer::plan_node::generic::SourceNodeKind;
@@ -316,6 +319,7 @@ pub(crate) async fn bind_columns_from_source(
let columns = match (&source_schema.format, &source_schema.row_encode) {
(Format::Native, Encode::Native)
+ | (Format::None, Encode::None)
| (Format::Plain, Encode::Bytes)
| (Format::DebeziumMongo, Encode::Json) => None,
(Format::Plain, Encode::Protobuf) => {
@@ -706,7 +710,9 @@ pub(crate) async fn bind_source_pk(
.collect_vec();
let res = match (&source_schema.format, &source_schema.row_encode) {
- (Format::Native, Encode::Native) | (Format::Plain, _) => sql_defined_pk_names,
+ (Format::Native, Encode::Native) | (Format::None, Encode::None) | (Format::Plain, _) => {
+ sql_defined_pk_names
+ }
// For all Upsert formats, we only accept one and only key column as primary key.
// Additional KEY columns must be set in this case and must be primary key.
@@ -977,6 +983,9 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock hashmap!(
Format::Plain => vec![Encode::Json],
+ ),
+ ICEBERG_CONNECTOR => hashmap!(
+ Format::None => vec![Encode::None],
)
))
});
@@ -1054,12 +1063,11 @@ pub fn validate_compatibility(
}
/// Performs early stage checking in frontend to see if the schema of the given `columns` is
-/// compatible with the connector extracted from the properties. Currently this only works for
-/// `nexmark` connector since it's in chunk format.
+/// compatible with the connector extracted from the properties.
///
/// One should only call this function after all properties of all columns are resolved, like
/// generated column descriptors.
-pub(super) fn check_source_schema(
+pub(super) async fn check_source_schema(
props: &HashMap<String, String>,
row_id_index: Option<usize>,
columns: &[ColumnCatalog],
@@ -1068,10 +1076,22 @@ pub(super) fn check_source_schema(
return Ok(());
};
- if connector != NEXMARK_CONNECTOR {
- return Ok(());
+ if connector == NEXMARK_CONNECTOR {
+ check_nexmark_schema(props, row_id_index, columns)
+ } else if connector == ICEBERG_CONNECTOR {
+ Ok(check_iceberg_source(props, columns)
+ .await
+ .map_err(|err| ProtocolError(err.to_string()))?)
+ } else {
+ Ok(())
}
+}
+pub(super) fn check_nexmark_schema(
+ props: &HashMap<String, String>,
+ row_id_index: Option<usize>,
+ columns: &[ColumnCatalog],
+) -> Result<()> {
let table_type = props
.get("nexmark.table.type")
.map(|t| t.to_ascii_lowercase());
@@ -1121,6 +1141,68 @@ pub(super) fn check_source_schema(
Ok(())
}
+pub async fn check_iceberg_source(
+ props: &HashMap<String, String>,
+ columns: &[ColumnCatalog],
+) -> anyhow::Result<()> {
+ let props = ConnectorProperties::extract(props.clone(), true)?;
+ let ConnectorProperties::Iceberg(properties) = props else {
+ return Err(anyhow!(format!(
+ "Invalid properties for iceberg source: {:?}",
+ props
+ )));
+ };
+
+ let iceberg_config = IcebergConfig {
+ database_name: properties.database_name,
+ table_name: properties.table_name,
+ catalog_type: Some(properties.catalog_type),
+ path: properties.warehouse_path,
+ endpoint: Some(properties.endpoint),
+ access_key: properties.s3_access,
+ secret_key: properties.s3_secret,
+ region: Some(properties.region_name),
+ ..Default::default()
+ };
+
+ let schema = Schema {
+ fields: columns
+ .iter()
+ .cloned()
+ .map(|c| c.column_desc.into())
+ .collect(),
+ };
+
+ let table = iceberg_config.load_table().await?;
+
+ let iceberg_schema: arrow_schema::Schema = table
+ .current_table_metadata()
+ .current_schema()?
+ .clone()
+ .try_into()?;
+
+ for f1 in schema.fields() {
+ if !iceberg_schema.fields.iter().any(|f2| f2.name() == &f1.name) {
+ return Err(anyhow::anyhow!(format!(
+ "Column {} not found in iceberg table",
+ f1.name
+ )));
+ }
+ }
+
+ let new_iceberg_field = iceberg_schema
+ .fields
+ .iter()
+ .filter(|f1| schema.fields.iter().any(|f2| f1.name() == &f2.name))
+ .cloned()
+ .collect::<Vec<_>>();
+ let new_iceberg_schema = arrow_schema::Schema::new(new_iceberg_field);
+
+ risingwave_connector::sink::iceberg::try_matches_arrow_schema(&schema, &new_iceberg_schema)?;
+
+ Ok(())
+}
+
pub async fn handle_create_source(
handler_args: HandlerArgs,
stmt: CreateSourceStatement,
@@ -1196,6 +1278,8 @@ pub async fn handle_create_source(
with_properties.insert(CDC_SNAPSHOT_MODE_KEY.into(), CDC_SNAPSHOT_BACKFILL.into());
// enable cdc sharing mode, which will capture all tables in the given `database.name`
with_properties.insert(CDC_SHARING_MODE_KEY.into(), "true".into());
+ // enable transactional cdc
+ with_properties.insert(CDC_TRANSACTIONAL_KEY.into(), "true".into());
}
// must behind `handle_addition_columns`
@@ -1213,8 +1297,8 @@ pub async fn handle_create_source(
)
.into());
}
-
- let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?;
+ let (mut columns, pk_column_ids, row_id_index) =
+ bind_pk_on_relation(columns, pk_names, connector_need_pk(&with_properties))?;
debug_assert!(is_column_ids_dedup(&columns));
@@ -1231,7 +1315,7 @@ pub async fn handle_create_source(
&pk_column_ids,
)?;
- check_source_schema(&with_properties, row_id_index, &columns)?;
+ check_source_schema(&with_properties, row_id_index, &columns).await?;
let pk_column_ids = pk_column_ids.into_iter().map(Into::into).collect();
@@ -1308,6 +1392,7 @@ fn format_to_prost(format: &Format) -> FormatType {
Format::DebeziumMongo => FormatType::DebeziumMongo,
Format::Maxwell => FormatType::Maxwell,
Format::Canal => FormatType::Canal,
+ Format::None => FormatType::None,
}
}
fn row_encode_to_prost(row_encode: &Encode) -> EncodeType {
@@ -1319,6 +1404,7 @@ fn row_encode_to_prost(row_encode: &Encode) -> EncodeType {
Encode::Csv => EncodeType::Csv,
Encode::Bytes => EncodeType::Bytes,
Encode::Template => EncodeType::Template,
+ Encode::None => EncodeType::None,
}
}
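The new `check_iceberg_source` above loads the Iceberg table, converts its current schema to Arrow, and verifies that every declared source column exists in the table before delegating the type comparison to `try_matches_arrow_schema`. Below is a minimal, self-contained sketch of that name/type containment step using plain `arrow_schema` types; the helper name and error strings are illustrative and not the actual connector code.

```rust
use arrow_schema::{DataType, Field, Schema};

/// Check that every declared source column exists in the Iceberg table schema
/// with the same Arrow data type. (Simplified stand-in for the real check,
/// which allows type compatibility rather than strict equality.)
fn check_subset(declared: &Schema, table: &Schema) -> Result<(), String> {
    for f in declared.fields().iter() {
        let Some(tf) = table.fields().iter().find(|tf| tf.name() == f.name()) else {
            return Err(format!("Column {} not found in iceberg table", f.name()));
        };
        if tf.data_type() != f.data_type() {
            return Err(format!(
                "Column {}: table has {:?}, source declares {:?}",
                f.name(),
                tf.data_type(),
                f.data_type()
            ));
        }
    }
    Ok(())
}

fn main() {
    let table = Schema::new(vec![
        Field::new("id", DataType::Int64, false),
        Field::new("name", DataType::Utf8, true),
    ]);
    // Declaring a subset of the table's columns passes the check.
    let declared = Schema::new(vec![Field::new("id", DataType::Int64, false)]);
    assert!(check_subset(&declared, &table).is_ok());
    // Declaring a column the table does not have fails early in the frontend.
    let bad = Schema::new(vec![Field::new("age", DataType::Int32, true)]);
    assert!(check_subset(&bad, &table).is_err());
}
```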
diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs
index 8fc30c2c30e19..7fc757b71b6b7 100644
--- a/src/frontend/src/handler/create_table.rs
+++ b/src/frontend/src/handler/create_table.rs
@@ -61,6 +61,7 @@ use crate::handler::create_source::{
bind_all_columns, bind_columns_from_source, bind_source_pk, bind_source_watermark,
check_source_schema, handle_addition_columns, validate_compatibility, UPSTREAM_SOURCE_KEY,
};
+use crate::handler::util::is_iceberg_connector;
use crate::handler::HandlerArgs;
use crate::optimizer::plan_node::generic::SourceNodeKind;
use crate::optimizer::plan_node::{LogicalCdcScan, LogicalSource};
@@ -411,6 +412,7 @@ fn multiple_pk_definition_err() -> RwError {
pub fn bind_pk_on_relation(
mut columns: Vec<ColumnCatalog>,
pk_names: Vec<String>,
+ must_need_pk: bool,
) -> Result<(Vec<ColumnCatalog>, Vec<ColumnId>, Option<usize>)> {
for c in &columns {
assert!(c.column_id() != ColumnId::placeholder());
@@ -431,8 +433,10 @@ pub fn bind_pk_on_relation(
})
.try_collect()?;
- // Add `_row_id` column if `pk_column_ids` is empty.
- let row_id_index = pk_column_ids.is_empty().then(|| {
+ // Add `_row_id` column if `pk_column_ids` is empty and a primary key is required (`must_need_pk`).
+ let need_row_id = pk_column_ids.is_empty() && must_need_pk;
+
+ let row_id_index = need_row_id.then(|| {
let column = ColumnCatalog::row_id_column();
let index = columns.len();
pk_column_ids = vec![column.column_id()];
@@ -510,7 +514,12 @@ pub(crate) async fn gen_create_table_plan_with_source(
c.column_desc.column_id = col_id_gen.generate(c.name())
}
- let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?;
+ if is_iceberg_connector(&with_properties) {
+ return Err(
+ ErrorCode::BindError("can't create table with iceberg connector".to_string()).into(),
+ );
+ }
+ let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?;
let watermark_descs = bind_source_watermark(
session,
@@ -531,7 +540,7 @@ pub(crate) async fn gen_create_table_plan_with_source(
&pk_column_ids,
)?;
- check_source_schema(&with_properties, row_id_index, &columns)?;
+ check_source_schema(&with_properties, row_id_index, &columns).await?;
gen_table_plan_inner(
context.into(),
@@ -594,7 +603,7 @@ pub(crate) fn gen_create_table_plan_without_bind(
) -> Result<(PlanRef, Option<PbSource>, PbTable)> {
ensure_table_constraints_supported(&constraints)?;
let pk_names = bind_sql_pk_names(&column_defs, &constraints)?;
- let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?;
+ let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?;
let watermark_descs = bind_source_watermark(
context.session_ctx(),
@@ -774,7 +783,7 @@ pub(crate) fn gen_create_table_plan_for_cdc_source(
}
let pk_names = bind_sql_pk_names(&column_defs, &constraints)?;
- let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?;
+ let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?;
let definition = context.normalized_sql().to_owned();
@@ -1275,7 +1284,7 @@ mod tests {
}
ensure_table_constraints_supported(&constraints)?;
let pk_names = bind_sql_pk_names(&column_defs, &constraints)?;
- let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?;
+ let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?;
Ok(pk_column_ids)
})();
match (expected, actual) {
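The `must_need_pk` flag threaded through `bind_pk_on_relation` decides whether the hidden `_row_id` column is appended when no primary key is declared; `connector_need_pk` returns `false` only for the iceberg connector. A small sketch of that decision with a hypothetical helper (not the actual function):

```rust
/// Simplified decision from `bind_pk_on_relation`: a hidden `_row_id` column is
/// appended only when no primary key was declared *and* the connector needs one.
fn needs_hidden_row_id(declared_pk_columns: usize, must_need_pk: bool) -> bool {
    declared_pk_columns == 0 && must_need_pk
}

fn main() {
    // Iceberg source without a PK: no `_row_id` is added.
    assert!(!needs_hidden_row_id(0, false));
    // Regular table or source without a PK: `_row_id` becomes the implicit key.
    assert!(needs_hidden_row_id(0, true));
    // Any relation with an explicit PK never gets `_row_id`.
    assert!(!needs_hidden_row_id(2, true));
}
```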
diff --git a/src/frontend/src/handler/describe.rs b/src/frontend/src/handler/describe.rs
index ef1a601cca590..36cff2e20e2b6 100644
--- a/src/frontend/src/handler/describe.rs
+++ b/src/frontend/src/handler/describe.rs
@@ -17,17 +17,16 @@ use std::fmt::Display;
use itertools::Itertools;
use pgwire::pg_field_descriptor::PgFieldDescriptor;
use pgwire::pg_response::{PgResponse, StatementType};
-use pgwire::types::Row;
use risingwave_common::catalog::{ColumnCatalog, ColumnDesc};
-use risingwave_common::types::DataType;
+use risingwave_common::types::Fields;
use risingwave_sqlparser::ast::{display_comma_separated, ObjectName};
-use super::RwPgResponse;
+use super::show::ShowColumnRow;
+use super::{fields_to_descriptors, RwPgResponse};
use crate::binder::{Binder, Relation};
use crate::catalog::CatalogError;
use crate::error::Result;
-use crate::handler::util::col_descs_to_rows;
-use crate::handler::HandlerArgs;
+use crate::handler::{HandlerArgs, RwPgResponseBuilderExt};
pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Result<RwPgResponse> {
let session = handler_args.session;
@@ -156,7 +155,10 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re
};
// Convert all column descs to rows
- let mut rows = col_descs_to_rows(columns);
+ let mut rows = columns
+ .into_iter()
+ .flat_map(ShowColumnRow::from_catalog)
+ .collect_vec();
fn concat<T>(display_elems: impl IntoIterator<Item = T>) -> String
where
@@ -170,96 +172,68 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re
// Convert primary key to rows
if !pk_columns.is_empty() {
- rows.push(Row::new(vec![
- Some("primary key".into()),
- Some(concat(pk_columns.iter().map(|x| &x.name)).into()),
- None, // Is Hidden
- None, // Description
- ]));
+ rows.push(ShowColumnRow {
+ name: "primary key".into(),
+ r#type: concat(pk_columns.iter().map(|x| &x.name)),
+ is_hidden: None,
+ description: None,
+ });
}
// Convert distribution keys to rows
if !dist_columns.is_empty() {
- rows.push(Row::new(vec![
- Some("distribution key".into()),
- Some(concat(dist_columns.iter().map(|x| &x.name)).into()),
- None, // Is Hidden
- None, // Description
- ]));
+ rows.push(ShowColumnRow {
+ name: "distribution key".into(),
+ r#type: concat(dist_columns.iter().map(|x| &x.name)),
+ is_hidden: None,
+ description: None,
+ });
}
// Convert all indexes to rows
rows.extend(indices.iter().map(|index| {
let index_display = index.display();
- Row::new(vec![
- Some(index.name.clone().into()),
- if index_display.include_columns.is_empty() {
- Some(
- format!(
- "index({}) distributed by({})",
- display_comma_separated(&index_display.index_columns_with_ordering),
- display_comma_separated(&index_display.distributed_by_columns),
- )
- .into(),
+ ShowColumnRow {
+ name: index.name.clone(),
+ r#type: if index_display.include_columns.is_empty() {
+ format!(
+ "index({}) distributed by({})",
+ display_comma_separated(&index_display.index_columns_with_ordering),
+ display_comma_separated(&index_display.distributed_by_columns),
)
} else {
- Some(
- format!(
- "index({}) include({}) distributed by({})",
- display_comma_separated(&index_display.index_columns_with_ordering),
- display_comma_separated(&index_display.include_columns),
- display_comma_separated(&index_display.distributed_by_columns),
- )
- .into(),
+ format!(
+ "index({}) include({}) distributed by({})",
+ display_comma_separated(&index_display.index_columns_with_ordering),
+ display_comma_separated(&index_display.include_columns),
+ display_comma_separated(&index_display.distributed_by_columns),
)
},
- // Is Hidden
- None,
- // Description
+ is_hidden: None,
// TODO: index description
- None,
- ])
+ description: None,
+ }
}));
- rows.push(Row::new(vec![
- Some("table description".into()),
- Some(relname.into()),
- None, // Is Hidden
- description.map(Into::into), // Description
- ]));
+ rows.push(ShowColumnRow {
+ name: "table description".into(),
+ r#type: relname,
+ is_hidden: None,
+ description: description.map(Into::into),
+ });
// TODO: table name and description as title of response
// TODO: recover the original user statement
Ok(PgResponse::builder(StatementType::DESCRIBE)
- .values(
- rows.into(),
- vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Type".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Is Hidden".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Description".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- )
+ .rows(rows)
.into())
}
+pub fn infer_describe() -> Vec<PgFieldDescriptor> {
+ fields_to_descriptors(ShowColumnRow::fields())
+}
+
#[cfg(test)]
mod tests {
use std::collections::HashMap;
diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs
index c25bf7678bd04..b966cca8f50cf 100644
--- a/src/frontend/src/handler/explain.rs
+++ b/src/frontend/src/handler/explain.rs
@@ -12,12 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use itertools::Itertools;
-use pgwire::pg_field_descriptor::PgFieldDescriptor;
use pgwire::pg_response::{PgResponse, StatementType};
-use pgwire::types::Row;
use risingwave_common::bail_not_implemented;
-use risingwave_common::types::DataType;
+use risingwave_common::types::Fields;
use risingwave_sqlparser::ast::{ExplainOptions, ExplainType, Statement};
use thiserror_ext::AsReport;
@@ -27,7 +24,7 @@ use super::create_sink::{gen_sink_plan, get_partition_compute_info};
use super::create_table::ColumnIdGenerator;
use super::query::gen_batch_plan_by_statement;
use super::util::SourceSchemaCompatExt;
-use super::RwPgResponse;
+use super::{RwPgResponse, RwPgResponseBuilderExt};
use crate::error::{ErrorCode, Result};
use crate::handler::create_table::handle_create_table_plan;
use crate::handler::HandlerArgs;
@@ -254,20 +251,17 @@ pub async fn handle_explain(
}
}
- let rows = blocks
- .iter()
- .flat_map(|b| b.lines().map(|l| l.to_owned()))
- .map(|l| Row::new(vec![Some(l.into())]))
- .collect_vec();
+ let rows = blocks.iter().flat_map(|b| b.lines()).map(|l| ExplainRow {
+ query_plan: l.into(),
+ });
Ok(PgResponse::builder(StatementType::EXPLAIN)
- .values(
- rows.into(),
- vec![PgFieldDescriptor::new(
- "QUERY PLAN".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- )],
- )
+ .rows(rows)
.into())
}
+
+#[derive(Fields)]
+#[fields(style = "TITLE CASE")]
+struct ExplainRow {
+ query_plan: String,
+}
diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs
index 3cdc4b191da92..827f28f87319e 100644
--- a/src/frontend/src/handler/mod.rs
+++ b/src/frontend/src/handler/mod.rs
@@ -18,11 +18,15 @@ use std::task::{Context, Poll};
use futures::stream::{self, BoxStream};
use futures::{Stream, StreamExt};
+use itertools::Itertools;
+use pgwire::pg_field_descriptor::PgFieldDescriptor;
use pgwire::pg_response::StatementType::{self, ABORT, BEGIN, COMMIT, ROLLBACK, START_TRANSACTION};
use pgwire::pg_response::{PgResponse, PgResponseBuilder, RowSetResult};
use pgwire::pg_server::BoxedError;
use pgwire::types::{Format, Row};
use risingwave_common::bail_not_implemented;
+use risingwave_common::types::Fields;
+use risingwave_common::util::iter_util::ZipEqFast;
use risingwave_sqlparser::ast::*;
use self::util::{DataChunkToRowSetAdapter, SourceSchemaCompatExt};
@@ -59,7 +63,7 @@ pub mod create_table;
pub mod create_table_as;
pub mod create_user;
pub mod create_view;
-mod describe;
+pub mod describe;
mod drop_connection;
mod drop_database;
pub mod drop_function;
@@ -78,7 +82,7 @@ pub mod handle_privilege;
mod kill_process;
pub mod privilege;
pub mod query;
-mod show;
+pub mod show;
mod transaction;
pub mod util;
pub mod variable;
@@ -90,6 +94,42 @@ pub type RwPgResponseBuilder = PgResponseBuilder<PgResponseStream>;
/// The [`PgResponse`] used by RisingWave.
pub type RwPgResponse = PgResponse<PgResponseStream>;
+#[easy_ext::ext(RwPgResponseBuilderExt)]
+impl RwPgResponseBuilder {
+ /// Append rows to the response.
+ pub fn rows<T: Fields>(self, rows: impl IntoIterator<Item = T>) -> Self {
- ) -> Self {
+ let fields = T::fields();
+ self.values(
+ rows.into_iter()
+ .map(|row| {
+ Row::new(
+ row.into_owned_row()
+ .into_iter()
+ .zip_eq_fast(&fields)
+ .map(|(datum, (_, ty))| {
+ datum.map(|scalar| {
+ scalar.as_scalar_ref_impl().text_format(ty).into()
+ })
+ })
+ .collect(),
+ )
+ })
+ .collect_vec()
+ .into(),
+ fields_to_descriptors(fields),
+ )
+ }
+}
+
+pub fn fields_to_descriptors(
+ fields: Vec<(&str, risingwave_common::types::DataType)>,
) -> Vec<PgFieldDescriptor> {
+ fields
+ .iter()
+ .map(|(name, ty)| PgFieldDescriptor::new(name.to_string(), ty.to_oid(), ty.type_len()))
+ .collect()
+}
+
pub enum PgResponseStream {
LocalQuery(DataChunkToRowSetAdapter),
DistributedQuery(DataChunkToRowSetAdapter),
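The `RwPgResponseBuilderExt::rows` extension lets a handler return an iterator of plain structs: the `Fields` derive supplies the column names and types, and the builder renders each field to its text form. Here is a self-contained analogue of the pattern with a hand-rolled trait; the real `Fields` derive and `PgFieldDescriptor` types from `risingwave_common`/`pgwire` are not reproduced.

```rust
// Hypothetical, simplified analogue of the `Fields`-driven response builder.
trait Fields {
    /// (column name, column type) pairs, in output order.
    fn fields() -> Vec<(&'static str, &'static str)>;
    /// The row rendered as one text cell per field (None = SQL NULL).
    fn into_cells(self) -> Vec<Option<String>>;
}

struct ShowVariableRow {
    name: String,
}

impl Fields for ShowVariableRow {
    fn fields() -> Vec<(&'static str, &'static str)> {
        vec![("Name", "varchar")]
    }
    fn into_cells(self) -> Vec<Option<String>> {
        vec![Some(self.name)]
    }
}

/// Turn typed rows into (header, text rows), like `RwPgResponseBuilderExt::rows`.
fn render<T: Fields>(
    rows: impl IntoIterator<Item = T>,
) -> (Vec<(&'static str, &'static str)>, Vec<Vec<Option<String>>>) {
    (T::fields(), rows.into_iter().map(Fields::into_cells).collect())
}

fn main() {
    let (header, rows) = render([ShowVariableRow {
        name: "read committed".into(),
    }]);
    assert_eq!(header[0].0, "Name");
    assert_eq!(rows[0][0].as_deref(), Some("read committed"));
}
```

The payoff is that handlers no longer build `PgFieldDescriptor` vectors by hand, and the statement-inference code (`infer_show_object`, `infer_describe`, ...) reuses the very same field definitions.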
diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs
index 4a98b6c7cd33d..226a219a11887 100644
--- a/src/frontend/src/handler/show.rs
+++ b/src/frontend/src/handler/show.rs
@@ -19,27 +19,24 @@ use pgwire::pg_field_descriptor::PgFieldDescriptor;
use pgwire::pg_protocol::truncated_fmt;
use pgwire::pg_response::{PgResponse, StatementType};
use pgwire::pg_server::Session;
-use pgwire::types::Row;
use risingwave_common::bail_not_implemented;
use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, DEFAULT_SCHEMA_NAME};
-use risingwave_common::types::DataType;
+use risingwave_common::types::{DataType, Fields};
use risingwave_common::util::addr::HostAddr;
use risingwave_connector::source::kafka::PRIVATELINK_CONNECTION;
use risingwave_expr::scalar::like::{i_like_default, like_default};
use risingwave_pb::catalog::connection;
use risingwave_sqlparser::ast::{
- Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter,
+ display_comma_separated, Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter,
};
use serde_json;
-use super::RwPgResponse;
+use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt};
use crate::binder::{Binder, Relation};
use crate::catalog::{CatalogError, IndexCatalog};
use crate::error::Result;
-use crate::handler::util::{col_descs_to_rows, indexes_to_rows};
use crate::handler::HandlerArgs;
use crate::session::SessionImpl;
-use crate::utils::infer_stmt_row_desc::infer_show_object;
pub fn get_columns_from_table(
session: &SessionImpl,
@@ -109,6 +106,136 @@ fn schema_or_default(schema: &Option<Ident>) -> String {
.map_or_else(|| DEFAULT_SCHEMA_NAME.to_string(), |s| s.real_value())
}
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowObjectRow {
+ name: String,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+pub struct ShowColumnRow {
+ pub name: String,
+ pub r#type: String,
+ pub is_hidden: Option<String>,
+ pub description: Option<String>,
+}
+
+impl ShowColumnRow {
+ pub fn from_catalog(col: ColumnCatalog) -> Vec {
+ col.column_desc
+ .flatten()
+ .into_iter()
+ .map(|c| {
+ let type_name = if let DataType::Struct { .. } = c.data_type {
+ c.type_name.clone()
+ } else {
+ c.data_type.to_string()
+ };
+ ShowColumnRow {
+ name: c.name,
+ r#type: type_name,
+ is_hidden: Some(col.is_hidden.to_string()),
+ description: c.description,
+ }
+ })
+ .collect()
+ }
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowConnectionRow {
+ name: String,
+ r#type: String,
+ properties: String,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowFunctionRow {
+ name: String,
+ arguments: String,
+ return_type: String,
+ language: String,
+ link: Option<String>,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowIndexRow {
+ name: String,
+ on: String,
+ key: String,
+ include: String,
+ distributed_by: String,
+}
+
+impl From<Arc<IndexCatalog>> for ShowIndexRow {
+ fn from(index: Arc<IndexCatalog>) -> Self {
+ let index_display = index.display();
+ ShowIndexRow {
+ name: index.name.clone(),
+ on: index.primary_table.name.clone(),
+ key: display_comma_separated(&index_display.index_columns_with_ordering).to_string(),
+ include: display_comma_separated(&index_display.include_columns).to_string(),
+ distributed_by: display_comma_separated(&index_display.distributed_by_columns)
+ .to_string(),
+ }
+ }
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowClusterRow {
+ addr: String,
+ state: String,
+ parallel_units: String,
+ is_streaming: String,
+ is_serving: String,
+ is_unschedulable: String,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowJobRow {
+ id: i64,
+ statement: String,
+ progress: String,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowProcessListRow {
+ id: String,
+ user: String,
+ host: String,
+ database: String,
+ time: Option<String>,
+ info: Option<String>,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowCreateObjectRow {
+ name: String,
+ create_sql: String,
+}
+
+/// Infer the row description for different show objects.
+pub fn infer_show_object(objects: &ShowObject) -> Vec<PgFieldDescriptor> {
+ fields_to_descriptors(match objects {
+ ShowObject::Columns { .. } => ShowColumnRow::fields(),
+ ShowObject::Connection { .. } => ShowConnectionRow::fields(),
+ ShowObject::Function { .. } => ShowFunctionRow::fields(),
+ ShowObject::Indexes { .. } => ShowIndexRow::fields(),
+ ShowObject::Cluster => ShowClusterRow::fields(),
+ ShowObject::Jobs => ShowJobRow::fields(),
+ ShowObject::ProcessList => ShowProcessListRow::fields(),
+ _ => ShowObjectRow::fields(),
+ })
+}
+
pub async fn handle_show_object(
handler_args: HandlerArgs,
command: ShowObject,
@@ -119,7 +246,6 @@ pub async fn handle_show_object(
if let Some(ShowStatementFilter::Where(..)) = filter {
bail_not_implemented!("WHERE clause in SHOW statement");
}
- let row_desc = infer_show_object(&command);
let catalog_reader = session.env().catalog_reader();
@@ -178,18 +304,15 @@ pub async fn handle_show_object(
.into());
};
- let rows = col_descs_to_rows(columns);
-
return Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(rows.into(), row_desc)
+ .rows(columns.into_iter().flat_map(ShowColumnRow::from_catalog))
.into());
}
ShowObject::Indexes { table } => {
let indexes = get_indexes_from_table(&session, table)?;
- let rows = indexes_to_rows(indexes);
return Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(rows.into(), row_desc)
+ .rows(indexes.into_iter().map(ShowIndexRow::from))
.into());
}
ShowObject::Connection { schema } => {
@@ -200,7 +323,7 @@ pub async fn handle_show_object(
.iter_connections()
.map(|c| {
let name = c.name.clone();
- let conn_type = match &c.info {
+ let r#type = match &c.info {
connection::Info::PrivateLinkService(_) => {
PRIVATELINK_CONNECTION.to_string()
},
@@ -230,105 +353,81 @@ pub async fn handle_show_object(
)
}
};
- Row::new(vec![
- Some(name.into()),
- Some(conn_type.into()),
- Some(properties.into()),
- ])
- })
- .collect_vec();
+ ShowConnectionRow {
+ name,
+ r#type,
+ properties,
+ }
+ });
return Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(rows.into(), row_desc)
+ .rows(rows)
.into());
}
ShowObject::Function { schema } => {
- let rows = catalog_reader
- .read_guard()
+ let reader = catalog_reader.read_guard();
+ let rows = reader
.get_schema_by_name(session.database(), &schema_or_default(&schema))?
.iter_function()
- .map(|t| {
- Row::new(vec![
- Some(t.name.clone().into()),
- Some(t.arg_types.iter().map(|t| t.to_string()).join(", ").into()),
- Some(t.return_type.to_string().into()),
- Some(t.language.clone().into()),
- t.link.clone().map(Into::into),
- ])
- })
- .collect_vec();
+ .map(|t| ShowFunctionRow {
+ name: t.name.clone(),
+ arguments: t.arg_types.iter().map(|t| t.to_string()).join(", "),
+ return_type: t.return_type.to_string(),
+ language: t.language.clone(),
+ link: t.link.clone(),
+ });
return Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(rows.into(), row_desc)
+ .rows(rows)
.into());
}
ShowObject::Cluster => {
let workers = session.env().worker_node_manager().list_worker_nodes();
- let rows = workers
- .into_iter()
- .map(|worker| {
- let addr: HostAddr = worker.host.as_ref().unwrap().into();
- let property = worker.property.as_ref().unwrap();
- Row::new(vec![
- Some(addr.to_string().into()),
- Some(worker.get_state().unwrap().as_str_name().into()),
- Some(
- worker
- .parallel_units
- .into_iter()
- .map(|pu| pu.id)
- .join(", ")
- .into(),
- ),
- Some(property.is_streaming.to_string().into()),
- Some(property.is_serving.to_string().into()),
- Some(property.is_unschedulable.to_string().into()),
- ])
- })
- .collect_vec();
+ let rows = workers.into_iter().map(|worker| {
+ let addr: HostAddr = worker.host.as_ref().unwrap().into();
+ let property = worker.property.as_ref().unwrap();
+ ShowClusterRow {
+ addr: addr.to_string(),
+ state: worker.get_state().unwrap().as_str_name().to_string(),
+ parallel_units: worker.parallel_units.into_iter().map(|pu| pu.id).join(", "),
+ is_streaming: property.is_streaming.to_string(),
+ is_serving: property.is_serving.to_string(),
+ is_unschedulable: property.is_unschedulable.to_string(),
+ }
+ });
return Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(rows.into(), row_desc)
+ .rows(rows)
.into());
}
ShowObject::Jobs => {
let resp = session.env().meta_client().list_ddl_progress().await?;
- let rows = resp
- .into_iter()
- .map(|job| {
- Row::new(vec![
- Some(job.id.to_string().into()),
- Some(job.statement.into()),
- Some(job.progress.into()),
- ])
- })
- .collect_vec();
+ let rows = resp.into_iter().map(|job| ShowJobRow {
+ id: job.id as i64,
+ statement: job.statement,
+ progress: job.progress,
+ });
return Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(rows.into(), row_desc)
+ .rows(rows)
.into());
}
ShowObject::ProcessList => {
- let rows = {
- let sessions_map = session.env().sessions_map();
- sessions_map
- .read()
- .values()
- .map(|s| {
- Row::new(vec![
- // Since process id and the secret id in the session id are the same in RisingWave, just display the process id.
- Some(format!("{}", s.id().0).into()),
- Some(s.user_name().to_owned().into()),
- Some(format!("{}", s.peer_addr()).into()),
- Some(s.database().to_owned().into()),
- s.elapse_since_running_sql()
- .map(|mills| format!("{}ms", mills).into()),
- s.running_sql().map(|sql| {
- format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024)).into()
- }),
- ])
- })
- .collect_vec()
- };
+ let sessions_map = session.env().sessions_map().read();
+ let rows = sessions_map.values().map(|s| {
+ ShowProcessListRow {
+ // Since process id and the secret id in the session id are the same in RisingWave, just display the process id.
+ id: format!("{}", s.id().0),
+ user: s.user_name().to_owned(),
+ host: format!("{}", s.peer_addr()),
+ database: s.database().to_owned(),
+ time: s
+ .elapse_since_running_sql()
+ .map(|mills| format!("{}ms", mills)),
+ info: s
+ .running_sql()
+ .map(|sql| format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024))),
+ }
+ });
return Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(rows.into(), row_desc)
+ .rows(rows)
.into());
}
};
@@ -341,21 +440,17 @@ pub async fn handle_show_object(
Some(ShowStatementFilter::Where(..)) => unreachable!(),
None => true,
})
- .map(|n| Row::new(vec![Some(n.into())]))
- .collect_vec();
+ .map(|name| ShowObjectRow { name });
Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(
- rows.into(),
- vec![PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- )],
- )
+ .rows(rows)
.into())
}
+pub fn infer_show_create_object() -> Vec<PgFieldDescriptor> {
+ fields_to_descriptors(ShowCreateObjectRow::fields())
+}
+
pub fn handle_show_create_object(
handle_args: HandlerArgs,
show_create_type: ShowCreateType,
@@ -415,21 +510,10 @@ pub fn handle_show_create_object(
let name = format!("{}.{}", schema_name, object_name);
Ok(PgResponse::builder(StatementType::SHOW_COMMAND)
- .values(
- vec![Row::new(vec![Some(name.into()), Some(sql.into())])].into(),
- vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Create Sql".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- )
+ .rows([ShowCreateObjectRow {
+ name,
+ create_sql: sql,
+ }])
.into())
}
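`ShowColumnRow::from_catalog` can emit several rows for a single catalog column because `ColumnDesc::flatten` expands nested struct columns into their leaf fields. The sketch below is a hypothetical, simplified stand-in for that expansion (the exact naming scheme and type rendering of the real `flatten` may differ):

```rust
/// Simplified column model: either a scalar or a struct of named fields.
enum ColType {
    Varchar,
    Int,
    Struct(Vec<(String, ColType)>),
}

/// Expand one column into (name, type) rows, one per leaf field.
fn flatten(name: &str, ty: &ColType, out: &mut Vec<(String, String)>) {
    match ty {
        ColType::Varchar => out.push((name.to_string(), "varchar".into())),
        ColType::Int => out.push((name.to_string(), "integer".into())),
        ColType::Struct(fields) => {
            // The struct column itself is listed with its type name...
            out.push((name.to_string(), "struct".into()));
            // ...followed by one row per nested field, `parent.child` style.
            for (fname, fty) in fields {
                flatten(&format!("{name}.{fname}"), fty, out);
            }
        }
    }
}

fn main() {
    let address = ColType::Struct(vec![
        ("city".to_string(), ColType::Varchar),
        ("zipcode".to_string(), ColType::Int),
    ]);
    let mut rows = Vec::new();
    flatten("address", &address, &mut rows);
    assert_eq!(rows.len(), 3); // the struct column plus its two leaves
}
```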
diff --git a/src/frontend/src/handler/transaction.rs b/src/frontend/src/handler/transaction.rs
index 452cfe0ed9299..8ab7af36c29ca 100644
--- a/src/frontend/src/handler/transaction.rs
+++ b/src/frontend/src/handler/transaction.rs
@@ -13,14 +13,13 @@
// limitations under the License.
use pgwire::pg_response::StatementType;
-use pgwire::types::Row;
use risingwave_common::bail_not_implemented;
+use risingwave_common::types::Fields;
use risingwave_sqlparser::ast::{TransactionAccessMode, TransactionMode, Value};
-use super::{HandlerArgs, RwPgResponse};
+use super::{HandlerArgs, RwPgResponse, RwPgResponseBuilderExt};
use crate::error::Result;
use crate::session::transaction::AccessMode;
-use crate::utils::infer_stmt_row_desc::infer_show_variable;
macro_rules! not_impl {
($body:expr) => {
@@ -118,16 +117,20 @@ pub async fn handle_set(
.into())
}
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowVariableRow {
+ name: String,
+}
+
pub fn handle_show_isolation_level(handler_args: HandlerArgs) -> Result<RwPgResponse> {
let config_reader = handler_args.session.config();
- let parameter_name = "transaction_isolation";
- let row_desc = infer_show_variable(parameter_name);
- let rows = vec![Row::new(vec![Some(
- config_reader.get(parameter_name)?.into(),
- )])];
+ let rows = [ShowVariableRow {
+ name: config_reader.get("transaction_isolation")?,
+ }];
Ok(RwPgResponse::builder(StatementType::SHOW_VARIABLE)
- .values(rows.into(), row_desc)
+ .rows(rows)
.into())
}
diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs
index 6e91cf53f0b32..ab9d4fe415b33 100644
--- a/src/frontend/src/handler/util.rs
+++ b/src/frontend/src/handler/util.rs
@@ -27,14 +27,14 @@ use pgwire::pg_server::BoxedError;
use pgwire::types::{Format, FormatIterator, Row};
use pin_project_lite::pin_project;
use risingwave_common::array::DataChunk;
-use risingwave_common::catalog::{ColumnCatalog, Field};
+use risingwave_common::catalog::Field;
use risingwave_common::row::Row as _;
use risingwave_common::types::{DataType, ScalarRefImpl, Timestamptz};
use risingwave_common::util::iter_util::ZipEqFast;
+use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR;
use risingwave_connector::source::KAFKA_CONNECTOR;
-use risingwave_sqlparser::ast::{display_comma_separated, CompatibleSourceSchema, ConnectorSchema};
+use risingwave_sqlparser::ast::{CompatibleSourceSchema, ConnectorSchema};
-use crate::catalog::IndexCatalog;
use crate::error::{ErrorCode, Result as RwResult};
use crate::handler::create_source::UPSTREAM_SOURCE_KEY;
use crate::session::{current, SessionImpl};
@@ -172,66 +172,6 @@ fn to_pg_rows(
.try_collect()
}
-/// Convert column descs to rows which conclude name and type
-pub fn col_descs_to_rows(columns: Vec<ColumnCatalog>) -> Vec<Row> {
- columns
- .iter()
- .flat_map(|col| {
- col.column_desc
- .flatten()
- .into_iter()
- .map(|c| {
- let type_name = if let DataType::Struct { .. } = c.data_type {
- c.type_name.clone()
- } else {
- c.data_type.to_string()
- };
- Row::new(vec![
- Some(c.name.into()),
- Some(type_name.into()),
- Some(col.is_hidden.to_string().into()),
- c.description.map(Into::into),
- ])
- })
- .collect_vec()
- })
- .collect_vec()
-}
-
-pub fn indexes_to_rows(indexes: Vec<Arc<IndexCatalog>>) -> Vec<Row> {
- indexes
- .iter()
- .map(|index| {
- let index_display = index.display();
- Row::new(vec![
- Some(index.name.clone().into()),
- Some(index.primary_table.name.clone().into()),
- Some(
- format!(
- "{}",
- display_comma_separated(&index_display.index_columns_with_ordering)
- )
- .into(),
- ),
- Some(
- format!(
- "{}",
- display_comma_separated(&index_display.include_columns)
- )
- .into(),
- ),
- Some(
- format!(
- "{}",
- display_comma_separated(&index_display.distributed_by_columns)
- )
- .into(),
- ),
- ])
- })
- .collect_vec()
-}
-
/// Convert from [`Field`] to [`PgFieldDescriptor`].
pub fn to_pg_field(f: &Field) -> PgFieldDescriptor {
PgFieldDescriptor::new(
@@ -241,6 +181,11 @@ pub fn to_pg_field(f: &Field) -> PgFieldDescriptor {
)
}
+pub fn connector_need_pk(with_properties: &HashMap<String, String>) -> bool {
+ // Currently, only the iceberg connector does not require a primary key.
+ !is_iceberg_connector(with_properties)
+}
+
#[inline(always)]
pub fn get_connector(with_properties: &HashMap<String, String>) -> Option<String> {
with_properties
@@ -265,6 +210,14 @@ pub fn is_cdc_connector(with_properties: &HashMap<String, String>) -> bool {
connector.contains("-cdc")
}
+#[inline(always)]
+pub fn is_iceberg_connector(with_properties: &HashMap<String, String>) -> bool {
+ let Some(connector) = get_connector(with_properties) else {
+ return false;
+ };
+ connector == ICEBERG_CONNECTOR
+}
+
#[easy_ext::ext(SourceSchemaCompatExt)]
impl CompatibleSourceSchema {
/// Convert `self` to [`ConnectorSchema`] and warn the user if the syntax is deprecated.
diff --git a/src/frontend/src/handler/variable.rs b/src/frontend/src/handler/variable.rs
index 884947c88b763..96fd232215ccd 100644
--- a/src/frontend/src/handler/variable.rs
+++ b/src/frontend/src/handler/variable.rs
@@ -14,19 +14,18 @@
use anyhow::Context;
use itertools::Itertools;
+use pgwire::pg_field_descriptor::PgFieldDescriptor;
use pgwire::pg_protocol::ParameterStatus;
use pgwire::pg_response::{PgResponse, StatementType};
-use pgwire::types::Row;
use risingwave_common::session_config::{ConfigReporter, SESSION_CONFIG_LIST_SEP};
-use risingwave_common::system_param::is_mutable;
-use risingwave_common::types::{DataType, ScalarRefImpl};
+use risingwave_common::system_param::reader::SystemParamsRead;
+use risingwave_common::types::Fields;
use risingwave_sqlparser::ast::{Ident, SetTimeZoneValue, SetVariableValue, Value};
use risingwave_sqlparser::keywords::Keyword;
-use super::RwPgResponse;
+use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt};
use crate::error::Result;
use crate::handler::HandlerArgs;
-use crate::utils::infer_stmt_row_desc::infer_show_variable;
/// convert `SetVariableValue` to string while remove the quotes on literals.
pub(crate) fn set_var_to_param_str(value: &SetVariableValue) -> Option<String> {
@@ -117,40 +116,36 @@ pub(super) async fn handle_show(
) -> Result<RwPgResponse> {
// TODO: Verify that the name used in `show` command is indeed always case-insensitive.
let name = variable.iter().map(|e| e.real_value()).join(" ");
- let row_desc = infer_show_variable(&name);
- let rows = if name.eq_ignore_ascii_case("PARAMETERS") {
- handle_show_system_params(handler_args).await?
+ if name.eq_ignore_ascii_case("PARAMETERS") {
+ handle_show_system_params(handler_args).await
} else if name.eq_ignore_ascii_case("ALL") {
- handle_show_all(handler_args.clone())?
+ handle_show_all(handler_args.clone())
} else {
let config_reader = handler_args.session.config();
- vec![Row::new(vec![Some(config_reader.get(&name)?.into())])]
- };
-
- Ok(PgResponse::builder(StatementType::SHOW_VARIABLE)
- .values(rows.into(), row_desc)
- .into())
+ Ok(PgResponse::builder(StatementType::SHOW_VARIABLE)
+ .rows([ShowVariableRow {
+ name: config_reader.get(&name)?,
+ }])
+ .into())
+ }
}
-fn handle_show_all(handler_args: HandlerArgs) -> Result<Vec<Row>> {
+fn handle_show_all(handler_args: HandlerArgs) -> Result<RwPgResponse> {
let config_reader = handler_args.session.config();
let all_variables = config_reader.show_all();
- let rows = all_variables
- .iter()
- .map(|info| {
- Row::new(vec![
- Some(info.name.clone().into()),
- Some(info.setting.clone().into()),
- Some(info.description.clone().into()),
- ])
- })
- .collect_vec();
- Ok(rows)
+ let rows = all_variables.iter().map(|info| ShowVariableAllRow {
+ name: info.name.clone(),
+ setting: info.setting.clone(),
+ description: info.description.clone(),
+ });
+ Ok(PgResponse::builder(StatementType::SHOW_VARIABLE)
+ .rows(rows)
+ .into())
}
-async fn handle_show_system_params(handler_args: HandlerArgs) -> Result<Vec<Row>> {
+async fn handle_show_system_params(handler_args: HandlerArgs) -> Result<RwPgResponse> {
let params = handler_args
.session
.env()
@@ -158,14 +153,48 @@ async fn handle_show_system_params(handler_args: HandlerArgs) -> Result
.get_system_params()
.await?;
let rows = params
- .to_kv()
+ .get_all()
.into_iter()
- .map(|(k, v)| {
- let is_mutable_bytes = ScalarRefImpl::Bool(is_mutable(&k).unwrap())
- .text_format(&DataType::Boolean)
- .into();
- Row::new(vec![Some(k.into()), Some(v.into()), Some(is_mutable_bytes)])
- })
- .collect_vec();
- Ok(rows)
+ .map(|info| ShowVariableParamsRow {
+ name: info.name.into(),
+ value: info.value,
+ description: info.description.into(),
+ mutable: info.mutable,
+ });
+ Ok(PgResponse::builder(StatementType::SHOW_VARIABLE)
+ .rows(rows)
+ .into())
+}
+
+pub fn infer_show_variable(name: &str) -> Vec<PgFieldDescriptor> {
+ fields_to_descriptors(if name.eq_ignore_ascii_case("ALL") {
+ ShowVariableAllRow::fields()
+ } else if name.eq_ignore_ascii_case("PARAMETERS") {
+ ShowVariableParamsRow::fields()
+ } else {
+ ShowVariableRow::fields()
+ })
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowVariableRow {
+ name: String,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowVariableAllRow {
+ name: String,
+ setting: String,
+ description: String,
+}
+
+#[derive(Fields)]
+#[fields(style = "Title Case")]
+struct ShowVariableParamsRow {
+ name: String,
+ value: String,
+ description: String,
+ mutable: bool,
}
diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs
index fa7ad908d01d4..43ec6d2a89de8 100644
--- a/src/frontend/src/optimizer/plan_node/logical_source.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_source.rs
@@ -23,7 +23,8 @@ use risingwave_common::bail_not_implemented;
use risingwave_common::catalog::{
ColumnCatalog, ColumnDesc, Field, Schema, KAFKA_TIMESTAMP_COLUMN_NAME,
};
-use risingwave_connector::source::DataType;
+use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR;
+use risingwave_connector::source::{DataType, UPSTREAM_SOURCE_KEY};
use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn;
use risingwave_pb::plan_common::GeneratedColumnDesc;
@@ -546,6 +547,18 @@ impl ToStream for LogicalSource {
}
}
}
+ if let Some(source) = &self.core.catalog {
+ let connector = &source
+ .with_properties
+ .get(UPSTREAM_SOURCE_KEY)
+ .map(|s| s.to_lowercase())
+ .unwrap();
+ if ICEBERG_CONNECTOR == connector {
+ return Err(
+ anyhow::anyhow!("Iceberg source is not supported in stream queries").into(),
+ );
+ }
+ }
Ok(plan)
}
diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs
index 3abc7ace0e494..f2acbcf9d258c 100644
--- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs
@@ -226,6 +226,7 @@ impl StreamMaterialize {
id: TableId::placeholder(),
associated_source_id: None,
name,
+ dependent_relations: vec![],
columns,
pk: table_pk,
stream_key,
diff --git a/src/frontend/src/optimizer/plan_node/utils.rs b/src/frontend/src/optimizer/plan_node/utils.rs
index 39d9ff5e7018d..c8cd1bb05fa83 100644
--- a/src/frontend/src/optimizer/plan_node/utils.rs
+++ b/src/frontend/src/optimizer/plan_node/utils.rs
@@ -141,6 +141,7 @@ impl TableCatalogBuilder {
id: TableId::placeholder(),
associated_source_id: None,
name: String::new(),
+ dependent_relations: vec![],
columns: self.columns.clone(),
pk: self.pk,
stream_key: vec![],
diff --git a/src/frontend/src/scheduler/distributed/query.rs b/src/frontend/src/scheduler/distributed/query.rs
index 6295d8036b566..515a83d0923ef 100644
--- a/src/frontend/src/scheduler/distributed/query.rs
+++ b/src/frontend/src/scheduler/distributed/query.rs
@@ -543,6 +543,7 @@ pub(crate) mod tests {
id: table_id,
associated_source_id: None,
name: "test".to_string(),
+ dependent_relations: vec![],
columns: vec![
ColumnCatalog {
column_desc: ColumnDesc::new_atomic(DataType::Int32, "a", 0),
diff --git a/src/frontend/src/scheduler/task_context.rs b/src/frontend/src/scheduler/task_context.rs
index dfb2496dad556..dcfbf30a215a1 100644
--- a/src/frontend/src/scheduler/task_context.rs
+++ b/src/frontend/src/scheduler/task_context.rs
@@ -52,6 +52,7 @@ impl BatchTaskContext for FrontendBatchTaskContext {
self.session.env().meta_client_ref(),
self.session.auth_context(),
self.session.shared_config(),
+ self.session.env().system_params_manager().get_params(),
))
}
diff --git a/src/frontend/src/session.rs b/src/frontend/src/session.rs
index 9419999f8e479..67a5da01e1213 100644
--- a/src/frontend/src/session.rs
+++ b/src/frontend/src/session.rs
@@ -43,7 +43,9 @@ use risingwave_common::catalog::{
};
use risingwave_common::config::{load_config, BatchConfig, MetaConfig, MetricLevel};
use risingwave_common::session_config::{ConfigMap, ConfigReporter, VisibilityMode};
-use risingwave_common::system_param::local_manager::LocalSystemParamsManager;
+use risingwave_common::system_param::local_manager::{
+ LocalSystemParamsManager, LocalSystemParamsManagerRef,
+};
use risingwave_common::telemetry::manager::TelemetryManager;
use risingwave_common::telemetry::telemetry_env_enabled;
use risingwave_common::types::DataType;
@@ -79,11 +81,14 @@ use crate::catalog::{
check_schema_writable, CatalogError, DatabaseId, OwnedByUserCatalog, SchemaId,
};
use crate::error::{ErrorCode, Result, RwError};
+use crate::handler::describe::infer_describe;
use crate::handler::extended_handle::{
handle_bind, handle_execute, handle_parse, Portal, PrepareStatement,
};
use crate::handler::privilege::ObjectCheckItem;
+use crate::handler::show::{infer_show_create_object, infer_show_object};
use crate::handler::util::to_pg_field;
+use crate::handler::variable::infer_show_variable;
use crate::handler::{handle, RwPgResponse};
use crate::health_service::HealthServiceImpl;
use crate::meta_client::{FrontendMetaClient, FrontendMetaClientImpl};
@@ -100,7 +105,6 @@ use crate::user::user_authentication::md5_hash_with_salt;
use crate::user::user_manager::UserInfoManager;
use crate::user::user_service::{UserInfoReader, UserInfoWriter, UserInfoWriterImpl};
use crate::user::UserId;
-use crate::utils::infer_stmt_row_desc::{infer_show_object, infer_show_variable};
use crate::{FrontendOpts, PgResponseStream};
pub(crate) mod current;
@@ -119,6 +123,8 @@ pub struct FrontendEnv {
worker_node_manager: WorkerNodeManagerRef,
query_manager: QueryManager,
hummock_snapshot_manager: HummockSnapshotManagerRef,
+ system_params_manager: LocalSystemParamsManagerRef,
+
server_addr: HostAddr,
client_pool: ComputeClientPoolRef,
@@ -159,6 +165,7 @@ impl FrontendEnv {
let worker_node_manager = Arc::new(WorkerNodeManager::mock(vec![]));
let meta_client = Arc::new(MockFrontendMetaClient {});
let hummock_snapshot_manager = Arc::new(HummockSnapshotManager::new(meta_client.clone()));
+ let system_params_manager = Arc::new(LocalSystemParamsManager::for_test());
let compute_client_pool = Arc::new(ComputeClientPool::default());
let query_manager = QueryManager::new(
worker_node_manager.clone(),
@@ -191,6 +198,7 @@ impl FrontendEnv {
worker_node_manager,
query_manager,
hummock_snapshot_manager,
+ system_params_manager,
server_addr,
client_pool,
sessions_map: Arc::new(RwLock::new(HashMap::new())),
@@ -383,6 +391,7 @@ impl FrontendEnv {
meta_client: frontend_meta_client,
query_manager,
hummock_snapshot_manager,
+ system_params_manager,
server_addr: frontend_address,
client_pool,
frontend_metrics,
@@ -448,6 +457,10 @@ impl FrontendEnv {
&self.hummock_snapshot_manager
}
+ pub fn system_params_manager(&self) -> &LocalSystemParamsManagerRef {
+ &self.system_params_manager
+ }
+
pub fn server_address(&self) -> &HostAddr {
&self.server_addr
}
@@ -1231,18 +1244,7 @@ fn infer(bound: Option, stmt: Statement) -> Result Ok(infer_show_object(&show_object)),
- Statement::ShowCreateObject { .. } => Ok(vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Create Sql".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ]),
+ Statement::ShowCreateObject { .. } => Ok(infer_show_create_object()),
Statement::ShowTransactionIsolationLevel => {
let name = "transaction_isolation";
Ok(infer_show_variable(name))
@@ -1251,28 +1253,7 @@ fn infer(bound: Option, stmt: Statement) -> Result Ok(vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Type".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Is Hidden".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Description".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ]),
+ Statement::Describe { name: _ } => Ok(infer_describe()),
Statement::Explain { .. } => Ok(vec![PgFieldDescriptor::new(
"QUERY PLAN".to_owned(),
DataType::Varchar.to_oid(),
diff --git a/src/frontend/src/utils/infer_stmt_row_desc.rs b/src/frontend/src/utils/infer_stmt_row_desc.rs
deleted file mode 100644
index 1ee950997720c..0000000000000
--- a/src/frontend/src/utils/infer_stmt_row_desc.rs
+++ /dev/null
@@ -1,248 +0,0 @@
-// Copyright 2024 RisingWave Labs
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use pgwire::pg_field_descriptor::PgFieldDescriptor;
-use risingwave_common::types::DataType;
-use risingwave_sqlparser::ast::ShowObject;
-
-/// `infer_stmt_row_desc` is used to infer the row description for different show objects.
-pub fn infer_show_object(objects: &ShowObject) -> Vec {
- match objects {
- ShowObject::Columns { .. } => vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Type".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Is Hidden".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Description".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- ShowObject::Connection { .. } => vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Type".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Properties".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- ShowObject::Function { .. } => vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Arguments".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Return Type".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Language".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Link".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- ShowObject::Indexes { .. } => vec![
- PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "On".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Key".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Include".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Distributed By".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- ShowObject::Cluster => vec![
- PgFieldDescriptor::new(
- "Addr".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "State".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Parallel Units".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Is Streaming".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Is Serving".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Is Unschedulable".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- ShowObject::Jobs => vec![
- PgFieldDescriptor::new(
- "Id".to_owned(),
- DataType::Int64.to_oid(),
- DataType::Int64.type_len(),
- ),
- PgFieldDescriptor::new(
- "Statement".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Progress".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- ShowObject::ProcessList => vec![
- PgFieldDescriptor::new(
- "Id".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "User".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Host".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Database".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Time".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Info".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ],
- _ => vec![PgFieldDescriptor::new(
- "Name".to_owned(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- )],
- }
-}
-
-pub fn infer_show_variable(name: &str) -> Vec {
- if name.eq_ignore_ascii_case("ALL") {
- vec![
- PgFieldDescriptor::new(
- "Name".to_string(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Setting".to_string(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Description".to_string(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- ]
- } else if name.eq_ignore_ascii_case("PARAMETERS") {
- vec![
- PgFieldDescriptor::new(
- "Name".to_string(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Value".to_string(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- ),
- PgFieldDescriptor::new(
- "Mutable".to_string(),
- DataType::Boolean.to_oid(),
- DataType::Boolean.type_len(),
- ),
- ]
- } else {
- vec![PgFieldDescriptor::new(
- name.to_ascii_lowercase(),
- DataType::Varchar.to_oid(),
- DataType::Varchar.type_len(),
- )]
- }
-}
diff --git a/src/frontend/src/utils/mod.rs b/src/frontend/src/utils/mod.rs
index bfe7cb093aad0..697b626fb3398 100644
--- a/src/frontend/src/utils/mod.rs
+++ b/src/frontend/src/utils/mod.rs
@@ -30,7 +30,6 @@ pub use rewrite_index::*;
mod index_set;
pub use index_set::*;
pub(crate) mod group_by;
-pub mod infer_stmt_row_desc;
pub mod overwrite_options;
pub use group_by::*;
pub use overwrite_options::*;
diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs
index 2e770fb841ada..8d7c4253631d5 100644
--- a/src/meta/node/src/lib.rs
+++ b/src/meta/node/src/lib.rs
@@ -254,7 +254,7 @@ pub fn start(opts: MetaNodeOpts) -> Pin<Box<dyn Future<Output = ()> + Send>> {
const MIN_TIMEOUT_INTERVAL_SEC: u64 = 20;
let compaction_task_max_progress_interval_secs = {
- config
+ (config
.storage
.object_store
.object_store_read_timeout_ms
@@ -271,7 +271,8 @@ pub fn start(opts: MetaNodeOpts) -> Pin<Box<dyn Future<Output = ()> + Send>> {
.object_store
.object_store_streaming_upload_timeout_ms,
)
- .max(config.meta.compaction_task_max_progress_interval_secs)
+ .max(config.meta.compaction_task_max_progress_interval_secs * 1000))
+ / 1000
} + MIN_TIMEOUT_INTERVAL_SEC;
let (mut join_handle, leader_lost_handle, shutdown_send) = rpc_serve(
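The change to `compaction_task_max_progress_interval_secs` fixes a unit mismatch: the object-store timeouts are configured in milliseconds while the compaction progress interval is in seconds, so the maximum must be taken in a single unit before converting back. A small arithmetic sketch with illustrative values (not the actual defaults):

```rust
fn main() {
    let object_store_read_timeout_ms: u64 = 8 * 60 * 1000; // 8 minutes, in ms
    let compaction_task_max_progress_interval_secs: u64 = 600; // 10 minutes, in s
    const MIN_TIMEOUT_INTERVAL_SEC: u64 = 20;

    // Buggy version: mixes ms with seconds, so 480_000 "seconds" wins the max.
    let buggy = object_store_read_timeout_ms
        .max(compaction_task_max_progress_interval_secs)
        + MIN_TIMEOUT_INTERVAL_SEC;

    // Fixed version: promote seconds to ms, take the max, then divide back.
    let fixed = object_store_read_timeout_ms
        .max(compaction_task_max_progress_interval_secs * 1000)
        / 1000
        + MIN_TIMEOUT_INTERVAL_SEC;

    assert_eq!(buggy, 480_020); // interpreted as ~480k seconds, far too large
    assert_eq!(fixed, 620); // 600 s dominates 480 s, plus the 20 s margin
}
```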
diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs
index 6077efa7f88c1..e26e1af0f0cff 100644
--- a/src/meta/src/controller/catalog.rs
+++ b/src/meta/src/controller/catalog.rs
@@ -19,16 +19,18 @@ use std::sync::Arc;
use anyhow::anyhow;
use itertools::Itertools;
use risingwave_common::catalog::{TableOption, DEFAULT_SCHEMA_NAME, SYSTEM_SCHEMAS};
+use risingwave_common::util::stream_graph_visitor::visit_stream_node_cont;
use risingwave_common::{bail, current_cluster_version};
+use risingwave_meta_model_v2::fragment::StreamNode;
use risingwave_meta_model_v2::object::ObjectType;
use risingwave_meta_model_v2::prelude::*;
use risingwave_meta_model_v2::table::TableType;
use risingwave_meta_model_v2::{
- connection, database, function, index, object, object_dependency, schema, sink, source,
- streaming_job, table, user_privilege, view, ActorId, ColumnCatalogArray, ConnectionId,
- CreateType, DatabaseId, FragmentId, FunctionId, IndexId, JobStatus, ObjectId,
- PrivateLinkService, SchemaId, SourceId, StreamSourceInfo, StreamingParallelism, TableId,
- UserId,
+ actor, connection, database, fragment, function, index, object, object_dependency, schema,
+ sink, source, streaming_job, table, user_privilege, view, ActorId, ActorUpstreamActors,
+ ColumnCatalogArray, ConnectionId, CreateType, DatabaseId, FragmentId, FunctionId, I32Array,
+ IndexId, JobStatus, ObjectId, PrivateLinkService, SchemaId, SourceId, StreamSourceInfo,
+ StreamingParallelism, TableId, UserId,
};
use risingwave_pb::catalog::table::PbTableType;
use risingwave_pb::catalog::{
@@ -41,6 +43,8 @@ use risingwave_pb::meta::subscribe_response::{
Info as NotificationInfo, Info, Operation as NotificationOperation, Operation,
};
use risingwave_pb::meta::{PbRelation, PbRelationGroup};
+use risingwave_pb::stream_plan::stream_node::NodeBody;
+use risingwave_pb::stream_plan::FragmentTypeFlag;
use risingwave_pb::user::PbUserInfo;
use sea_orm::sea_query::{Expr, SimpleExpr};
use sea_orm::ActiveValue::Set;
@@ -423,6 +427,7 @@ impl CatalogController {
pub async fn clean_dirty_creating_jobs(&self) -> MetaResult<ReleaseContext> {
let inner = self.inner.write().await;
let txn = inner.db.begin().await?;
+
let creating_job_ids: Vec<ObjectId> = streaming_job::Entity::find()
.select_only()
.column(streaming_job::Column::JobId)
@@ -436,7 +441,14 @@ impl CatalogController {
.into_tuple()
.all(&txn)
.await?;
+
+ let changed = Self::clean_dirty_sink_downstreams(&txn).await?;
+
if creating_job_ids.is_empty() {
+ if changed {
+ txn.commit().await?;
+ }
+
return Ok(ReleaseContext::default());
}
@@ -476,6 +488,7 @@ impl CatalogController {
.exec(&txn)
.await?;
assert!(res.rows_affected > 0);
+
txn.commit().await?;
Ok(ReleaseContext {
@@ -485,6 +498,175 @@ impl CatalogController {
})
}
+ async fn clean_dirty_sink_downstreams(txn: &DatabaseTransaction) -> MetaResult<bool> {
+ // Clean up dirty state left behind by incomplete sink-into-table jobs:
+ // - dangling incoming sink ids on the `table` catalog
+ // - dangling upstream fragment ids and merge inputs in `fragment` stream nodes
+ // - dangling upstream actor ids on `actor`
+ let all_fragment_ids: Vec<FragmentId> = Fragment::find()
+ .select_only()
+ .columns(vec![fragment::Column::FragmentId])
+ .into_tuple()
+ .all(txn)
+ .await?;
+
+ let all_fragment_ids: HashSet<_> = all_fragment_ids.into_iter().collect();
+
+ let table_sink_ids: Vec<ObjectId> = Sink::find()
+ .select_only()
+ .column(sink::Column::SinkId)
+ .filter(sink::Column::TargetTable.is_not_null())
+ .into_tuple()
+ .all(txn)
+ .await?;
+
+ let all_table_with_incoming_sinks: Vec<(ObjectId, I32Array)> = Table::find()
+ .select_only()
+ .columns(vec![table::Column::TableId, table::Column::IncomingSinks])
+ .into_tuple()
+ .all(txn)
+ .await?;
+
+ let table_incoming_sinks_to_update = all_table_with_incoming_sinks
+ .into_iter()
+ .filter(|(_, incoming_sinks)| {
+ let inner_ref = incoming_sinks.inner_ref();
+ !inner_ref.is_empty()
+ && inner_ref
+ .iter()
+ .any(|sink_id| !table_sink_ids.contains(sink_id))
+ })
+ .collect_vec();
+
+ let new_table_incoming_sinks = table_incoming_sinks_to_update
+ .into_iter()
+ .map(|(table_id, incoming_sinks)| {
+ let new_incoming_sinks = incoming_sinks
+ .into_inner()
+ .extract_if(|id| table_sink_ids.contains(id))
+ .collect_vec();
+ (table_id, I32Array::from(new_incoming_sinks))
+ })
+ .collect_vec();
+
+ // Nothing to update; return early.
+ if new_table_incoming_sinks.is_empty() {
+ return Ok(false);
+ }
+
+ for (table_id, new_incoming_sinks) in new_table_incoming_sinks {
+ tracing::info!("cleaning dirty table sink downstream table {}", table_id);
+ Table::update_many()
+ .col_expr(table::Column::IncomingSinks, new_incoming_sinks.into())
+ .filter(table::Column::TableId.eq(table_id))
+ .exec(txn)
+ .await?;
+
+ let fragments: Vec<(FragmentId, I32Array, StreamNode, i32)> = Fragment::find()
+ .select_only()
+ .columns(vec![
+ fragment::Column::FragmentId,
+ fragment::Column::UpstreamFragmentId,
+ fragment::Column::StreamNode,
+ fragment::Column::FragmentTypeMask,
+ ])
+ .filter(fragment::Column::JobId.eq(table_id))
+ .into_tuple()
+ .all(txn)
+ .await?;
+
+ for (fragment_id, upstream_fragment_ids, stream_node, fragment_mask) in fragments {
+ let mut upstream_fragment_ids = upstream_fragment_ids.into_inner();
+
+ let dirty_upstream_fragment_ids = upstream_fragment_ids
+ .extract_if(|id| !all_fragment_ids.contains(id))
+ .collect_vec();
+
+ if !dirty_upstream_fragment_ids.is_empty() {
+ // The dirty downstream should be the materialize fragment of the table.
+ assert!(fragment_mask & FragmentTypeFlag::Mview as i32 > 0);
+
+ tracing::info!(
+ "cleaning dirty table sink fragment {:?} from downstream fragment {}",
+ dirty_upstream_fragment_ids,
+ fragment_id
+ );
+
+ let mut pb_stream_node = stream_node.to_protobuf();
+
+ visit_stream_node_cont(&mut pb_stream_node, |node| {
+ if let Some(NodeBody::Union(_)) = node.node_body {
+ node.input.retain_mut(|input| {
+ if let Some(NodeBody::Merge(merge_node)) = &mut input.node_body
+ && all_fragment_ids
+ .contains(&(merge_node.upstream_fragment_id as i32))
+ {
+ true
+ } else {
+ false
+ }
+ });
+ }
+ true
+ });
+
+ Fragment::update_many()
+ .col_expr(
+ fragment::Column::UpstreamFragmentId,
+ I32Array::from(upstream_fragment_ids).into(),
+ )
+ .col_expr(
+ fragment::Column::StreamNode,
+ StreamNode::from_protobuf(&pb_stream_node).into(),
+ )
+ .filter(fragment::Column::FragmentId.eq(fragment_id))
+ .exec(txn)
+ .await?;
+
+ let actors: Vec<(ActorId, ActorUpstreamActors)> = Actor::find()
+ .select_only()
+ .columns(vec![
+ actor::Column::ActorId,
+ actor::Column::UpstreamActorIds,
+ ])
+ .filter(actor::Column::FragmentId.eq(fragment_id))
+ .into_tuple()
+ .all(txn)
+ .await?;
+
+ for (actor_id, upstream_actor_ids) in actors {
+ let mut upstream_actor_ids = upstream_actor_ids.into_inner();
+
+ let dirty_actor_upstreams = upstream_actor_ids
+ .extract_if(|id, _| !all_fragment_ids.contains(id))
+ .map(|(id, _)| id)
+ .collect_vec();
+
+ if !dirty_actor_upstreams.is_empty() {
+ tracing::debug!(
+ "cleaning dirty table sink fragment {:?} from downstream fragment {} actor {}",
+ dirty_actor_upstreams,
+ fragment_id,
+ actor_id,
+ );
+
+ Actor::update_many()
+ .col_expr(
+ actor::Column::UpstreamActorIds,
+ ActorUpstreamActors::from(upstream_actor_ids).into(),
+ )
+ .filter(actor::Column::ActorId.eq(actor_id))
+ .exec(txn)
+ .await?;
+ }
+ }
+ }
+ }
+ }
+
+ Ok(true)
+ }
+
/// `finish_streaming_job` marks job related objects as `Created` and notify frontend.
pub async fn finish_streaming_job(&self, job_id: ObjectId) -> MetaResult<NotificationVersion> {
let inner = self.inner.write().await;
@@ -1487,6 +1669,52 @@ impl CatalogController {
);
to_drop_objects.push(obj);
+ // Special handling for 'sink into table'.
+ if object_type != ObjectType::Sink {
+ // When dropping a downstream table, all of its incoming sinks should be dropped as well.
+ if object_type == ObjectType::Table {
+ let table = Table::find_by_id(object_id)
+ .one(&txn)
+ .await?
+ .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?;
+
+ let incoming_sinks = table.incoming_sinks.into_inner();
+
+ if !incoming_sinks.is_empty() {
+ let objs: Vec<PartialObject> = Object::find()
+ .filter(object::Column::Oid.is_in(incoming_sinks))
+ .into_partial_model()
+ .all(&txn)
+ .await?;
+
+ to_drop_objects.extend(objs);
+ }
+ }
+
+ let to_drop_object_ids: HashSet<_> =
+ to_drop_objects.iter().map(|obj| obj.oid).collect();
+
+ // If a sink into a table appears in the dependency chain of a cascading drop, we currently return an error and require the sink to be dropped manually.
+ for obj in &to_drop_objects {
+ if obj.obj_type == ObjectType::Sink {
+ let sink = Sink::find_by_id(obj.oid)
+ .one(&txn)
+ .await?
+ .ok_or_else(|| MetaError::catalog_id_not_found("sink", obj.oid))?;
+
+ // Since dropping a sink into a table requires the frontend to handle part of the logic (regenerating the plan), it is not compatible with the current cascading drop.
+ if let Some(target_table) = sink.target_table
+ && !to_drop_object_ids.contains(&target_table)
+ {
+ bail!(
+ "Found sink into table with sink id {} in dependency, please drop them manually",
+ obj.oid,
+ );
+ }
+ }
+ }
+ }
+
let to_drop_table_ids = to_drop_objects
.iter()
.filter(|obj| obj.obj_type == ObjectType::Table || obj.obj_type == ObjectType::Index)
@@ -1856,22 +2084,28 @@ impl CatalogController {
});
}};
}
- let objs = get_referring_objects(object_id, &txn).await?;
- // TODO: For sink into table. when sink into table is ready.
- // if object_type == ObjectType::Table {
- // let incoming_sinks: Vec<_> = Table::find_by_id(object_id)
- // .select_only()
- // .column(table::Column::IncomingSinks)
- // .into_tuple()
- // .one(&txn)
- // .await?
- // .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?;
- // objs.extend(incoming_sinks.into_iter().map(|id| PartialObject {
- // oid: id as _,
- // obj_type: ObjectType::Sink,
- // ..Default::default()
- // }));
- // }
+ let mut objs = get_referring_objects(object_id, &txn).await?;
+ if object_type == ObjectType::Table {
+ let incoming_sinks: I32Array = Table::find_by_id(object_id)
+ .select_only()
+ .column(table::Column::IncomingSinks)
+ .into_tuple()
+ .one(&txn)
+ .await?
+ .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?;
+
+ objs.extend(
+ incoming_sinks
+ .into_inner()
+ .into_iter()
+ .map(|id| PartialObject {
+ oid: id,
+ obj_type: ObjectType::Sink,
+ schema_id: None,
+ database_id: None,
+ }),
+ );
+ }
for obj in objs {
match obj.obj_type {
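Note on the dirty-sink cleanup above: `Vec::extract_if` (a nightly API) pulls the dangling fragment and actor ids out of each upstream list while keeping the survivors in place. A minimal stable-Rust sketch of the same partitioning, with a hypothetical helper name and plain `i32` ids:

    use std::collections::HashSet;

    /// Hypothetical helper: split `upstream` into retained ids (kept in place) and dirty ids
    /// (returned), given the set of fragment ids that still exist. Stable equivalent of `extract_if`.
    fn split_dirty_upstreams(upstream: &mut Vec<i32>, live: &HashSet<i32>) -> Vec<i32> {
        let mut dirty = Vec::new();
        upstream.retain(|id| {
            if live.contains(id) {
                true
            } else {
                dirty.push(*id);
                false
            }
        });
        dirty
    }

    fn main() {
        let live: HashSet<i32> = HashSet::from([1, 2]);
        let mut upstream = vec![1, 2, 3];
        let dirty = split_dirty_upstreams(&mut upstream, &live);
        assert_eq!((upstream, dirty), (vec![1, 2], vec![3]));
    }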
diff --git a/src/meta/src/controller/rename.rs b/src/meta/src/controller/rename.rs
index bde954a587fdf..15be4d7ef83b8 100644
--- a/src/meta/src/controller/rename.rs
+++ b/src/meta/src/controller/rename.rs
@@ -79,6 +79,7 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str
stmt:
CreateSinkStatement {
sink_from: CreateSink::AsQuery(query),
+ into_table_name: None,
..
},
} => {
@@ -89,9 +90,27 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str
stmt:
CreateSinkStatement {
sink_from: CreateSink::From(table_name),
+ into_table_name: None,
..
},
} => replace_table_name(table_name, to),
+ Statement::CreateSink {
+ stmt: CreateSinkStatement {
+ sink_from,
+ into_table_name: Some(table_name),
+ ..
+ }
+ } => {
+ let idx = table_name.0.len() - 1;
+ if table_name.0[idx].real_value() == from {
+ table_name.0[idx] = Ident::new_unchecked(to);
+ } else {
+ match sink_from {
+ CreateSink::From(table_name) => replace_table_name(table_name, to),
+ CreateSink::AsQuery(query) => QueryRewriter::rewrite_query(query, from, to),
+ }
+ }
+ }
_ => unreachable!(),
};
stmt.to_string()
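The new `CREATE SINK ... INTO ...` arm above only rewrites the last identifier of the qualified target name when it matches the old relation name; otherwise the rename falls through to the `FROM` table or the `AS` query. A small sketch of that rule on plain strings (hypothetical helper, not the sqlparser types):

    /// Rewrite the final path segment if it equals `from`; return whether a rewrite happened.
    fn rename_into_target(path: &mut Vec<String>, from: &str, to: &str) -> bool {
        if let Some(last) = path.last_mut() {
            if last.as_str() == from {
                *last = to.to_string();
                return true;
            }
        }
        false
    }

    // e.g. ["public", "t"] with from = "t", to = "t_new" becomes ["public", "t_new"].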
diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs
index 9bb8af6172469..7c4360a92f285 100644
--- a/src/meta/src/controller/streaming_job.rs
+++ b/src/meta/src/controller/streaming_job.rs
@@ -16,6 +16,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::num::NonZeroUsize;
use itertools::Itertools;
+use risingwave_common::bail;
use risingwave_common::buffer::Bitmap;
use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping};
use risingwave_common::util::column_index_mapping::ColIndexMapping;
@@ -64,8 +65,8 @@ use crate::barrier::Reschedule;
use crate::controller::catalog::CatalogController;
use crate::controller::rename::ReplaceTableExprRewriter;
use crate::controller::utils::{
- check_relation_name_duplicate, ensure_object_id, ensure_user_id, get_fragment_actor_ids,
- get_fragment_mappings,
+ check_relation_name_duplicate, check_sink_into_table_cycle, ensure_object_id, ensure_user_id,
+ get_fragment_actor_ids, get_fragment_mappings,
};
use crate::controller::ObjectModel;
use crate::manager::{NotificationVersion, SinkId, StreamingJob};
@@ -141,6 +142,21 @@ impl CatalogController {
Table::insert(table).exec(&txn).await?;
}
StreamingJob::Sink(sink, _) => {
+ if let Some(target_table_id) = sink.target_table {
+ if check_sink_into_table_cycle(
+ target_table_id as ObjectId,
+ sink.dependent_relations
+ .iter()
+ .map(|id| *id as ObjectId)
+ .collect(),
+ &txn,
+ )
+ .await?
+ {
+ bail!("Creating such a sink will result in circular dependency.");
+ }
+ }
+
let job_id = Self::create_streaming_job_obj(
&txn,
ObjectType::Sink,
diff --git a/src/meta/src/controller/system_param.rs b/src/meta/src/controller/system_param.rs
index 4b2e598a2c221..855112acb7167 100644
--- a/src/meta/src/controller/system_param.rs
+++ b/src/meta/src/controller/system_param.rs
@@ -186,7 +186,7 @@ impl SystemParamsController {
.await?
else {
return Err(MetaError::system_params(format!(
- "unrecognized system parameter {}",
+ "unrecognized system parameter {:?}",
name
)));
};
diff --git a/src/meta/src/controller/utils.rs b/src/meta/src/controller/utils.rs
index ff19892d516b5..6c7e61a316add 100644
--- a/src/meta/src/controller/utils.rs
+++ b/src/meta/src/controller/utils.rs
@@ -118,6 +118,107 @@ pub fn construct_obj_dependency_query(obj_id: ObjectId) -> WithQuery {
.to_owned()
}
+/// Constructs a query using a recursive CTE to check whether the given dependent objects already rely on the target table.
+///
+/// # Examples
+///
+/// ```
+/// use risingwave_meta::controller::utils::construct_sink_cycle_check_query;
+/// use sea_orm::sea_query::*;
+/// use sea_orm::*;
+///
+/// let query = construct_sink_cycle_check_query(1, vec![2, 3]);
+///
+/// assert_eq!(
+/// query.to_string(MysqlQueryBuilder),
+/// r#"WITH RECURSIVE `used_by_object_ids_with_sink` (`oid`, `used_by`) AS (SELECT `oid`, `used_by` FROM `object_dependency` WHERE `object_dependency`.`oid` = 1 UNION ALL (SELECT `obj_dependency_with_sink`.`oid`, `obj_dependency_with_sink`.`used_by` FROM (SELECT `oid`, `used_by` FROM `object_dependency` UNION ALL (SELECT `sink_id`, `target_table` FROM `sink` WHERE `sink`.`target_table` IS NOT NULL)) AS `obj_dependency_with_sink` INNER JOIN `used_by_object_ids_with_sink` ON `used_by_object_ids_with_sink`.`used_by` = `obj_dependency_with_sink`.`oid` WHERE `used_by_object_ids_with_sink`.`used_by` <> `used_by_object_ids_with_sink`.`oid`)) SELECT COUNT(`used_by_object_ids_with_sink`.`used_by`) FROM `used_by_object_ids_with_sink` WHERE `used_by_object_ids_with_sink`.`used_by` IN (2, 3)"#
+/// );
+/// assert_eq!(
+/// query.to_string(PostgresQueryBuilder),
+/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL (SELECT "obj_dependency_with_sink"."oid", "obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL (SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL)) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid")) SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"#
+/// );
+/// assert_eq!(
+/// query.to_string(SqliteQueryBuilder),
+/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL SELECT "obj_dependency_with_sink"."oid", "obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid") SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"#
+/// );
+/// ```
+pub fn construct_sink_cycle_check_query(
+ target_table: ObjectId,
+ dependent_objects: Vec<ObjectId>,
+) -> WithQuery {
+ let cte_alias = Alias::new("used_by_object_ids_with_sink");
+ let depend_alias = Alias::new("obj_dependency_with_sink");
+
+ let mut base_query = SelectStatement::new()
+ .columns([
+ object_dependency::Column::Oid,
+ object_dependency::Column::UsedBy,
+ ])
+ .from(ObjectDependency)
+ .and_where(object_dependency::Column::Oid.eq(target_table))
+ .to_owned();
+
+ let query_sink_deps = SelectStatement::new()
+ .columns([sink::Column::SinkId, sink::Column::TargetTable])
+ .from(Sink)
+ .and_where(sink::Column::TargetTable.is_not_null())
+ .to_owned();
+
+ let cte_referencing = Query::select()
+ .column((depend_alias.clone(), object_dependency::Column::Oid))
+ .column((depend_alias.clone(), object_dependency::Column::UsedBy))
+ .from_subquery(
+ SelectStatement::new()
+ .columns([
+ object_dependency::Column::Oid,
+ object_dependency::Column::UsedBy,
+ ])
+ .from(ObjectDependency)
+ .union(UnionType::All, query_sink_deps)
+ .to_owned(),
+ depend_alias.clone(),
+ )
+ .inner_join(
+ cte_alias.clone(),
+ Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).eq(Expr::col((
+ depend_alias.clone(),
+ object_dependency::Column::Oid,
+ ))),
+ )
+ .and_where(
+ Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).ne(Expr::col((
+ cte_alias.clone(),
+ object_dependency::Column::Oid,
+ ))),
+ )
+ .to_owned();
+
+ let common_table_expr = CommonTableExpression::new()
+ .query(base_query.union(UnionType::All, cte_referencing).to_owned())
+ .columns([
+ object_dependency::Column::Oid,
+ object_dependency::Column::UsedBy,
+ ])
+ .table_name(cte_alias.clone())
+ .to_owned();
+
+ SelectStatement::new()
+ .expr(Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).count())
+ .from(cte_alias.clone())
+ .and_where(
+ Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy))
+ .is_in(dependent_objects),
+ )
+ .to_owned()
+ .with(
+ WithClause::new()
+ .recursive(true)
+ .cte(common_table_expr)
+ .to_owned(),
+ )
+ .to_owned()
+}
+
#[derive(Clone, DerivePartialModel, FromQueryResult)]
#[sea_orm(entity = "Object")]
pub struct PartialObject {
@@ -175,6 +276,36 @@ where
Ok(objects)
}
+/// Checks whether creating a sink with the given dependent objects into the target table would form a cycle. Returns `true` if it would.
+pub async fn check_sink_into_table_cycle<C>(
+ target_table: ObjectId,
+ dependent_objs: Vec<ObjectId>,
+ db: &C,
+) -> MetaResult<bool>
+where
+ C: ConnectionTrait,
+{
+ if dependent_objs.is_empty() {
+ return Ok(false);
+ }
+
+ let query = construct_sink_cycle_check_query(target_table, dependent_objs);
+ let (sql, values) = query.build_any(&*db.get_database_backend().get_query_builder());
+
+ let res = db
+ .query_one(Statement::from_sql_and_values(
+ db.get_database_backend(),
+ sql,
+ values,
+ ))
+ .await?
+ .unwrap();
+
+ let cnt: i64 = res.try_get_by(0)?;
+
+ Ok(cnt != 0)
+}
+
/// `ensure_object_id` ensures the existence of target object in the cluster.
pub async fn ensure_object_id(
object_type: ObjectType,
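The recursive CTE built by `construct_sink_cycle_check_query` walks the dependency edges, augmented with the extra `sink -> target_table` edges, starting from the target table and counts how many of the new sink's dependent objects are reachable; a non-zero count means the sink would close a cycle. A hedged in-memory equivalent (plain adjacency map, no database) to show the intent:

    use std::collections::{HashMap, HashSet, VecDeque};

    /// Returns true if any of `dependents` is reachable from `target_table` by following
    /// `used_by` edges, i.e. creating the sink would form a cycle.
    fn would_form_cycle(
        used_by: &HashMap<i32, Vec<i32>>, // oid -> oids of objects that depend on it
        target_table: i32,
        dependents: &HashSet<i32>,
    ) -> bool {
        let mut queue: VecDeque<i32> = used_by
            .get(&target_table)
            .into_iter()
            .flatten()
            .copied()
            .collect();
        let mut seen: HashSet<i32> = queue.iter().copied().collect();
        while let Some(oid) = queue.pop_front() {
            if dependents.contains(&oid) {
                return true;
            }
            for &next in used_by.get(&oid).into_iter().flatten() {
                if seen.insert(next) {
                    queue.push_back(next);
                }
            }
        }
        false
    }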
diff --git a/src/meta/src/stream/scale.rs b/src/meta/src/stream/scale.rs
index 7f40f8e3da033..0e571a0afebf7 100644
--- a/src/meta/src/stream/scale.rs
+++ b/src/meta/src/stream/scale.rs
@@ -31,15 +31,19 @@ use risingwave_common::catalog::TableId;
use risingwave_common::hash::{ActorMapping, ParallelUnitId, VirtualNode};
use risingwave_common::util::iter_util::ZipEqDebug;
use risingwave_meta_model_v2::StreamingParallelism;
-use risingwave_pb::common::{ActorInfo, ParallelUnit, WorkerNode};
+use risingwave_pb::common::{ActorInfo, Buffer, ParallelUnit, ParallelUnitMapping, WorkerNode};
use risingwave_pb::meta::get_reschedule_plan_request::{Policy, StableResizePolicy};
use risingwave_pb::meta::subscribe_response::{Info, Operation};
use risingwave_pb::meta::table_fragments::actor_status::ActorState;
-use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType;
-use risingwave_pb::meta::table_fragments::{self, ActorStatus, Fragment, State};
+use risingwave_pb::meta::table_fragments::fragment::{
+ FragmentDistributionType, PbFragmentDistributionType,
+};
+use risingwave_pb::meta::table_fragments::{self, ActorStatus, PbFragment, State};
use risingwave_pb::meta::FragmentParallelUnitMappings;
use risingwave_pb::stream_plan::stream_node::NodeBody;
-use risingwave_pb::stream_plan::{DispatcherType, FragmentTypeFlag, StreamActor, StreamNode};
+use risingwave_pb::stream_plan::{
+ Dispatcher, DispatcherType, FragmentTypeFlag, PbStreamActor, StreamNode,
+};
use thiserror_ext::AsReport;
use tokio::sync::oneshot::Receiver;
use tokio::sync::{oneshot, RwLock, RwLockReadGuard, RwLockWriteGuard};
@@ -105,15 +109,85 @@ pub struct ParallelUnitReschedule {
pub removed_parallel_units: BTreeSet<ParallelUnitId>,
}
+pub struct CustomFragmentInfo {
+ pub fragment_id: u32,
+ pub fragment_type_mask: u32,
+ pub distribution_type: PbFragmentDistributionType,
+ pub vnode_mapping: Option<ParallelUnitMapping>,
+ pub state_table_ids: Vec<u32>,
+ pub upstream_fragment_ids: Vec<u32>,
+ pub actor_template: PbStreamActor,
+ pub actors: Vec<CustomActorInfo>,
+}
+
+#[derive(Default)]
+pub struct CustomActorInfo {
+ pub actor_id: u32,
+ pub fragment_id: u32,
+ pub dispatcher: Vec<Dispatcher>,
+ pub upstream_actor_id: Vec<u32>,
+ pub vnode_bitmap: Option<Buffer>,
+}
+
+impl From<&PbStreamActor> for CustomActorInfo {
+ fn from(
+ PbStreamActor {
+ actor_id,
+ fragment_id,
+ dispatcher,
+ upstream_actor_id,
+ vnode_bitmap,
+ ..
+ }: &PbStreamActor,
+ ) -> Self {
+ CustomActorInfo {
+ actor_id: *actor_id,
+ fragment_id: *fragment_id,
+ dispatcher: dispatcher.clone(),
+ upstream_actor_id: upstream_actor_id.clone(),
+ vnode_bitmap: vnode_bitmap.clone(),
+ }
+ }
+}
+
+impl From<&PbFragment> for CustomFragmentInfo {
+ fn from(fragment: &PbFragment) -> Self {
+ CustomFragmentInfo {
+ fragment_id: fragment.fragment_id,
+ fragment_type_mask: fragment.fragment_type_mask,
+ distribution_type: fragment.distribution_type(),
+ vnode_mapping: fragment.vnode_mapping.clone(),
+ state_table_ids: fragment.state_table_ids.clone(),
+ upstream_fragment_ids: fragment.upstream_fragment_ids.clone(),
+ actor_template: fragment
+ .actors
+ .first()
+ .cloned()
+ .expect("no actor in fragment"),
+ actors: fragment.actors.iter().map(CustomActorInfo::from).collect(),
+ }
+ }
+}
+
+impl CustomFragmentInfo {
+ pub fn get_fragment_type_mask(&self) -> u32 {
+ self.fragment_type_mask
+ }
+
+ pub fn distribution_type(&self) -> FragmentDistributionType {
+ self.distribution_type
+ }
+}
+
pub struct RescheduleContext {
/// Index used to map `ParallelUnitId` to `WorkerId`
parallel_unit_id_to_worker_id: BTreeMap<ParallelUnitId, WorkerId>,
/// Meta information for all Actors
- actor_map: HashMap<ActorId, StreamActor>,
+ actor_map: HashMap<ActorId, CustomActorInfo>,
/// Status of all Actors, used to find the location of the `Actor`
actor_status: BTreeMap<ActorId, ActorStatus>,
/// Meta information of all `Fragment`, used to find the `Fragment`'s `Actor`
- fragment_map: HashMap<FragmentId, Fragment>,
+ fragment_map: HashMap<FragmentId, CustomFragmentInfo>,
/// Indexes for all `Worker`s
worker_nodes: HashMap<WorkerId, WorkerNode>,
/// Index of all `Actor` upstreams, specific to `Dispatcher`
@@ -180,7 +254,7 @@ impl RescheduleContext {
///
/// The return value is the bitmap distribution after scaling, which covers all virtual node indexes
pub fn rebalance_actor_vnode(
- actors: &[StreamActor],
+ actors: &[CustomActorInfo],
actors_to_remove: &BTreeSet<ActorId>,
actors_to_create: &BTreeSet<ActorId>,
) -> HashMap<ActorId, Bitmap> {
@@ -464,16 +538,29 @@ impl ScaleController {
let mut fragment_state = HashMap::new();
let mut fragment_to_table = HashMap::new();
- let all_table_fragments = self.list_all_table_fragments().await?;
-
- for table_fragments in all_table_fragments {
+ // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future.
+ fn fulfill_index_by_table_fragments_ref(
+ actor_map: &mut HashMap<ActorId, CustomActorInfo>,
+ fragment_map: &mut HashMap<FragmentId, CustomFragmentInfo>,
+ actor_status: &mut BTreeMap<ActorId, ActorStatus>,
+ fragment_state: &mut HashMap<FragmentId, State>,
+ fragment_to_table: &mut HashMap<FragmentId, TableId>,
+ table_fragments: &TableFragments,
+ ) {
fragment_state.extend(
table_fragments
.fragment_ids()
.map(|f| (f, table_fragments.state())),
);
- fragment_map.extend(table_fragments.fragments.clone());
- actor_map.extend(table_fragments.actor_map());
+
+ for (fragment_id, fragment) in &table_fragments.fragments {
+ for actor in &fragment.actors {
+ actor_map.insert(actor.actor_id, CustomActorInfo::from(actor));
+ }
+
+ fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment));
+ }
+
actor_status.extend(table_fragments.actor_status.clone());
fragment_to_table.extend(
@@ -483,6 +570,37 @@ impl ScaleController {
);
}
+ match &self.metadata_manager {
+ MetadataManager::V1(mgr) => {
+ let guard = mgr.fragment_manager.get_fragment_read_guard().await;
+
+ for table_fragments in guard.table_fragments().values() {
+ fulfill_index_by_table_fragments_ref(
+ &mut actor_map,
+ &mut fragment_map,
+ &mut actor_status,
+ &mut fragment_state,
+ &mut fragment_to_table,
+ table_fragments,
+ );
+ }
+ }
+ MetadataManager::V2(_) => {
+ let all_table_fragments = self.list_all_table_fragments().await?;
+
+ for table_fragments in &all_table_fragments {
+ fulfill_index_by_table_fragments_ref(
+ &mut actor_map,
+ &mut fragment_map,
+ &mut actor_status,
+ &mut fragment_state,
+ &mut fragment_to_table,
+ table_fragments,
+ );
+ }
+ }
+ };
+
// NoShuffle relation index
let mut no_shuffle_source_fragment_ids = HashSet::new();
let mut no_shuffle_target_fragment_ids = HashSet::new();
@@ -608,7 +726,7 @@ impl ScaleController {
}
if (fragment.get_fragment_type_mask() & FragmentTypeFlag::Source as u32) != 0 {
- let stream_node = fragment.actors.first().unwrap().get_nodes().unwrap();
+ let stream_node = fragment.actor_template.nodes.as_ref().unwrap();
if TableFragments::find_stream_source(stream_node).is_some() {
stream_source_fragment_ids.insert(*fragment_id);
}
@@ -698,7 +816,7 @@ impl ScaleController {
&self,
worker_nodes: &HashMap,
actor_infos_to_broadcast: BTreeMap<ActorId, ActorInfo>,
- node_actors_to_create: HashMap<WorkerId, Vec<StreamActor>>,
+ node_actors_to_create: HashMap<WorkerId, Vec<PbStreamActor>>,
broadcast_worker_ids: HashSet<WorkerId>,
) -> MetaResult<()> {
self.stream_rpc_manager
@@ -963,7 +1081,7 @@ impl ScaleController {
for (actor_to_create, sample_actor) in actors_to_create
.iter()
- .zip_eq_debug(repeat(fragment.actors.first().unwrap()).take(actors_to_create.len()))
+ .zip_eq_debug(repeat(&fragment.actor_template).take(actors_to_create.len()))
{
let new_actor_id = actor_to_create.0;
let mut new_actor = sample_actor.clone();
@@ -1407,7 +1525,7 @@ impl ScaleController {
fragment_actor_bitmap: &HashMap<FragmentId, HashMap<ActorId, Bitmap>>,
no_shuffle_upstream_actor_map: &HashMap<ActorId, HashMap<FragmentId, ActorId>>,
no_shuffle_downstream_actors_map: &HashMap<ActorId, HashMap<FragmentId, ActorId>>,
- new_actor: &mut StreamActor,
+ new_actor: &mut PbStreamActor,
) -> MetaResult<()> {
let fragment = &ctx.fragment_map.get(&new_actor.fragment_id).unwrap();
let mut applied_upstream_fragment_actor_ids = HashMap::new();
@@ -1953,8 +2071,6 @@ impl ScaleController {
})
.collect::>();
- let all_table_fragments = self.list_all_table_fragments().await?;
-
// FIXME: only need actor id and dispatcher info, avoid clone it.
let mut actor_map = HashMap::new();
let mut actor_status = HashMap::new();
@@ -1962,24 +2078,56 @@ impl ScaleController {
let mut fragment_map = HashMap::new();
let mut fragment_parallelism = HashMap::new();
- for table_fragments in all_table_fragments {
- for (fragment_id, fragment) in table_fragments.fragments {
- fragment
- .actors
- .iter()
- .map(|actor| (actor.actor_id, actor))
- .for_each(|(id, actor)| {
- actor_map.insert(id as ActorId, actor.clone());
- });
+ // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future.
+ fn fulfill_index_by_table_fragments_ref(
+ actor_map: &mut HashMap<ActorId, CustomActorInfo>,
+ actor_status: &mut HashMap<ActorId, ActorStatus>,
+ fragment_map: &mut HashMap<FragmentId, CustomFragmentInfo>,
+ fragment_parallelism: &mut HashMap,
+ table_fragments: &TableFragments,
+ ) {
+ for (fragment_id, fragment) in &table_fragments.fragments {
+ for actor in &fragment.actors {
+ actor_map.insert(actor.actor_id, CustomActorInfo::from(actor));
+ }
- fragment_map.insert(fragment_id, fragment);
+ fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment));
- fragment_parallelism.insert(fragment_id, table_fragments.assigned_parallelism);
+ fragment_parallelism.insert(*fragment_id, table_fragments.assigned_parallelism);
}
- actor_status.extend(table_fragments.actor_status);
+ actor_status.extend(table_fragments.actor_status.clone());
}
+ match &self.metadata_manager {
+ MetadataManager::V1(mgr) => {
+ let guard = mgr.fragment_manager.get_fragment_read_guard().await;
+
+ for table_fragments in guard.table_fragments().values() {
+ fulfill_index_by_table_fragments_ref(
+ &mut actor_map,
+ &mut actor_status,
+ &mut fragment_map,
+ &mut fragment_parallelism,
+ table_fragments,
+ );
+ }
+ }
+ MetadataManager::V2(_) => {
+ let all_table_fragments = self.list_all_table_fragments().await?;
+
+ for table_fragments in &all_table_fragments {
+ fulfill_index_by_table_fragments_ref(
+ &mut actor_map,
+ &mut actor_status,
+ &mut fragment_map,
+ &mut fragment_parallelism,
+ table_fragments,
+ );
+ }
+ }
+ };
+
let mut no_shuffle_source_fragment_ids = HashSet::new();
let mut no_shuffle_target_fragment_ids = HashSet::new();
@@ -2034,7 +2182,7 @@ impl ScaleController {
},
) in fragment_worker_changes
{
- let fragment = match fragment_map.get(&fragment_id).cloned() {
+ let fragment = match fragment_map.get(&fragment_id) {
None => bail!("Fragment id {} not found", fragment_id),
Some(fragment) => fragment,
};
@@ -2122,7 +2270,7 @@ impl ScaleController {
// then we re-add the limited parallel units from the limited workers
target_parallel_unit_ids.extend(limited_worker_parallel_unit_ids.into_iter());
}
- match fragment.get_distribution_type().unwrap() {
+ match fragment.distribution_type() {
FragmentDistributionType::Unspecified => unreachable!(),
FragmentDistributionType::Single => {
let single_parallel_unit_id =
@@ -2274,7 +2422,7 @@ impl ScaleController {
}
pub fn build_no_shuffle_relation_index(
- actor_map: &HashMap<ActorId, StreamActor>,
+ actor_map: &HashMap<ActorId, CustomActorInfo>,
no_shuffle_source_fragment_ids: &mut HashSet<FragmentId>,
no_shuffle_target_fragment_ids: &mut HashSet<FragmentId>,
) {
@@ -2302,7 +2450,7 @@ impl ScaleController {
}
pub fn build_fragment_dispatcher_index(
- actor_map: &HashMap<ActorId, StreamActor>,
+ actor_map: &HashMap<ActorId, CustomActorInfo>,
fragment_dispatcher_map: &mut HashMap<FragmentId, HashMap<FragmentId, DispatcherType>>,
) {
for actor in actor_map.values() {
@@ -2324,7 +2472,7 @@ impl ScaleController {
pub fn resolve_no_shuffle_upstream_tables(
fragment_ids: HashSet<FragmentId>,
- fragment_map: &HashMap<FragmentId, Fragment>,
+ fragment_map: &HashMap<FragmentId, CustomFragmentInfo>,
no_shuffle_source_fragment_ids: &HashSet<FragmentId>,
no_shuffle_target_fragment_ids: &HashSet<FragmentId>,
fragment_to_table: &HashMap<FragmentId, TableId>,
@@ -2394,7 +2542,7 @@ impl ScaleController {
pub fn resolve_no_shuffle_upstream_fragments(
reschedule: &mut HashMap,
- fragment_map: &HashMap<FragmentId, Fragment>,
+ fragment_map: &HashMap<FragmentId, CustomFragmentInfo>,
no_shuffle_source_fragment_ids: &HashSet<FragmentId>,
no_shuffle_target_fragment_ids: &HashSet<FragmentId>,
) -> MetaResult<()>
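`CustomFragmentInfo::actor_template` keeps one fully populated `PbStreamActor` per fragment so new actors can be stamped out from it, while the per-actor `CustomActorInfo` stays lightweight. A simplified sketch of that pattern with a made-up stand-in type:

    /// `ActorSketch` is a hypothetical stand-in for `PbStreamActor`; only the id and a
    /// placeholder for the (large) stream-node plan are kept here.
    #[derive(Clone, Default)]
    struct ActorSketch {
        actor_id: u32,
        nodes: Option<String>,
    }

    /// Stamp out new actors from a cached template, overriding only the actor id.
    fn actors_from_template(template: &ActorSketch, new_ids: &[u32]) -> Vec<ActorSketch> {
        new_ids
            .iter()
            .map(|id| ActorSketch {
                actor_id: *id,
                ..template.clone()
            })
            .collect()
    }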
diff --git a/src/meta/src/stream/test_scale.rs b/src/meta/src/stream/test_scale.rs
index 2db55dbddbd4d..73d59ff52f2f4 100644
--- a/src/meta/src/stream/test_scale.rs
+++ b/src/meta/src/stream/test_scale.rs
@@ -21,10 +21,10 @@ mod tests {
use risingwave_common::buffer::Bitmap;
use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping, VirtualNode};
use risingwave_pb::common::ParallelUnit;
- use risingwave_pb::stream_plan::StreamActor;
use crate::model::ActorId;
use crate::stream::scale::rebalance_actor_vnode;
+ use crate::stream::CustomActorInfo;
fn simulated_parallel_unit_nums(min: Option<usize>, max: Option<usize>) -> Vec<usize> {
let mut raw = vec![1, 3, 12, 42, VirtualNode::COUNT];
@@ -39,13 +39,13 @@ mod tests {
raw
}
- fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec<StreamActor> {
+ fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec<CustomActorInfo> {
let parallel_units = generate_parallel_units(info);
let vnode_bitmaps = ParallelUnitMapping::build(&parallel_units).to_bitmaps();
info.iter()
- .map(|(actor_id, parallel_unit_id)| StreamActor {
+ .map(|(actor_id, parallel_unit_id)| CustomActorInfo {
actor_id: *actor_id,
vnode_bitmap: vnode_bitmaps
.get(parallel_unit_id)
@@ -64,7 +64,7 @@ mod tests {
.collect_vec()
}
- fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &StreamActor) {
+ fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &CustomActorInfo) {
let prev_bitmap = Bitmap::from(actor.vnode_bitmap.as_ref().unwrap());
for idx in 0..VirtualNode::COUNT {
diff --git a/src/object_store/Cargo.toml b/src/object_store/Cargo.toml
index a7ae9a8bfb70d..5acc52937f4ba 100644
--- a/src/object_store/Cargo.toml
+++ b/src/object_store/Cargo.toml
@@ -27,7 +27,7 @@ hyper-rustls = { version = "0.24.2", features = ["webpki-roots"] }
hyper-tls = "0.5.0"
itertools = "0.12"
madsim = "0.2.22"
-opendal = "0.44"
+opendal = "0.44.2"
prometheus = { version = "0.13", features = ["process"] }
risingwave_common = { workspace = true }
rustls = "0.21.8"
diff --git a/src/object_store/src/object/mod.rs b/src/object_store/src/object/mod.rs
index 5399b6d253b2f..d9ae0bc37b868 100644
--- a/src/object_store/src/object/mod.rs
+++ b/src/object_store/src/object/mod.rs
@@ -818,15 +818,27 @@ pub async fn build_remote_object_store(
config: ObjectStoreConfig,
) -> ObjectStoreImpl {
match url {
- s3 if s3.starts_with("s3://") => ObjectStoreImpl::S3(
- S3ObjectStore::new_with_config(
- s3.strip_prefix("s3://").unwrap().to_string(),
- metrics.clone(),
- config,
- )
- .await
- .monitored(metrics),
- ),
+ s3 if s3.starts_with("s3://") => {
+ if std::env::var("RW_USE_OPENDAL_FOR_S3").is_ok() {
+ let bucket = s3.strip_prefix("s3://").unwrap();
+
+ ObjectStoreImpl::Opendal(
+ OpendalObjectStore::new_s3_engine(bucket.to_string(), config)
+ .unwrap()
+ .monitored(metrics),
+ )
+ } else {
+ ObjectStoreImpl::S3(
+ S3ObjectStore::new_with_config(
+ s3.strip_prefix("s3://").unwrap().to_string(),
+ metrics.clone(),
+ config,
+ )
+ .await
+ .monitored(metrics),
+ )
+ }
+ }
#[cfg(feature = "hdfs-backend")]
hdfs if hdfs.starts_with("hdfs://") => {
let hdfs = hdfs.strip_prefix("hdfs://").unwrap();
diff --git a/src/object_store/src/object/opendal_engine/fs.rs b/src/object_store/src/object/opendal_engine/fs.rs
index 23d7dcbd503e8..ece3555d5b777 100644
--- a/src/object_store/src/object/opendal_engine/fs.rs
+++ b/src/object_store/src/object/opendal_engine/fs.rs
@@ -17,15 +17,17 @@ use opendal::services::Fs;
use opendal::Operator;
use super::{EngineType, OpendalObjectStore};
+use crate::object::opendal_engine::ATOMIC_WRITE_DIR;
use crate::object::ObjectResult;
+
impl OpendalObjectStore {
/// create opendal fs engine.
pub fn new_fs_engine(root: String) -> ObjectResult<Self> {
// Create fs backend builder.
let mut builder = Fs::default();
-
builder.root(&root);
-
+ let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR);
+ builder.atomic_write_dir(&atomic_write_dir);
let op: Operator = Operator::new(builder)?
.layer(RetryLayer::default())
.finish();
diff --git a/src/object_store/src/object/opendal_engine/hdfs.rs b/src/object_store/src/object/opendal_engine/hdfs.rs
index b52be4094df80..12ee292a85416 100644
--- a/src/object_store/src/object/opendal_engine/hdfs.rs
+++ b/src/object_store/src/object/opendal_engine/hdfs.rs
@@ -17,7 +17,9 @@ use opendal::services::Hdfs;
use opendal::Operator;
use super::{EngineType, OpendalObjectStore};
+use crate::object::opendal_engine::ATOMIC_WRITE_DIR;
use crate::object::ObjectResult;
+
impl OpendalObjectStore {
/// create opendal hdfs engine.
pub fn new_hdfs_engine(namenode: String, root: String) -> ObjectResult<Self> {
@@ -26,7 +28,8 @@ impl OpendalObjectStore {
// Set the name node for hdfs.
builder.name_node(&namenode);
builder.root(&root);
-
+ let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR);
+ builder.atomic_write_dir(&atomic_write_dir);
let op: Operator = Operator::new(builder)?
.layer(LoggingLayer::default())
.layer(RetryLayer::default())
diff --git a/src/object_store/src/object/opendal_engine/mod.rs b/src/object_store/src/object/opendal_engine/mod.rs
index 1620ee30da7d7..c1ab929d5586f 100644
--- a/src/object_store/src/object/opendal_engine/mod.rs
+++ b/src/object_store/src/object/opendal_engine/mod.rs
@@ -26,8 +26,11 @@ pub mod gcs;
pub mod obs;
-pub mod oss;
-
pub mod azblob;
+pub mod opendal_s3;
+pub mod oss;
pub mod fs;
+
+// To make sure the operation is consistent, we need to explicitly set `atomic_write_dir` for the fs, hdfs and webhdfs services.
+const ATOMIC_WRITE_DIR: &str = "atomic_write_dir/";
diff --git a/src/object_store/src/object/opendal_engine/opendal_object_store.rs b/src/object_store/src/object/opendal_engine/opendal_object_store.rs
index 19bddcfc7ac52..122506d37cdfa 100644
--- a/src/object_store/src/object/opendal_engine/opendal_object_store.rs
+++ b/src/object_store/src/object/opendal_engine/opendal_object_store.rs
@@ -38,6 +38,7 @@ pub enum EngineType {
Memory,
Hdfs,
Gcs,
+ S3,
Obs,
Oss,
Webhdfs,
@@ -158,7 +159,7 @@ impl ObjectStore for OpendalObjectStore {
.op
.lister_with(prefix)
.recursive(true)
- .metakey(Metakey::ContentLength | Metakey::ContentType)
+ .metakey(Metakey::ContentLength)
.await?;
let stream = stream::unfold(object_lister, |mut object_lister| async move {
@@ -190,6 +191,7 @@ impl ObjectStore for OpendalObjectStore {
match self.engine_type {
EngineType::Memory => "Memory",
EngineType::Hdfs => "Hdfs",
+ EngineType::S3 => "S3",
EngineType::Gcs => "Gcs",
EngineType::Obs => "Obs",
EngineType::Oss => "Oss",
@@ -206,7 +208,11 @@ pub struct OpendalStreamingUploader {
}
impl OpendalStreamingUploader {
pub async fn new(op: Operator, path: String) -> ObjectResult<Self> {
- let writer = op.writer_with(&path).buffer(OPENDAL_BUFFER_SIZE).await?;
+ let writer = op
+ .writer_with(&path)
+ .concurrent(8)
+ .buffer(OPENDAL_BUFFER_SIZE)
+ .await?;
Ok(Self { writer })
}
}
diff --git a/src/object_store/src/object/opendal_engine/opendal_s3.rs b/src/object_store/src/object/opendal_engine/opendal_s3.rs
new file mode 100644
index 0000000000000..c10aff55d342b
--- /dev/null
+++ b/src/object_store/src/object/opendal_engine/opendal_s3.rs
@@ -0,0 +1,63 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::time::Duration;
+
+use opendal::layers::{LoggingLayer, RetryLayer};
+use opendal::services::S3;
+use opendal::Operator;
+use risingwave_common::config::ObjectStoreConfig;
+
+use super::{EngineType, OpendalObjectStore};
+use crate::object::ObjectResult;
+
+impl OpendalObjectStore {
+ /// create opendal s3 engine.
+ pub fn new_s3_engine(
+ bucket: String,
+ object_store_config: ObjectStoreConfig,
+ ) -> ObjectResult<Self> {
+ // Create s3 builder.
+ let mut builder = S3::default();
+ builder.bucket(&bucket);
+ // For AWS S3 there is no need to set an endpoint; for other S3-compatible object stores, this field must be set.
+ if let Ok(endpoint_url) = std::env::var("RW_S3_ENDPOINT") {
+ builder.endpoint(&endpoint_url);
+ }
+
+ if std::env::var("RW_IS_FORCE_PATH_STYLE").is_err() {
+ builder.enable_virtual_host_style();
+ }
+
+ let op: Operator = Operator::new(builder)?
+ .layer(LoggingLayer::default())
+ .layer(
+ RetryLayer::new()
+ .with_min_delay(Duration::from_millis(
+ object_store_config.s3.object_store_req_retry_interval_ms,
+ ))
+ .with_max_delay(Duration::from_millis(
+ object_store_config.s3.object_store_req_retry_max_delay_ms,
+ ))
+ .with_max_times(object_store_config.s3.object_store_req_retry_max_attempts)
+ .with_factor(1.0)
+ .with_jitter(),
+ )
+ .finish();
+ Ok(Self {
+ op,
+ engine_type: EngineType::S3,
+ })
+ }
+}
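A hedged usage sketch for the new engine, assuming `ObjectStoreConfig` implements `Default` and that `RW_S3_ENDPOINT` / `RW_IS_FORCE_PATH_STYLE` are picked up from the environment as above; the bucket name and helper function are made up, and the imports assume the same module paths used by this crate:

    use risingwave_common::config::ObjectStoreConfig;

    // Hypothetical helper inside the object store crate (ObjectResult and
    // OpendalObjectStore are in scope there, as in the file above).
    fn opendal_s3_for_tests() -> ObjectResult<OpendalObjectStore> {
        OpendalObjectStore::new_s3_engine("my-test-bucket".to_string(), ObjectStoreConfig::default())
    }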
diff --git a/src/object_store/src/object/opendal_engine/webhdfs.rs b/src/object_store/src/object/opendal_engine/webhdfs.rs
index ff61b39ec9e79..1f6b87b44fd5e 100644
--- a/src/object_store/src/object/opendal_engine/webhdfs.rs
+++ b/src/object_store/src/object/opendal_engine/webhdfs.rs
@@ -17,6 +17,7 @@ use opendal::services::Webhdfs;
use opendal::Operator;
use super::{EngineType, OpendalObjectStore};
+use crate::object::opendal_engine::ATOMIC_WRITE_DIR;
use crate::object::ObjectResult;
impl OpendalObjectStore {
@@ -30,6 +31,8 @@ impl OpendalObjectStore {
// NOTE: the root must be absolute path.
builder.root(&root);
+ let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR);
+ builder.atomic_write_dir(&atomic_write_dir);
let op: Operator = Operator::new(builder)?
.layer(LoggingLayer::default())
.layer(RetryLayer::default())
diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs
index 3dd923b610542..e876a197c265d 100644
--- a/src/sqlparser/src/ast/statement.rs
+++ b/src/sqlparser/src/ast/statement.rs
@@ -94,6 +94,7 @@ pub struct CreateSourceStatement {
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Format {
Native,
+ None, // Keyword::NONE
Debezium, // Keyword::DEBEZIUM
DebeziumMongo, // Keyword::DEBEZIUM_MONGO
Maxwell, // Keyword::MAXWELL
@@ -116,6 +117,7 @@ impl fmt::Display for Format {
Format::Canal => "CANAL",
Format::Upsert => "UPSERT",
Format::Plain => "PLAIN",
+ Format::None => "NONE",
}
)
}
@@ -149,6 +151,7 @@ pub enum Encode {
Protobuf, // Keyword::PROTOBUF
Json, // Keyword::JSON
Bytes, // Keyword::BYTES
+ None, // Keyword::None
Native,
Template,
}
@@ -167,6 +170,7 @@ impl fmt::Display for Encode {
Encode::Bytes => "BYTES",
Encode::Native => "NATIVE",
Encode::Template => "TEMPLATE",
+ Encode::None => "NONE",
}
)
}
@@ -249,6 +253,18 @@ impl Parser {
} else {
ConnectorSchema::native().into()
})
+ } else if connector.contains("iceberg") {
+ let expected = ConnectorSchema::none();
+ if self.peek_source_schema_format() {
+ let schema = parse_source_schema(self)?.into_v2();
+ if schema != expected {
+ return Err(ParserError::ParserError(format!(
+ "Row format for iceberg connectors should be \
+ either omitted or set to `{expected}`",
+ )));
+ }
+ }
+ Ok(expected.into())
} else {
Ok(parse_source_schema(self)?)
}
@@ -304,6 +320,16 @@ impl ConnectorSchema {
}
}
+ /// Create a new source schema with `None` format and encoding.
+ /// Used for self-explanatory sources like iceberg.
+ pub const fn none() -> Self {
+ ConnectorSchema {
+ format: Format::None,
+ row_encode: Encode::None,
+ row_options: Vec::new(),
+ }
+ }
+
pub fn row_options(&self) -> &[SqlOption] {
self.row_options.as_ref()
}
diff --git a/src/storage/src/hummock/file_cache/store.rs b/src/storage/src/hummock/file_cache/store.rs
index 3435227bd317b..c640ba8f1db58 100644
--- a/src/storage/src/hummock/file_cache/store.rs
+++ b/src/storage/src/hummock/file_cache/store.rs
@@ -701,13 +701,8 @@ mod tests {
builder.add_for_test(construct_full_key_struct(0, b"k3", 3), b"v03");
builder.add_for_test(construct_full_key_struct(0, b"k4", 4), b"v04");
- Box::new(
- Block::decode(
- builder.build().to_vec().into(),
- builder.uncompressed_block_size(),
- )
- .unwrap(),
- )
+ let uncompress = builder.uncompressed_block_size();
+ Box::new(Block::decode(builder.build().to_vec().into(), uncompress).unwrap())
}
fn sstable_for_test() -> Sstable {
diff --git a/src/storage/src/hummock/sstable/block.rs b/src/storage/src/hummock/sstable/block.rs
index 3d0b4f8c0f770..fe465bba5b41f 100644
--- a/src/storage/src/hummock/sstable/block.rs
+++ b/src/storage/src/hummock/sstable/block.rs
@@ -215,20 +215,20 @@ impl Block {
let mut decoder = lz4::Decoder::new(compressed_data.reader())
.map_err(HummockError::decode_error)?;
let mut decoded = Vec::with_capacity(uncompressed_capacity);
- decoder
+ let read_size = decoder
.read_to_end(&mut decoded)
.map_err(HummockError::decode_error)?;
- debug_assert_eq!(decoded.capacity(), uncompressed_capacity);
+ assert_eq!(read_size, uncompressed_capacity);
Bytes::from(decoded)
}
CompressionAlgorithm::Zstd => {
let mut decoder = zstd::Decoder::new(compressed_data.reader())
.map_err(HummockError::decode_error)?;
let mut decoded = Vec::with_capacity(uncompressed_capacity);
- decoder
+ let read_size = decoder
.read_to_end(&mut decoded)
.map_err(HummockError::decode_error)?;
- debug_assert_eq!(decoded.capacity(), uncompressed_capacity);
+ assert_eq!(read_size, uncompressed_capacity);
Bytes::from(decoded)
}
};
@@ -445,6 +445,8 @@ impl Default for BlockBuilderOptions {
pub struct BlockBuilder {
/// Write buffer.
buf: BytesMut,
+ /// Compress buffer
+ compress_buf: BytesMut,
/// Entry interval between restart points.
restart_count: usize,
/// Restart points.
@@ -465,8 +467,9 @@ pub struct BlockBuilder {
impl BlockBuilder {
pub fn new(options: BlockBuilderOptions) -> Self {
Self {
- // add more space to avoid re-allocate space.
- buf: BytesMut::with_capacity(options.capacity + 256),
+ // Reserve extra space to avoid re-allocation (for restart_points and restart_points_type_index).
+ buf: BytesMut::with_capacity(Self::buf_reserve_size(&options)),
+ compress_buf: BytesMut::default(),
restart_count: options.restart_interval,
restart_points: Vec::with_capacity(
options.capacity / DEFAULT_ENTRY_SIZE / options.restart_interval + 1,
@@ -664,22 +667,35 @@ impl BlockBuilder {
);
self.buf.put_u32_le(self.table_id.unwrap());
- if self.compression_algorithm != CompressionAlgorithm::None {
- self.buf = Self::compress(&self.buf[..], self.compression_algorithm);
- }
+ let result_buf = if self.compression_algorithm != CompressionAlgorithm::None {
+ self.compress_buf.clear();
+ self.compress_buf = Self::compress(
+ &self.buf[..],
+ self.compression_algorithm,
+ std::mem::take(&mut self.compress_buf),
+ );
+
+ &mut self.compress_buf
+ } else {
+ &mut self.buf
+ };
- self.compression_algorithm.encode(&mut self.buf);
- let checksum = xxhash64_checksum(&self.buf);
- self.buf.put_u64_le(checksum);
+ self.compression_algorithm.encode(result_buf);
+ let checksum = xxhash64_checksum(result_buf);
+ result_buf.put_u64_le(checksum);
assert!(
- self.buf.len() < (u32::MAX) as usize,
+ result_buf.len() < (u32::MAX) as usize,
"buf_len {} entry_count {} table {:?}",
- self.buf.len(),
+ result_buf.len(),
self.entry_count,
self.table_id
);
- self.buf.as_ref()
+ if self.compression_algorithm != CompressionAlgorithm::None {
+ self.compress_buf.as_ref()
+ } else {
+ self.buf.as_ref()
+ }
}
pub fn compress_block(
@@ -693,21 +709,29 @@ impl BlockBuilder {
let compression = CompressionAlgorithm::decode(&mut &buf[buf.len() - 9..buf.len() - 8])?;
let compressed_data = &buf[..buf.len() - 9];
assert_eq!(compression, CompressionAlgorithm::None);
- let mut writer = Self::compress(compressed_data, target_compression);
+ let mut compress_writer = Self::compress(
+ compressed_data,
+ target_compression,
+ BytesMut::with_capacity(buf.len()),
+ );
- target_compression.encode(&mut writer);
- let checksum = xxhash64_checksum(&writer);
- writer.put_u64_le(checksum);
- Ok(writer.freeze())
+ target_compression.encode(&mut compress_writer);
+ let checksum = xxhash64_checksum(&compress_writer);
+ compress_writer.put_u64_le(checksum);
+ Ok(compress_writer.freeze())
}
- pub fn compress(buf: &[u8], compression_algorithm: CompressionAlgorithm) -> BytesMut {
+ pub fn compress(
+ buf: &[u8],
+ compression_algorithm: CompressionAlgorithm,
+ compress_writer: BytesMut,
+ ) -> BytesMut {
match compression_algorithm {
CompressionAlgorithm::None => unreachable!(),
CompressionAlgorithm::Lz4 => {
let mut encoder = lz4::EncoderBuilder::new()
.level(4)
- .build(BytesMut::with_capacity(buf.len()).writer())
+ .build(compress_writer.writer())
.map_err(HummockError::encode_error)
.unwrap();
encoder
@@ -719,10 +743,9 @@ impl BlockBuilder {
writer.into_inner()
}
CompressionAlgorithm::Zstd => {
- let mut encoder =
- zstd::Encoder::new(BytesMut::with_capacity(buf.len()).writer(), 4)
- .map_err(HummockError::encode_error)
- .unwrap();
+ let mut encoder = zstd::Encoder::new(compress_writer.writer(), 4)
+ .map_err(HummockError::encode_error)
+ .unwrap();
encoder
.write_all(buf)
.map_err(HummockError::encode_error)
@@ -762,6 +785,10 @@ impl BlockBuilder {
pub fn table_id(&self) -> Option<u32> {
self.table_id
}
+
+ fn buf_reserve_size(option: &BlockBuilderOptions) -> usize {
+ option.capacity + 1024 + 256
+ }
}
#[cfg(test)]
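The builder now threads a reusable `compress_buf` into `compress()` so each block reuses one allocation instead of creating a fresh `BytesMut` per block. A minimal sketch of that reuse pattern with the `bytes` crate, with the actual compression elided:

    use bytes::{BufMut, BytesMut};

    struct Builder {
        out_buf: BytesMut, // reused across blocks instead of allocating per block
    }

    impl Builder {
        /// Stand-in for `build()`: clear and refill the reused buffer, then hand out a slice.
        /// The real code passes the buffer into `Self::compress` via `std::mem::take`.
        fn finish_block(&mut self, raw: &[u8]) -> &[u8] {
            self.out_buf.clear();
            self.out_buf.put_slice(raw); // compression is elided; only the buffer reuse is shown
            &self.out_buf
        }
    }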
diff --git a/src/storage/src/hummock/sstable/builder.rs b/src/storage/src/hummock/sstable/builder.rs
index 4fe331f677321..ebaa60e167056 100644
--- a/src/storage/src/hummock/sstable/builder.rs
+++ b/src/storage/src/hummock/sstable/builder.rs
@@ -240,7 +240,6 @@ impl SstableBuilder {
self.add(full_key, value).await
}
- /// only for test
pub fn current_block_size(&self) -> usize {
self.block_builder.approximate_len()
}
@@ -344,6 +343,12 @@ impl SstableBuilder {
|| !user_key(&self.raw_key).eq(user_key(&self.last_full_key));
let table_id = full_key.user_key.table_id.table_id();
let is_new_table = self.last_table_id.is_none() || self.last_table_id.unwrap() != table_id;
+ let current_block_size = self.current_block_size();
+ let is_block_full = current_block_size >= self.options.block_capacity
+ || (current_block_size > self.options.block_capacity / 4 * 3
+ && current_block_size + self.raw_value.len() + self.raw_key.len()
+ > self.options.block_capacity);
+
if is_new_table {
assert!(
could_switch_block,
@@ -356,9 +361,7 @@ impl SstableBuilder {
if !self.block_builder.is_empty() {
self.build_block().await?;
}
- } else if self.block_builder.approximate_len() >= self.options.block_capacity
- && could_switch_block
- {
+ } else if is_block_full && could_switch_block {
self.build_block().await?;
}
self.last_table_stats.total_key_count += 1;
@@ -704,6 +707,15 @@ impl SstableBuilder {
data_len, block_meta.offset
)
});
+
+ if data_len as usize > self.options.capacity * 2 {
+ tracing::warn!(
+ "WARN unexpected block size {} table {:?}",
+ data_len,
+ self.block_builder.table_id()
+ );
+ }
+
self.block_builder.clear();
Ok(())
}
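The new `is_block_full` condition seals a block not only once it reaches `block_capacity`, but also when it is already more than three quarters full and the next key/value pair would push it past the target, which keeps block sizes close to `block_capacity` instead of overshooting. The predicate in isolation:

    /// Decide whether to seal the current block before appending the next entry.
    /// `current` is the encoded size of the block so far; mirrors the condition above.
    fn is_block_full(current: usize, next_key: usize, next_value: usize, capacity: usize) -> bool {
        current >= capacity
            || (current > capacity / 4 * 3 && current + next_key + next_value > capacity)
    }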
diff --git a/src/storage/src/monitor/hummock_state_store_metrics.rs b/src/storage/src/monitor/hummock_state_store_metrics.rs
index 6954263010333..5932185ecd5f7 100644
--- a/src/storage/src/monitor/hummock_state_store_metrics.rs
+++ b/src/storage/src/monitor/hummock_state_store_metrics.rs
@@ -291,11 +291,6 @@ impl HummockStateStoreMetrics {
registry
)
.unwrap();
- let spill_task_counts = RelabeledCounterVec::with_metric_level(
- MetricLevel::Debug,
- spill_task_counts,
- metric_level,
- );
let spill_task_size = register_int_counter_vec_with_registry!(
"state_store_spill_task_size",
@@ -304,11 +299,6 @@ impl HummockStateStoreMetrics {
registry
)
.unwrap();
- let spill_task_size = RelabeledCounterVec::with_metric_level(
- MetricLevel::Debug,
- spill_task_size,
- metric_level,
- );
let uploader_uploading_task_size = GenericGauge::new(
"state_store_uploader_uploading_task_size",
@@ -327,10 +317,11 @@ impl HummockStateStoreMetrics {
)
.unwrap();
let read_req_bloom_filter_positive_counts =
- RelabeledGuardedIntCounterVec::with_metric_level(
+ RelabeledGuardedIntCounterVec::with_metric_level_relabel_n(
MetricLevel::Info,
read_req_bloom_filter_positive_counts,
metric_level,
+ 1,
);
let read_req_positive_but_non_exist_counts = register_guarded_int_counter_vec_with_registry!(
diff --git a/src/stream/src/cache/managed_lru.rs b/src/stream/src/cache/managed_lru.rs
index d91eb664d43a2..9773f3fb51bf0 100644
--- a/src/stream/src/cache/managed_lru.rs
+++ b/src/stream/src/cache/managed_lru.rs
@@ -156,6 +156,14 @@
+ pub fn peek<Q>(&self, k: &Q) -> Option<&V>
+ where
+ K: Borrow<Q>,
+ Q: Hash + Eq + ?Sized,
+ {
+ self.inner.peek(k)
+ }
+
pub fn peek_mut(&mut self, k: &K) -> Option> {
let v = self.inner.peek_mut(k);
v.map(|inner| {
diff --git a/src/stream/src/executor/source/source_executor.rs b/src/stream/src/executor/source/source_executor.rs
index 8ad653c5f8397..e2567bb141492 100644
--- a/src/stream/src/executor/source/source_executor.rs
+++ b/src/stream/src/executor/source/source_executor.rs
@@ -24,7 +24,7 @@ use risingwave_common::system_param::local_manager::SystemParamsReaderRef;
use risingwave_common::system_param::reader::SystemParamsRead;
use risingwave_connector::source::reader::desc::{SourceDesc, SourceDescBuilder};
use risingwave_connector::source::{
- BoxChunkSourceStream, ConnectorState, SourceContext, SourceCtrlOpts, SplitMetaData,
+ BoxChunkSourceStream, ConnectorState, SourceContext, SourceCtrlOpts, SplitId, SplitMetaData,
};
use risingwave_connector::ConnectorParams;
use risingwave_storage::StateStore;
@@ -138,13 +138,21 @@ impl SourceExecutor {
]
}
- /// Returns `target_states` if split changed. Otherwise `None`.
+ /// - `should_trim_state`: whether to trim state for dropped splits.
+ ///
+ /// For scaling, the connector splits can be migrated to other actors, but
+ /// won't be added or removed. Actors should not trim states for splits that
+ /// are moved to other actors.
+ ///
+ /// For source split change, split will not be migrated and we can trim states
+ /// for deleted splits.
async fn apply_split_change(
&mut self,
source_desc: &SourceDesc,
stream: &mut StreamReaderWithPause,
split_assignment: &HashMap>,
- ) -> StreamExecutorResult