diff --git a/Cargo.lock b/Cargo.lock index ec284f3ba4778..1c074e276553a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -685,19 +685,6 @@ dependencies = [ "futures-core", ] -[[package]] -name = "async-compat" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b48b4ff0c2026db683dea961cd8ea874737f56cffca86fa84415eaddc51c00d" -dependencies = [ - "futures-core", - "futures-io", - "once_cell", - "pin-project-lite", - "tokio", -] - [[package]] name = "async-compression" version = "0.4.5" @@ -6658,12 +6645,11 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "opendal" -version = "0.44.0" +version = "0.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c32736a48ef08a5d2212864e2295c8e54f4d6b352b7f49aa0c29a12fc410ff66" +checksum = "4af824652d4d2ffabf606d337a071677ae621b05622adf35df9562f69d9b4498" dependencies = [ "anyhow", - "async-compat", "async-trait", "backon", "base64 0.21.4", @@ -6676,9 +6662,7 @@ dependencies = [ "log", "md-5", "once_cell", - "parking_lot 0.12.1", "percent-encoding", - "pin-project", "prometheus", "quick-xml 0.30.0", "reqsign", @@ -7516,6 +7500,7 @@ dependencies = [ "postgres-protocol", "serde", "serde_json", + "uuid", ] [[package]] @@ -9058,6 +9043,7 @@ dependencies = [ "tracing-test", "url", "urlencoding", + "uuid", "walkdir", "with_options", "workspace-hack", diff --git a/Makefile.toml b/Makefile.toml index 983b304d74e51..8820acf67c7bd 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -757,10 +757,10 @@ tmux list-windows -t risedev -F "#{window_name} #{pane_id}" \ if [[ -n $(tmux list-windows -t risedev | grep kafka) ]]; then echo "kill kafka" - kill_kafka + kill_kafka || true echo "kill zookeeper" - kill_zookeeper + kill_zookeeper || true # Kill their tmux sessions tmux list-windows -t risedev -F "#{pane_id}" | xargs -I {} tmux send-keys -t {} C-c C-d diff --git a/README.md b/README.md index 44443cfab8282..1611af1815175 
100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ Don’t have Docker? Learn how to install RisingWave on Mac, Ubuntu, and other e ## Production deployments -For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-trial/?method=docker-compose). +For **single-node deployment**, please refer to [Docker Compose](https://docs.risingwave.com/docs/current/risingwave-docker-compose/). For **distributed deployment**, please refer to [Kubernetes with Helm](https://docs.risingwave.com/docs/current/risingwave-k8s-helm/) or [Kubernetes with Operator](https://docs.risingwave.com/docs/current/risingwave-kubernetes/). diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 4a9f2970b84c7..db017be647376 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -88,10 +88,27 @@ services: - message_queue - elasticsearch - clickhouse-server - - pulsar + - redis-server + - pulsar-server + - cassandra-server + - starrocks-fe-server + - starrocks-be-server volumes: - ..:/risingwave + sink-doris-env: + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231109 + depends_on: + - doris-fe-server + - doris-be-server + volumes: + - ..:/risingwave + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + networks: + mynetwork: + ipv4_address: 172.121.0.4 + rw-build-env: image: public.ecr.aws/x5u3w5h6/rw-build-env:v20240213 volumes: @@ -159,10 +176,96 @@ services: expose: - 9009 -# Temporary workaround for json schema registry test since redpanda only supports -# protobuf/avro schema registry. Should be removed after the support. 
-# Related tracking issue: -# https://github.com/redpanda-data/redpanda/issues/1878 + redis-server: + container_name: redis-server + image: 'redis:latest' + expose: + - 6379 + ports: + - 6378:6379 + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 30s + retries: 50 + + doris-fe-server: + platform: linux/amd64 + image: apache/doris:2.0.0_alpha-fe-x86_64 + hostname: doris-fe-server + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + environment: + - FE_SERVERS=fe1:172.121.0.2:9010 + - FE_ID=1 + ports: + - "8030:8030" + - "9030:9030" + networks: + mynetwork: + ipv4_address: 172.121.0.2 + + doris-be-server: + platform: linux/amd64 + image: apache/doris:2.0.0_alpha-be-x86_64 + hostname: doris-be-server + command: > + sh -c "sudo sysctl -w vm.max_map_count=2000000" + environment: + - FE_SERVERS=fe1:172.121.0.2:9010 + - BE_ADDR=172.121.0.3:9050 + depends_on: + - doris-fe-server + ports: + - "9050:9050" + networks: + mynetwork: + ipv4_address: 172.121.0.3 + + cassandra-server: + container_name: cassandra-server + image: cassandra:4.0 + ports: + - 9042:9042 + environment: + - CASSANDRA_CLUSTER_NAME=cloudinfra + + starrocks-fe-server: + container_name: starrocks-fe-server + image: starrocks/fe-ubuntu:3.1.7 + hostname: starrocks-fe-server + command: + /opt/starrocks/fe/bin/start_fe.sh + ports: + - 28030:8030 + - 29020:9020 + - 29030:9030 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9030"] + interval: 5s + timeout: 5s + retries: 30 + + starrocks-be-server: + image: starrocks/be-ubuntu:3.1.7 + command: + - /bin/bash + - -c + - | + sleep 15s; mysql --connect-timeout 2 -h starrocks-fe-server -P9030 -uroot -e "alter system add backend \"starrocks-be-server:9050\";" + /opt/starrocks/be/bin/start_be.sh + ports: + - 28040:8040 + - 29050:9050 + hostname: starrocks-be-server + container_name: starrocks-be-server + depends_on: + - starrocks-fe-server + +# # Temporary workaround for json schema registry test since redpanda only 
supports +# # protobuf/avro schema registry. Should be removed after the support. +# # Related tracking issue: +# # https://github.com/redpanda-data/redpanda/issues/1878 zookeeper: container_name: zookeeper image: confluentinc/cp-zookeeper:latest @@ -201,8 +304,8 @@ services: KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9093,PLAINTEXT_INTERNAL://localhost:29093 KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - pulsar: - container_name: pulsar + pulsar-server: + container_name: pulsar-server image: apachepulsar/pulsar:latest command: bin/pulsar standalone ports: @@ -216,3 +319,10 @@ services: interval: 5s timeout: 5s retries: 5 +networks: + mynetwork: + ipam: + config: + # Network address of the /16 (no host bits set); the static 172.121.0.x service IPs on mynetwork fall inside it. + - subnet: 172.121.0.0/16 + default: diff --git a/ci/scripts/e2e-cassandra-sink-test.sh b/ci/scripts/e2e-cassandra-sink-test.sh new file mode 100755 index 0000000000000..c393d510d19a2 --- /dev/null +++ b/ci/scripts/e2e-cassandra-sink-test.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +# prepare environment +export CONNECTOR_LIBS_PATH="./connector-node/libs" + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? 
) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + +echo "--- create cassandra table" +curl https://downloads.apache.org/cassandra/4.1.3/apache-cassandra-4.1.3-bin.tar.gz --output apache-cassandra-4.1.3-bin.tar.gz +tar xfvz apache-cassandra-4.1.3-bin.tar.gz +cd apache-cassandra-4.1.3/bin +export CQLSH_HOST=cassandra-server +export CQLSH_PORT=9042 +./cqlsh -e "CREATE KEYSPACE demo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};use demo; +CREATE table demo_bhv_table(v1 int primary key,v2 smallint,v3 bigint,v4 float,v5 double,v6 text,v7 date,v8 timestamp,v9 boolean);" + +echo "--- testing sinks" +cd ../../ +sqllogictest -p 4566 -d dev './e2e_test/sink/cassandra_sink.slt' +sleep 1 +cd apache-cassandra-4.1.3/bin +./cqlsh -e "COPY demo.demo_bhv_table TO './query_result.csv' WITH HEADER = false AND ENCODING = 'UTF-8';" + +if cat ./query_result.csv | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01.000+0000" && $9 == "False\r"); }'; then + echo "Cassandra sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cd ../../ +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-clickhouse-sink-test.sh b/ci/scripts/e2e-clickhouse-sink-test.sh index 3464bd3c3c14d..c14d83e8c4281 100755 --- a/ci/scripts/e2e-clickhouse-sink-test.sh +++ b/ci/scripts/e2e-clickhouse-sink-test.sh @@ -24,7 +24,7 @@ shift $((OPTIND -1)) download_and_prepare_rw "$profile" source echo "--- starting risingwave cluster" -cargo make ci-start ci-clickhouse-test +cargo make ci-start ci-sink-test sleep 1 diff --git a/ci/scripts/e2e-deltalake-sink-rust-test.sh b/ci/scripts/e2e-deltalake-sink-rust-test.sh index 71ff1eede8e4d..cc0c287e8b572 100755 --- a/ci/scripts/e2e-deltalake-sink-rust-test.sh +++ b/ci/scripts/e2e-deltalake-sink-rust-test.sh @@ -32,8 +32,7 @@ mkdir ./connector-node tar xf ./risingwave-connector.tar.gz -C ./connector-node echo "--- starting risingwave cluster" -mkdir -p .risingwave/log -cargo make ci-start ci-deltalake-test +cargo make ci-start ci-sink-test sleep 1 # prepare minio deltalake sink diff --git a/ci/scripts/e2e-doris-sink-test.sh b/ci/scripts/e2e-doris-sink-test.sh new file mode 100755 index 0000000000000..30bfdaf129e26 --- /dev/null +++ b/ci/scripts/e2e-doris-sink-test.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? 
) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + +echo "--- create doris table" +apt-get update -y && apt-get install -y mysql-client +sleep 2 +mysql -uroot -P 9030 -h doris-fe-server -e "CREATE database demo;use demo; +CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 datev2,v8 datetime,v9 boolean) UNIQUE KEY(\`v1\`) +DISTRIBUTED BY HASH(\`v1\`) BUCKETS 1 +PROPERTIES ( + \"replication_allocation\" = \"tag.location.default: 1\" +); +CREATE USER 'users'@'%' IDENTIFIED BY '123456'; +GRANT ALL ON *.* TO 'users'@'%';" +sleep 2 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/doris_sink.slt' +sleep 1 +mysql -uroot -P 9030 -h doris-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv + + +if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0); }'; then + echo "Doris sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-pulsar-sink-test.sh b/ci/scripts/e2e-pulsar-sink-test.sh index ee8848832f940..f942ad945b3e9 100755 --- a/ci/scripts/e2e-pulsar-sink-test.sh +++ b/ci/scripts/e2e-pulsar-sink-test.sh @@ -21,7 +21,7 @@ shift $((OPTIND -1)) download_and_prepare_rw "$profile" source echo "--- starting risingwave cluster" -cargo make ci-start ci-pulsar-test +cargo make ci-start ci-sink-test sleep 1 echo "--- waiting until pulsar is healthy" diff --git a/ci/scripts/e2e-redis-sink-test.sh b/ci/scripts/e2e-redis-sink-test.sh new file mode 100755 index 0000000000000..cf64662db4051 --- /dev/null +++ b/ci/scripts/e2e-redis-sink-test.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +apt-get update -y && apt-get install -y redis-server +sleep 1 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/redis_sink.slt' +sleep 1 + +redis-cli -h redis-server -p 6379 get {\"v1\":1} >> ./query_result.txt +redis-cli -h redis-server -p 6379 get V1:1 >> ./query_result.txt + +# check sink destination using shell +if cat ./query_result.txt | tr '\n' '\0' | xargs -0 -n1 bash -c '[[ "$0" == "{\"v1\":1,\"v2\":1,\"v3\":1,\"v4\":1.100000023841858,\"v5\":1.2,\"v6\":\"test\",\"v7\":734869,\"v8\":\"2013-01-01T01:01:01.000000Z\",\"v9\":false}" || "$0" == "V2:1,V3:1" ]]'; then + echo "Redis sink check passed" +else + cat ./query_result.txt + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/scripts/e2e-starrocks-sink-test.sh b/ci/scripts/e2e-starrocks-sink-test.sh new file mode 100755 index 0000000000000..256f4448f9198 --- /dev/null +++ b/ci/scripts/e2e-starrocks-sink-test.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- starting risingwave cluster" +cargo make ci-start ci-sink-test +sleep 1 + + +echo "--- create starrocks table" +apt-get update -y && apt-get install -y mysql-client +sleep 2 +mysql -uroot -P 9030 -h starrocks-fe-server -e "CREATE database demo;use demo; +CREATE table demo_bhv_table(v1 int,v2 smallint,v3 bigint,v4 float,v5 double,v6 string,v7 date,v8 datetime,v9 boolean,v10 json) ENGINE=OLAP +PRIMARY KEY(\`v1\`) +DISTRIBUTED BY HASH(\`v1\`) properties(\"replication_num\" = \"1\"); +CREATE USER 'users'@'%' IDENTIFIED BY '123456'; +GRANT ALL ON *.* TO 'users'@'%';" +sleep 2 + +echo "--- testing sinks" +sqllogictest -p 4566 -d dev './e2e_test/sink/starrocks_sink.slt' +sleep 1 +mysql -uroot -P 9030 -h starrocks-fe-server -e "select * from demo.demo_bhv_table" > ./query_result.csv + +# Every column must match exactly; the JSON column v10 is compared with == (a single = would assign and always pass). +if cat ./query_result.csv | sed '1d; s/\t/,/g' | awk -F "," '{ + exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01" && $9 == 0 && $10 == "{\"v101\": 100}"); }'; then + echo "Starrocks sink check passed" +else + cat ./query_result.csv + echo "The output is not as expected." 
+ exit 1 +fi + +echo "--- Kill cluster" +cargo make ci-kill \ No newline at end of file diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index 835c46fb01e60..934458bcca1bc 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -815,6 +815,94 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end redis sink test" + key: "e2e-redis-sink-tests" + command: "ci/scripts/e2e-redis-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-redis-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-redis-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "set vm_max_map_count_2000000" + key: "set-vm_max_map_count" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + command: "sudo sysctl -w vm.max_map_count=2000000" + depends_on: + - "build" + - "build-other" + + - label: "end-to-end doris sink test" + key: "e2e-doris-sink-tests" + command: "ci/scripts/e2e-doris-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + - "set-vm_max_map_count" + plugins: + - docker-compose#v4.9.0: + run: sink-doris-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: 
*auto-retry + + - label: "end-to-end starrocks sink test" + key: "e2e-starrocks-sink-tests" + command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-starrocks-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end cassandra sink test" + key: "e2e-cassandra-sink-tests" + command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-release" + if: | + !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null + || build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests" + || build.env("CI_STEPS") =~ /(^|,)e2e-cassandra-sink-tests?(,|$$)/ + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "end-to-end clickhouse sink test" key: "e2e-clickhouse-sink-tests" command: "ci/scripts/e2e-clickhouse-sink-test.sh -p ci-release" diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml index c48de6df64f1c..a67f915d943cc 100644 --- a/ci/workflows/pull-request.yml +++ b/ci/workflows/pull-request.yml @@ -292,6 +292,75 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end redis sink test" + if: build.pull_request.labels includes "ci/run-e2e-redis-sink-tests" || build.env("CI_STEPS") =~ /(^|,)e2e-redis-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-redis-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - 
docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "set vm_max_map_count_2000000" + key: "set-vm_max_map_count" + if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + command: "sudo sysctl -w vm.max_map_count=2000000" + depends_on: + - "build" + - "build-other" + + - label: "end-to-end doris sink test" + if: build.pull_request.labels includes "ci/run-e2e-doris-sink-tests" || build.env("CI_STEPS") =~ /(^|,)e2e-doris-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-doris-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + - "set-vm_max_map_count" + plugins: + - docker-compose#v4.9.0: + run: sink-doris-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end starrocks sink test" + if: build.pull_request.labels includes "ci/run-e2e-starrocks-sink-tests" || build.env("CI_STEPS") =~ /(^|,)e2e-starrocks-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-starrocks-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + + - label: "end-to-end cassandra sink test" + if: build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests" || build.env("CI_STEPS") =~ /(^|,)e2e-cassandra-sink-tests?(,|$$)/ + command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: 
*auto-retry + - label: "e2e java-binding test" if: build.pull_request.labels includes "ci/run-java-binding-tests" || build.env("CI_STEPS") =~ /(^|,)java-binding-tests?(,|$$)/ command: "ci/scripts/java-binding-test.sh -p ci-dev" diff --git a/e2e_test/batch/catalog/pg_cast.slt.part b/e2e_test/batch/catalog/pg_cast.slt.part index b8ab68a5ed5cd..b1558d1e144c4 100644 --- a/e2e_test/batch/catalog/pg_cast.slt.part +++ b/e2e_test/batch/catalog/pg_cast.slt.part @@ -82,8 +82,9 @@ SELECT * FROM pg_catalog.pg_cast; 78 3802 701 e 79 3802 1700 e 80 3802 1043 a -81 1301 701 e -82 1301 1043 a +81 20 20 e +82 1301 701 e +83 1301 1043 a query TT rowsort SELECT s.typname, t.typname diff --git a/e2e_test/batch/catalog/pg_settings.slt.part b/e2e_test/batch/catalog/pg_settings.slt.part index 09e2546a856d8..c8e927ba72b9f 100644 --- a/e2e_test/batch/catalog/pg_settings.slt.part +++ b/e2e_test/batch/catalog/pg_settings.slt.part @@ -1,50 +1,102 @@ query TT -SELECT name FROM pg_catalog.pg_settings order by name; +SELECT context, name FROM pg_catalog.pg_settings ORDER BY (context, name); ---- -application_name -background_ddl -batch_enable_distributed_dml -batch_parallelism -bytea_output -client_encoding -client_min_messages -create_compaction_group_for_mv -datestyle -extra_float_digits -idle_in_transaction_session_timeout -intervalstyle -lock_timeout -max_split_range_gap -query_epoch -query_mode -row_security -rw_batch_enable_lookup_join -rw_batch_enable_sort_agg -rw_enable_join_ordering -rw_enable_share_plan -rw_enable_two_phase_agg -rw_force_split_distinct_agg -rw_force_two_phase_agg -rw_implicit_flush -rw_streaming_allow_jsonb_in_stream_key -rw_streaming_enable_bushy_join -rw_streaming_enable_delta_join -rw_streaming_over_window_cache_policy -search_path -server_encoding -server_version -server_version_num -sink_decouple -standard_conforming_strings -statement_timeout -streaming_enable_arrangement_backfill -streaming_parallelism -streaming_rate_limit -synchronize_seqscans -timezone 
-transaction_isolation -visibility_mode +internal block_size_kb +internal bloom_false_positive +internal data_directory +internal parallel_compact_size_mb +internal sstable_size_mb +internal state_store +internal wasm_storage_url +postmaster backup_storage_directory +postmaster backup_storage_url +postmaster barrier_interval_ms +postmaster checkpoint_frequency +postmaster enable_tracing +postmaster max_concurrent_creating_streaming_jobs +postmaster pause_on_next_bootstrap +user application_name +user background_ddl +user batch_enable_distributed_dml +user batch_parallelism +user bytea_output +user client_encoding +user client_min_messages +user create_compaction_group_for_mv +user datestyle +user extra_float_digits +user idle_in_transaction_session_timeout +user intervalstyle +user lock_timeout +user max_split_range_gap +user query_epoch +user query_mode +user row_security +user rw_batch_enable_lookup_join +user rw_batch_enable_sort_agg +user rw_enable_join_ordering +user rw_enable_share_plan +user rw_enable_two_phase_agg +user rw_force_split_distinct_agg +user rw_force_two_phase_agg +user rw_implicit_flush +user rw_streaming_allow_jsonb_in_stream_key +user rw_streaming_enable_bushy_join +user rw_streaming_enable_delta_join +user rw_streaming_over_window_cache_policy +user search_path +user server_encoding +user server_version +user server_version_num +user sink_decouple +user standard_conforming_strings +user statement_timeout +user streaming_enable_arrangement_backfill +user streaming_parallelism +user streaming_rate_limit +user synchronize_seqscans +user timezone +user transaction_isolation +user visibility_mode query TT SELECT * FROM pg_catalog.pg_settings where name='dummy'; ----- \ No newline at end of file +---- + +# https://github.com/risingwavelabs/risingwave/issues/15125 +query TT +SELECT min(name) name, context FROM pg_catalog.pg_settings GROUP BY context; +---- +application_name user +backup_storage_directory postmaster +block_size_kb internal + +# 
Tab-completion of `SET` command +query T +SELECT name +FROM + (SELECT pg_catalog.lower(name) AS name + FROM pg_catalog.pg_settings + WHERE context IN ('user', + 'superuser') + UNION ALL SELECT 'constraints' + UNION ALL SELECT 'transaction' + UNION ALL SELECT 'session' + UNION ALL SELECT 'role' + UNION ALL SELECT 'tablespace' + UNION ALL SELECT 'all') ss +WHERE substring(name, 1, 8)='search_p'; +---- +search_path + +# Tab-completion of `ALTER SYSTEM SET` command +query T +SELECT name +FROM + (SELECT pg_catalog.lower(name) AS name + FROM pg_catalog.pg_settings + WHERE context != 'internal' + UNION ALL SELECT 'all') ss +WHERE substring(name, 1, 7)='checkpo'; +---- +checkpoint_frequency diff --git a/e2e_test/batch/catalog/version.slt.part b/e2e_test/batch/catalog/version.slt.part index b2ba9e2a877c5..dc3e0399b1e6a 100644 --- a/e2e_test/batch/catalog/version.slt.part +++ b/e2e_test/batch/catalog/version.slt.part @@ -1,4 +1,4 @@ query T -select substring(version() from 1 for 14); +select substring(version() from 1 for 16); ---- -PostgreSQL 9.5 +PostgreSQL 13.14 diff --git a/e2e_test/batch/functions/setting.slt.part b/e2e_test/batch/functions/setting.slt.part index 77d1d80e46590..233399d80a025 100644 --- a/e2e_test/batch/functions/setting.slt.part +++ b/e2e_test/batch/functions/setting.slt.part @@ -1,12 +1,12 @@ query T SELECT current_setting('server_version'); ---- -9.5.0 +13.14.0 query I -SELECT CAST(current_setting('server_version_num') AS INT) / 100 AS version; +SELECT current_setting('server_version_num') AS version; ---- -905 +130014 query T SELECT set_config('client_min_messages', 'warning', false); diff --git a/e2e_test/error_ui/simple/main.slt b/e2e_test/error_ui/simple/main.slt index b4cebbdfeff70..3197544b45d75 100644 --- a/e2e_test/error_ui/simple/main.slt +++ b/e2e_test/error_ui/simple/main.slt @@ -27,7 +27,7 @@ db error: ERROR: Failed to run the query Caused by these errors (recent errors listed first): 1: gRPC request to meta service failed: Internal error 
- 2: SystemParams error: unrecognized system param "not_exist_key" + 2: SystemParams error: unrecognized system parameter "not_exist_key" query error diff --git a/e2e_test/sink/cassandra_sink.slt b/e2e_test/sink/cassandra_sink.slt new file mode 100644 index 0000000000000..7091e8da70783 --- /dev/null +++ b/e2e_test/sink/cassandra_sink.slt @@ -0,0 +1,33 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'cassandra', + type = 'append-only', + force_append_only='true', + cassandra.url = 'cassandra-server:9042', + cassandra.keyspace = 'demo', + cassandra.table = 'demo_bhv_table', + cassandra.datacenter = 'datacenter1', +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/doris_sink.slt b/e2e_test/sink/doris_sink.slt new file mode 100644 index 0000000000000..2c552bbb26143 --- /dev/null +++ b/e2e_test/sink/doris_sink.slt @@ -0,0 +1,34 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'doris', + type = 'append-only', + doris.url = 'http://doris-fe-server:8030', + doris.user = 'users', + doris.password = '123456', + doris.database = 'demo', + doris.table='demo_bhv_table', + force_append_only='true' +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + 
+statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/redis_sink.slt b/e2e_test/sink/redis_sink.slt new file mode 100644 index 0000000000000..7475a80ae696e --- /dev/null +++ b/e2e_test/sink/redis_sink.slt @@ -0,0 +1,41 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s61 +FROM + mv6 WITH ( + primary_key = 'v1', + connector = 'redis', + redis.url= 'redis://redis-server:6379/', +)FORMAT PLAIN ENCODE JSON(force_append_only='true'); + +statement ok +CREATE SINK s62 +FROM + mv6 WITH ( + primary_key = 'v1', + connector = 'redis', + redis.url= 'redis://redis-server:6379/', +)FORMAT PLAIN ENCODE TEMPLATE(force_append_only='true', key_format = 'V1:{v1}', value_format = 'V2:{v2},V3:{v3}'); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false); + +statement ok +FLUSH; + +statement ok +DROP SINK s61; + +statement ok +DROP SINK s62; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/sink/sink_into_table/basic.slt b/e2e_test/sink/sink_into_table/basic.slt index 1bc5a47907077..890087e207fd0 100644 --- a/e2e_test/sink/sink_into_table/basic.slt +++ b/e2e_test/sink/sink_into_table/basic.slt @@ -362,6 +362,35 @@ drop table t_b; statement ok drop table t_c; +# cycle check (with materialize view) + +statement ok +create table t_a(v int primary key); + +statement ok +create materialized view m_a as select v from t_a; + +statement ok +create table t_b(v int primary key); + +statement ok +create sink s_a into t_b as select v from m_a; + +statement error Creating such a sink will result in circular dependency +create sink s_b into t_a as select v from t_b; + +statement ok +drop sink 
s_a; + +statement ok +drop table t_b; + +statement ok +drop materialized view m_a; + +statement ok +drop table t_a; + # multi sinks statement ok diff --git a/e2e_test/sink/starrocks_sink.slt b/e2e_test/sink/starrocks_sink.slt new file mode 100644 index 0000000000000..a1ee1b0ffe039 --- /dev/null +++ b/e2e_test/sink/starrocks_sink.slt @@ -0,0 +1,36 @@ +statement ok +CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamp, v9 boolean, v10 jsonb); + +statement ok +CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6; + +statement ok +CREATE SINK s6 +FROM + mv6 WITH ( + connector = 'starrocks', + type = 'upsert', + starrocks.host = 'starrocks-fe-server', + starrocks.mysqlport = '9030', + starrocks.httpport = '8030', + starrocks.user = 'users', + starrocks.password = '123456', + starrocks.database = 'demo', + starrocks.table = 'demo_bhv_table', + primary_key = 'v1' +); + +statement ok +INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01' , false, '{"v101":100}'); + +statement ok +FLUSH; + +statement ok +DROP SINK s6; + +statement ok +DROP MATERIALIZED VIEW mv6; + +statement ok +DROP TABLE t6; \ No newline at end of file diff --git a/e2e_test/source/cdc/cdc.share_stream.slt b/e2e_test/source/cdc/cdc.share_stream.slt index d4b50ed4db6d6..7739d3f1ad6ea 100644 --- a/e2e_test/source/cdc/cdc.share_stream.slt +++ b/e2e_test/source/cdc/cdc.share_stream.slt @@ -205,6 +205,7 @@ CREATE TABLE IF NOT EXISTS postgres_all_types( c_timestamptz_array timestamptz[], c_interval_array interval[], c_jsonb_array jsonb[], + c_uuid varchar, PRIMARY KEY (c_boolean,c_bigint,c_date) ) from pg_source table 'public.postgres_all_types'; @@ -234,9 +235,9 @@ CREATE MATERIALIZED VIEW person_new_cnt AS SELECT COUNT(*) AS cnt FROM person_ne sleep 3s query TTTTTTT -SELECT c_boolean,c_date,c_time,c_timestamp,c_jsonb,c_smallint_array,c_timestamp_array FROM postgres_all_types where c_bigint=-9223372036854775807 +SELECT 
c_boolean,c_date,c_time,c_timestamp,c_jsonb,c_smallint_array,c_timestamp_array,c_uuid FROM postgres_all_types where c_bigint=-9223372036854775807 ---- -f 0001-01-01 00:00:00 0001-01-01 00:00:00 {} {-32767} {"0001-01-01 00:00:00"} +f 0001-01-01 00:00:00 0001-01-01 00:00:00 {} {-32767} {"0001-01-01 00:00:00"} bb488f9b-330d-4012-b849-12adeb49e57e # postgres streaming test diff --git a/e2e_test/source/cdc/postgres_cdc.sql b/e2e_test/source/cdc/postgres_cdc.sql index 43dba14950b36..a4de0e447a0cc 100644 --- a/e2e_test/source/cdc/postgres_cdc.sql +++ b/e2e_test/source/cdc/postgres_cdc.sql @@ -67,7 +67,8 @@ CREATE TABLE IF NOT EXISTS postgres_all_types( c_timestamptz_array timestamptz[], c_interval_array interval[], c_jsonb_array jsonb[], + c_uuid uuid, PRIMARY KEY (c_boolean,c_bigint,c_date) ); -INSERT INTO postgres_all_types VALUES ( False, 0, 0, 0, 0, 0, 0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[]::boolean[], array[]::smallint[], array[]::integer[], array[]::bigint[], array[]::decimal[], array[]::real[], array[]::double precision[], array[]::varchar[], array[]::bytea[], array[]::date[], array[]::time[], array[]::timestamp[], array[]::timestamptz[], array[]::interval[], array[]::jsonb[]); -INSERT INTO postgres_all_types VALUES ( False, -32767, -2147483647, -9223372036854775807, -10.0, -9999.999999, -10000.0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[False::boolean]::boolean[], array[-32767::smallint]::smallint[], array[-2147483647::integer]::integer[], array[-9223372036854775807::bigint]::bigint[], array[-10.0::decimal]::decimal[], array[-9999.999999::real]::real[], array[-10000.0::double precision]::double precision[], array[''::varchar]::varchar[], array['\x00'::bytea]::bytea[], array['0001-01-01'::date]::date[], array['00:00:00'::time]::time[], array['0001-01-01 
00:00:00'::timestamp::timestamp]::timestamp[], array['0001-01-01 00:00:00'::timestamptz::timestamptz]::timestamptz[], array[interval '0 second'::interval]::interval[], array['{}'::jsonb]::jsonb[]); +INSERT INTO postgres_all_types VALUES ( False, 0, 0, 0, 0, 0, 0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[]::boolean[], array[]::smallint[], array[]::integer[], array[]::bigint[], array[]::decimal[], array[]::real[], array[]::double precision[], array[]::varchar[], array[]::bytea[], array[]::date[], array[]::time[], array[]::timestamp[], array[]::timestamptz[], array[]::interval[], array[]::jsonb[], null); +INSERT INTO postgres_all_types VALUES ( False, -32767, -2147483647, -9223372036854775807, -10.0, -9999.999999, -10000.0, '', '\x00', '0001-01-01', '00:00:00', '0001-01-01 00:00:00'::timestamp, '0001-01-01 00:00:00'::timestamptz, interval '0 second', '{}', array[False::boolean]::boolean[], array[-32767::smallint]::smallint[], array[-2147483647::integer]::integer[], array[-9223372036854775807::bigint]::bigint[], array[-10.0::decimal]::decimal[], array[-9999.999999::real]::real[], array[-10000.0::double precision]::double precision[], array[''::varchar]::varchar[], array['\x00'::bytea]::bytea[], array['0001-01-01'::date]::date[], array['00:00:00'::time]::time[], array['0001-01-01 00:00:00'::timestamp::timestamp]::timestamp[], array['0001-01-01 00:00:00'::timestamptz::timestamptz]::timestamptz[], array[interval '0 second'::interval]::interval[], array['{}'::jsonb]::jsonb[], 'bb488f9b-330d-4012-b849-12adeb49e57e'); diff --git a/e2e_test/streaming/bug_fixes/issue_15198.slt b/e2e_test/streaming/bug_fixes/issue_15198.slt new file mode 100644 index 0000000000000..a69aede18c2c9 --- /dev/null +++ b/e2e_test/streaming/bug_fixes/issue_15198.slt @@ -0,0 +1,23 @@ +# https://github.com/risingwavelabs/risingwave/issues/15198 + +statement ok +SET RW_IMPLICIT_FLUSH TO TRUE; + +statement 
ok +create materialized view "tumble_with_offset" +as ( + with + input as ( + select 1 as id, TO_TIMESTAMP('2024-01-01 01:30:02', 'YYYY-MM-DD HH24:MI:SS') as timestamps + ) + select * + from tumble(input, timestamps, interval '1 DAY', '+6 HOURS') +); + +query ITTT +select * from tumble_with_offset; +---- +1 2024-01-01 01:30:02+00:00 2023-12-31 06:00:00+00:00 2024-01-01 06:00:00+00:00 + +statement ok +drop materialized view tumble_with_offset; diff --git a/integration_tests/http-sink/README.md b/integration_tests/http-sink/README.md new file mode 100644 index 0000000000000..d956cb4ea95a4 --- /dev/null +++ b/integration_tests/http-sink/README.md @@ -0,0 +1,34 @@ +# Demo: Sinking to HTTP + +In this demo, we want to showcase how RisingWave is able to sink data to an HTTP endpoint. This feature depends on https://github.com/getindata/flink-http-connector. + +It has a few limitations: +1. It offers only two options for the HTTP method, i.e., PUT and POST. +2. It can only execute one request-reply round to the service (session-less). +3. It cannot handle status codes in the SQL API. + +Therefore, we suggest trying a Python UDF first. + +### Demo: +1. Launch the cluster: + +```sh +docker-compose up -d +``` + +The cluster contains a RisingWave cluster and its necessary dependencies, a datagen that generates the data. + +2. Set up an HTTP server of your own to receive the requests. + +3. Execute the SQL queries in sequence: + +- create_source.sql +- create_mv.sql +- create_sink.sql + +4. 
Check the contents in the HTTP server: +The HTTP server will receive JSON strings such as: +``` +{"user_id":5,"target_id":"siFqrkdlCn"} +``` +In total, 1000 JSON records are sent. diff --git a/integration_tests/http-sink/create_mv.sql b/integration_tests/http-sink/create_mv.sql new file mode 100644 index 0000000000000..8a291a3c95ea7 --- /dev/null +++ b/integration_tests/http-sink/create_mv.sql @@ -0,0 +1,6 @@ +CREATE MATERIALIZED VIEW bhv_mv AS +SELECT + user_id, + target_id +FROM + user_behaviors; diff --git a/integration_tests/http-sink/create_sink.sql b/integration_tests/http-sink/create_sink.sql new file mode 100644 index 0000000000000..0644d1d51934b --- /dev/null +++ b/integration_tests/http-sink/create_sink.sql @@ -0,0 +1,11 @@ +CREATE sink bhv_http_sink FROM bhv_mv WITH ( + connector = 'http', + url = 'http://localhost:8080/endpoint', + format = 'json', + type = 'append-only', + force_append_only='true', + primary_key = 'user_id', + gid.connector.http.sink.header.Origin = '*', + "gid.connector.http.sink.header.X-Content-Type-Options" = 'nosniff', + "gid.connector.http.sink.header.Content-Type" = 'application/json' +); \ No newline at end of file diff --git a/integration_tests/http-sink/create_source.sql b/integration_tests/http-sink/create_source.sql new file mode 100644 index 0000000000000..c28c10f3616da --- /dev/null +++ b/integration_tests/http-sink/create_source.sql @@ -0,0 +1,18 @@ +CREATE table user_behaviors ( + user_id int, + target_id VARCHAR, + target_type VARCHAR, + event_timestamp TIMESTAMP, + behavior_type VARCHAR, + parent_target_type VARCHAR, + parent_target_id VARCHAR, + PRIMARY KEY(user_id) +) WITH ( + connector = 'datagen', + fields.user_id.kind = 'sequence', + fields.user_id.start = '1', + fields.user_id.end = '1000', + fields.user_name.kind = 'random', + fields.user_name.length = '10', + datagen.rows.per.second = '10' +) FORMAT PLAIN ENCODE JSON; \ No newline at end of file diff --git 
a/integration_tests/http-sink/docker-compose.yml b/integration_tests/http-sink/docker-compose.yml new file mode 100644 index 0000000000000..8fba5ff352dc0 --- /dev/null +++ b/integration_tests/http-sink/docker-compose.yml @@ -0,0 +1,37 @@ +--- +version: "3" +services: + risingwave-standalone: + extends: + file: ../../docker/docker-compose.yml + service: risingwave-standalone + etcd-0: + extends: + file: ../../docker/docker-compose.yml + service: etcd-0 + grafana-0: + extends: + file: ../../docker/docker-compose.yml + service: grafana-0 + minio-0: + extends: + file: ../../docker/docker-compose.yml + service: minio-0 + prometheus-0: + extends: + file: ../../docker/docker-compose.yml + service: prometheus-0 +volumes: + risingwave-standalone: + external: false + etcd-0: + external: false + grafana-0: + external: false + minio-0: + external: false + prometheus-0: + external: false + message_queue: + external: false +name: risingwave-compose diff --git a/java/connector-node/risingwave-connector-service/pom.xml b/java/connector-node/risingwave-connector-service/pom.xml index 047c523c1c7db..d51d67497ce05 100644 --- a/java/connector-node/risingwave-connector-service/pom.xml +++ b/java/connector-node/risingwave-connector-service/pom.xml @@ -99,7 +99,6 @@ com.risingwave risingwave-sink-mock-flink-http-sink - provided diff --git a/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties b/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties index 8b1d571082f6e..a361dcf71cefe 100644 --- a/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties +++ b/java/connector-node/risingwave-connector-service/src/main/resources/mysql.properties @@ -22,5 +22,5 @@ heartbeat.interval.ms=${debezium.heartbeat.interval.ms:-60000} # In sharing cdc mode, we will subscribe to multiple tables in the given database, # so here we set ${table.name} to a default value `RW_CDC_Sharing` just for display. 
name=${hostname}:${port}:${database.name}.${table.name:-RW_CDC_Sharing} -# Enable transaction metadata by default -provide.transaction.metadata=${transactional:-true} +# In sharing cdc mode, transaction metadata will be enabled in frontend +provide.transaction.metadata=${transactional:-false} diff --git a/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties b/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties index 8d0284d03892e..326138403d3b2 100644 --- a/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties +++ b/java/connector-node/risingwave-connector-service/src/main/resources/postgres.properties @@ -20,5 +20,5 @@ heartbeat.interval.ms=${debezium.heartbeat.interval.ms:-300000} # In sharing cdc source mode, we will subscribe to multiple tables in the given database, # so here we set ${table.name} to a default value `RW_CDC_Sharing` just for display. name=${hostname}:${port}:${database.name}.${schema.name}.${table.name:-RW_CDC_Sharing} -# Enable transaction metadata by default -provide.transaction.metadata=${transactional:-true} +# In sharing cdc mode, transaction metadata will be enabled in frontend +provide.transaction.metadata=${transactional:-false} diff --git a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java index 9ac3d257b2bad..7c883335cfc23 100644 --- a/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java +++ b/java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java @@ -23,6 +23,7 @@ public class CassandraConfig extends CommonSinkConfig { /** Required */ private String type; + /** Required */ private String url; diff --git 
a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java index a969dddd620f7..d316eeae74bed 100644 --- a/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java +++ b/java/connector-node/risingwave-sink-mock-flink/risingwave-sink-mock-flink-http-sink/src/main/java/com/risingwave/mock/flink/http/HttpFlinkMockSinkFactory.java @@ -26,6 +26,8 @@ /** * The `FlinkMockSinkFactory` implementation of the http sink is responsible for creating the http * counterpart of the `DynamicTableSinkFactory`. And `validate` don't need to do anything. + * + *

This feature depends on https://github.com/getindata/flink-http-connector */ public class HttpFlinkMockSinkFactory implements FlinkMockSinkFactory { @Override diff --git a/java/pom.xml b/java/pom.xml index 5f168c48bd9ef..c6e39b34cfc0b 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -69,7 +69,7 @@ 1.53.0 2.10 0.1.0-SNAPSHOT - 2.27.1 + 2.43.0 2.20.0 2.0.9 1.5.0 @@ -391,7 +391,7 @@ - 1.7 + 1.20.0 diff --git a/proto/buf.yaml b/proto/buf.yaml index 1aa31816ce0af..abad30f04506c 100644 --- a/proto/buf.yaml +++ b/proto/buf.yaml @@ -1,7 +1,8 @@ version: v1 breaking: use: - - WIRE # https://docs.buf.build/breaking/rules + - WIRE_JSON # https://docs.buf.build/breaking/rules + # https://github.com/risingwavelabs/risingwave/issues/15030 lint: use: - DEFAULT diff --git a/proto/expr.proto b/proto/expr.proto index 4c9be4d15ea24..48bf9b55227ef 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -58,12 +58,12 @@ message ExprNode { MAKE_TIMESTAMP = 115; // From f64 to timestamp. // e.g. `select to_timestamp(1672044740.0)` - TO_TIMESTAMP = 104; + SEC_TO_TIMESTAMPTZ = 104; AT_TIME_ZONE = 105; DATE_TRUNC = 106; // Parse text to timestamp by format string. // e.g. `select to_timestamp('2022 08 21', 'YYYY MM DD')` - TO_TIMESTAMP1 = 107; + CHAR_TO_TIMESTAMPTZ = 107; CHAR_TO_DATE = 111; // Performs a cast with additional timezone information. 
CAST_WITH_TIME_ZONE = 108; diff --git a/proto/plan_common.proto b/proto/plan_common.proto index 82f9fbc63a0f8..1dd45ad08a6ef 100644 --- a/proto/plan_common.proto +++ b/proto/plan_common.proto @@ -136,6 +136,7 @@ enum FormatType { FORMAT_TYPE_CANAL = 5; FORMAT_TYPE_UPSERT = 6; FORMAT_TYPE_PLAIN = 7; + FORMAT_TYPE_NONE = 8; } enum EncodeType { @@ -147,6 +148,7 @@ enum EncodeType { ENCODE_TYPE_JSON = 5; ENCODE_TYPE_BYTES = 6; ENCODE_TYPE_TEMPLATE = 7; + ENCODE_TYPE_NONE = 8; } enum RowFormatType { diff --git a/risedev.yml b/risedev.yml index 69b0c23b05dd3..cb352daab6cf9 100644 --- a/risedev.yml +++ b/risedev.yml @@ -164,6 +164,17 @@ profile: - use: compactor # - use: prometheus # - use: grafana + fs: + steps: + # - use: etcd + - use: meta-node + - use: compute-node + - use: frontend + - use: opendal + engine: fs + - use: compactor + # - use: prometheus + # - use: grafana webhdfs: steps: # - use: etcd @@ -872,27 +883,7 @@ profile: - use: frontend - use: compactor - ci-deltalake-test: - config-path: src/config/ci.toml - steps: - - use: minio - - use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - ci-clickhouse-test: - config-path: src/config/ci.toml - steps: - - use: minio - - use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - ci-pulsar-test: + ci-sink-test: config-path: src/config/ci.toml steps: - use: minio diff --git a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs index 2c167fc1bdc20..e9173abefe1df 100644 --- a/src/cmd_all/src/bin/risingwave.rs +++ b/src/cmd_all/src/bin/risingwave.rs @@ -239,6 +239,7 @@ fn standalone(opts: StandaloneOpts) { /// high level options to standalone mode node-level options. /// We will start a standalone instance, with all nodes in the same process. 
fn single_node(opts: SingleNodeOpts) { + opts.create_store_directories().unwrap(); let opts = risingwave_cmd_all::map_single_node_opts_to_standalone_opts(&opts); let settings = risingwave_rt::LoggerSettings::from_opts(&opts) .with_target("risingwave_storage", Level::WARN) diff --git a/src/cmd_all/src/single_node.rs b/src/cmd_all/src/single_node.rs index b89f861f6e4fd..042a0feee9863 100644 --- a/src/cmd_all/src/single_node.rs +++ b/src/cmd_all/src/single_node.rs @@ -14,6 +14,7 @@ use std::sync::LazyLock; +use anyhow::Result; use clap::Parser; use home::home_dir; use risingwave_common::config::{AsyncStackTraceOption, MetaBackend}; @@ -64,7 +65,7 @@ pub struct SingleNodeOpts { /// The store directory used by meta store and object store. #[clap(long, env = "RW_SINGLE_NODE_STORE_DIRECTORY")] - store_directory: Option, + pub store_directory: Option, /// The address of the meta node. #[clap(long, env = "RW_SINGLE_NODE_META_ADDR")] @@ -142,6 +143,7 @@ pub fn map_single_node_opts_to_standalone_opts(opts: &SingleNodeOpts) -> ParsedS } } +// Defaults impl SingleNodeOpts { fn default_frontend_opts() -> FrontendOpts { FrontendOpts { @@ -227,3 +229,15 @@ impl SingleNodeOpts { } } } + +impl SingleNodeOpts { + pub fn create_store_directories(&self) -> Result<()> { + let store_directory = self + .store_directory + .as_ref() + .unwrap_or_else(|| &*DEFAULT_STORE_DIRECTORY); + std::fs::create_dir_all(format!("{}/meta_store", store_directory))?; + std::fs::create_dir_all(format!("{}/state_store", store_directory))?; + Ok(()) + } +} diff --git a/src/common/fields-derive/src/gen/test_empty_pk.rs b/src/common/fields-derive/src/gen/test_empty_pk.rs new file mode 100644 index 0000000000000..ffb5ff268bed1 --- /dev/null +++ b/src/common/fields-derive/src/gen/test_empty_pk.rs @@ -0,0 +1,29 @@ +impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = Some(&[]); + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { + vec![ + ("v1", 
< i16 as ::risingwave_common::types::WithDataType > + ::default_data_type()), ("v2", < String as + ::risingwave_common::types::WithDataType > ::default_data_type()) + ] + } + fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow { + ::risingwave_common::row::OwnedRow::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2) + ], + ) + } +} +impl From for ::risingwave_common::types::ScalarImpl { + fn from(v: Data) -> Self { + ::risingwave_common::types::StructValue::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2) + ], + ) + .into() + } +} diff --git a/src/common/fields-derive/src/gen/test_no_pk.rs b/src/common/fields-derive/src/gen/test_no_pk.rs new file mode 100644 index 0000000000000..9e1b3e7892969 --- /dev/null +++ b/src/common/fields-derive/src/gen/test_no_pk.rs @@ -0,0 +1,29 @@ +impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = None; + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { + vec![ + ("v1", < i16 as ::risingwave_common::types::WithDataType > + ::default_data_type()), ("v2", < String as + ::risingwave_common::types::WithDataType > ::default_data_type()) + ] + } + fn into_owned_row(self) -> ::risingwave_common::row::OwnedRow { + ::risingwave_common::row::OwnedRow::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.v2) + ], + ) + } +} +impl From for ::risingwave_common::types::ScalarImpl { + fn from(v: Data) -> Self { + ::risingwave_common::types::StructValue::new( + vec![ + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v1), + ::risingwave_common::types::ToOwnedDatum::to_owned_datum(v.v2) + ], + ) + .into() + } +} diff --git a/src/common/fields-derive/src/gen/test_output.rs 
b/src/common/fields-derive/src/gen/test_output.rs index 517dcdefc7a8c..a804a379bfd4a 100644 --- a/src/common/fields-derive/src/gen/test_output.rs +++ b/src/common/fields-derive/src/gen/test_output.rs @@ -1,4 +1,5 @@ impl ::risingwave_common::types::Fields for Data { + const PRIMARY_KEY: Option<&'static [usize]> = Some(&[1usize, 0usize]); fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { vec![ ("v1", < i16 as ::risingwave_common::types::WithDataType > @@ -21,9 +22,6 @@ impl ::risingwave_common::types::Fields for Data { ], ) } - fn primary_key() -> &'static [usize] { - &[1usize, 0usize] - } } impl From for ::risingwave_common::types::ScalarImpl { fn from(v: Data) -> Self { diff --git a/src/common/fields-derive/src/lib.rs b/src/common/fields-derive/src/lib.rs index 86fa229a5adcd..dae648d1dc343 100644 --- a/src/common/fields-derive/src/lib.rs +++ b/src/common/fields-derive/src/lib.rs @@ -16,7 +16,7 @@ use proc_macro2::TokenStream; use quote::quote; use syn::{Data, DeriveInput, Result}; -#[proc_macro_derive(Fields, attributes(primary_key))] +#[proc_macro_derive(Fields, attributes(primary_key, fields))] pub fn fields(tokens: proc_macro::TokenStream) -> proc_macro::TokenStream { inner(tokens.into()).into() } @@ -46,6 +46,16 @@ fn gen(tokens: TokenStream) -> Result { )); }; + let style = get_style(&input); + if let Some(style) = &style { + if !["Title Case", "TITLE CASE", "snake_case"].contains(&style.value().as_str()) { + return Err(syn::Error::new_spanned( + style, + "only `Title Case`, `TITLE CASE`, and `snake_case` are supported", + )); + } + } + let fields_rw: Vec = struct_ .fields .iter() @@ -55,6 +65,12 @@ fn gen(tokens: TokenStream) -> Result { if name.starts_with("r#") { name = name[2..].to_string(); } + // cast style + match style.as_ref().map_or(String::new(), |f| f.value()).as_str() { + "Title Case" => name = to_title_case(&name), + "TITLE CASE" => name = to_title_case(&name).to_uppercase(), + _ => {} + } let ty = &field.ty; quote! 
{ (#name, <#ty as ::risingwave_common::types::WithDataType>::default_data_type()) @@ -66,16 +82,17 @@ fn gen(tokens: TokenStream) -> Result { .iter() .map(|field| field.ident.as_ref().expect("field no name")) .collect::>(); - let primary_key = get_primary_key(&input).map(|indices| { - quote! { - fn primary_key() -> &'static [usize] { - &[#(#indices),*] - } - } - }); + let primary_key = get_primary_key(&input).map_or_else( + || quote! { None }, + |indices| { + quote! { Some(&[#(#indices),*]) } + }, + ); Ok(quote! { impl ::risingwave_common::types::Fields for #ident { + const PRIMARY_KEY: Option<&'static [usize]> = #primary_key; + fn fields() -> Vec<(&'static str, ::risingwave_common::types::DataType)> { vec![#(#fields_rw),*] } @@ -84,7 +101,6 @@ fn gen(tokens: TokenStream) -> Result { ::risingwave_common::types::ToOwnedDatum::to_owned_datum(self.#names) ),*]) } - #primary_key } impl From<#ident> for ::risingwave_common::types::ScalarImpl { fn from(v: #ident) -> Self { @@ -117,7 +133,9 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option> { return Some( keys.to_string() .split(',') - .map(|s| index(s.trim())) + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(index) .collect(), ); } @@ -132,6 +150,46 @@ fn get_primary_key(input: &syn::DeriveInput) -> Option> { None } +/// Get name style from `#[fields(style = "xxx")]` attribute. +fn get_style(input: &syn::DeriveInput) -> Option { + let style = input.attrs.iter().find_map(|attr| match &attr.meta { + syn::Meta::List(list) if list.path.is_ident("fields") => { + let name_value: syn::MetaNameValue = syn::parse2(list.tokens.clone()).ok()?; + if name_value.path.is_ident("style") { + Some(name_value.value) + } else { + None + } + } + _ => None, + })?; + match style { + syn::Expr::Lit(lit) => match lit.lit { + syn::Lit::Str(s) => Some(s), + _ => None, + }, + _ => None, + } +} + +/// Convert `snake_case` to `Title Case`. 
+fn to_title_case(s: &str) -> String { + let mut title = String::new(); + let mut next_upper = true; + for c in s.chars() { + if c == '_' { + title.push(' '); + next_upper = true; + } else if next_upper { + title.push(c.to_uppercase().next().unwrap()); + next_upper = false; + } else { + title.push(c); + } + } + title +} + #[cfg(test)] mod tests { use indoc::indoc; @@ -143,6 +201,18 @@ mod tests { prettyplease::unparse(&output) } + fn do_test(code: &str, expected_path: &str) { + let input: TokenStream = str::parse(code).unwrap(); + + let output = super::gen(input).unwrap(); + + let output = pretty_print(output); + + let expected = expect_test::expect_file![expected_path]; + + expected.assert_eq(&output); + } + #[test] fn test_gen() { let code = indoc! {r#" @@ -157,14 +227,33 @@ mod tests { } "#}; - let input: TokenStream = str::parse(code).unwrap(); + do_test(code, "gen/test_output.rs"); + } - let output = super::gen(input).unwrap(); + #[test] + fn test_no_pk() { + let code = indoc! {r#" + #[derive(Fields)] + struct Data { + v1: i16, + v2: String, + } + "#}; - let output = pretty_print(output); + do_test(code, "gen/test_no_pk.rs"); + } - let expected = expect_test::expect_file!["gen/test_output.rs"]; + #[test] + fn test_empty_pk() { + let code = indoc! {r#" + #[derive(Fields)] + #[primary_key()] + struct Data { + v1: i16, + v2: String, + } + "#}; - expected.assert_eq(&output); + do_test(code, "gen/test_empty_pk.rs"); } } diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs index 980897d5636e7..313c0bada6616 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -92,9 +92,9 @@ pub const UNKNOWN_GIT_SHA: &str = "unknown"; // The single source of truth of the pg parameters, Used in ConfigMap and current_cluster_version. // The version of PostgreSQL that Risingwave claims to be. -pub const PG_VERSION: &str = "9.5.0"; +pub const PG_VERSION: &str = "13.14.0"; /// The version of PostgreSQL that Risingwave claims to be. 
-pub const SERVER_VERSION_NUM: i32 = 90500; +pub const SERVER_VERSION_NUM: i32 = 130014; /// Shows the server-side character set encoding. At present, this parameter can be shown but not set, because the encoding is determined at database creation time. It is also the default value of `client_encoding`. pub const SERVER_ENCODING: &str = "UTF8"; /// see diff --git a/src/common/src/system_param/mod.rs b/src/common/src/system_param/mod.rs index 278390887dd51..82677e57e9753 100644 --- a/src/common/src/system_param/mod.rs +++ b/src/common/src/system_param/mod.rs @@ -340,7 +340,7 @@ macro_rules! impl_set_system_param { )* _ => { Err(format!( - "unrecognized system param {:?}", + "unrecognized system parameter {:?}", key )) } diff --git a/src/common/src/system_param/reader.rs b/src/common/src/system_param/reader.rs index c6b8d8c5af6aa..cf17c7bb43dd5 100644 --- a/src/common/src/system_param/reader.rs +++ b/src/common/src/system_param/reader.rs @@ -16,9 +16,17 @@ use std::borrow::Borrow; use risingwave_pb::meta::PbSystemParams; -use super::{default, system_params_to_kv, ParamValue}; +use super::{default, ParamValue}; use crate::for_all_params; +/// Information about a system parameter. +pub struct ParameterInfo { + pub name: &'static str, + pub mutable: bool, + pub value: String, + pub description: &'static str, +} + macro_rules! define_system_params_read_trait { ($({ $field:ident, $type:ty, $default:expr, $is_mutable:expr, $doc:literal, $($rest:tt)* },)*) => { /// The trait delegating reads on [`risingwave_pb::meta::SystemParams`]. @@ -32,6 +40,20 @@ macro_rules! define_system_params_read_trait { #[doc = $doc] fn $field(&self) -> <$type as ParamValue>::Borrowed<'_>; )* + + /// Return the information of all parameters. 
+ fn get_all(&self) -> Vec { + vec![ + $( + ParameterInfo { + name: stringify!($field), + mutable: $is_mutable, + value: self.$field().to_string(), + description: $doc, + }, + )* + ] + } } }; } @@ -70,10 +92,6 @@ where } } - pub fn to_kv(&self) -> Vec<(String, String)> { - system_params_to_kv(self.inner()).unwrap() - } - fn inner(&self) -> &PbSystemParams { self.inner.borrow() } diff --git a/src/common/src/types/fields.rs b/src/common/src/types/fields.rs index f52717297792e..df1795804af00 100644 --- a/src/common/src/types/fields.rs +++ b/src/common/src/types/fields.rs @@ -58,17 +58,18 @@ use crate::util::chunk_coalesce::DataChunkBuilder; /// } /// ``` pub trait Fields { + /// The primary key of the table. + /// + /// - `None` if the primary key is not applicable. + /// - `Some(&[])` if the primary key is empty, i.e., there'll be at most one row in the table. + const PRIMARY_KEY: Option<&'static [usize]>; + /// Return the schema of the struct. fn fields() -> Vec<(&'static str, DataType)>; /// Convert the struct to an `OwnedRow`. fn into_owned_row(self) -> OwnedRow; - /// The primary key of the table. - fn primary_key() -> &'static [usize] { - &[] - } - /// Create a [`DataChunkBuilder`](crate::util::chunk_coalesce::DataChunkBuilder) with the schema of the struct. 
fn data_chunk_builder(capacity: usize) -> DataChunkBuilder { DataChunkBuilder::new( diff --git a/src/common/src/types/serial.rs b/src/common/src/types/serial.rs index 9bfbf5e4fcac7..5c84c95fa0f7a 100644 --- a/src/common/src/types/serial.rs +++ b/src/common/src/types/serial.rs @@ -26,6 +26,12 @@ use crate::util::row_id::RowId; #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Default, Hash)] pub struct Serial(i64); +impl From for i64 { + fn from(value: Serial) -> i64 { + value.0 + } +} + impl From for Serial { fn from(value: i64) -> Self { Self(value) diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index f73bd5f51c3e0..e1f7ea97812c5 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -135,7 +135,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "signal", "fs", ] } -tokio-postgres = "0.7" +tokio-postgres = { version = "0.7", features = ["with-uuid-1"] } tokio-retry = "0.3" tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["codec", "io"] } @@ -143,6 +143,7 @@ tonic = { workspace = true } tracing = "0.1" url = "2" urlencoding = "2" +uuid = { version = "1", features = ["v4", "fast-rng"] } with_options = { path = "./with_options" } yup-oauth2 = "8.3" diff --git a/src/connector/src/macros.rs b/src/connector/src/macros.rs index 9a2383dbb4a96..e34171717ae6c 100644 --- a/src/connector/src/macros.rs +++ b/src/connector/src/macros.rs @@ -36,7 +36,8 @@ macro_rules! 
for_all_classified_sources { { Gcs, $crate::source::filesystem::opendal_source::GcsProperties , $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalGcs> }, { OpendalS3, $crate::source::filesystem::opendal_source::OpendalS3Properties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalS3> }, { PosixFs, $crate::source::filesystem::opendal_source::PosixFsProperties, $crate::source::filesystem::OpendalFsSplit<$crate::source::filesystem::opendal_source::OpendalPosixFs> }, - { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit} + { Test, $crate::source::test_source::TestSourceProperties, $crate::source::test_source::TestSourceSplit}, + { Iceberg, $crate::source::iceberg::IcebergProperties, $crate::source::iceberg::IcebergSplit} } $( ,$extra_args diff --git a/src/connector/src/parser/postgres.rs b/src/connector/src/parser/postgres.rs index fe1906614698c..acfbe5e4ae435 100644 --- a/src/connector/src/parser/postgres.rs +++ b/src/connector/src/parser/postgres.rs @@ -24,6 +24,7 @@ use risingwave_common::types::{ }; use rust_decimal::Decimal as RustDecimal; use thiserror_ext::AsReport; +use tokio_postgres::types::Type; static LOG_SUPPERSSER: LazyLock = LazyLock::new(LogSuppresser::default); @@ -139,7 +140,29 @@ pub fn postgres_row_to_owned_row(row: tokio_postgres::Row, schema: &Schema) -> O handle_data_type!(row, i, name, RustDecimal, Decimal) } DataType::Varchar => { - handle_data_type!(row, i, name, String) + match row.columns()[i].type_() { + // Since we don't support UUID natively, adapt it to a VARCHAR column + &Type::UUID => { + let res = row.try_get::<_, Option>(i); + match res { + Ok(val) => val.map(|v| ScalarImpl::from(v.to_string())), + Err(err) => { + if let Ok(sc) = LOG_SUPPERSSER.check() { + tracing::error!( + suppressed_count = sc, + column_name = name, + error = %err.as_report(), + "parse uuid column failed", + ); + } + None + } 
+ } + } + _ => { + handle_data_type!(row, i, name, String) + } + } } DataType::Date => { handle_data_type!(row, i, name, NaiveDate, Date) diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs index d4e38cac4d1c9..e6a654f75a5fd 100644 --- a/src/connector/src/sink/catalog/mod.rs +++ b/src/connector/src/sink/catalog/mod.rs @@ -205,7 +205,12 @@ impl TryFrom for SinkFormatDesc { F::Plain => SinkFormat::AppendOnly, F::Upsert => SinkFormat::Upsert, F::Debezium => SinkFormat::Debezium, - f @ (F::Unspecified | F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => { + f @ (F::Unspecified + | F::Native + | F::DebeziumMongo + | F::Maxwell + | F::Canal + | F::None) => { return Err(SinkError::Config(anyhow!( "sink format unsupported: {}", f.as_str_name() @@ -217,7 +222,7 @@ impl TryFrom for SinkFormatDesc { E::Protobuf => SinkEncode::Protobuf, E::Template => SinkEncode::Template, E::Avro => SinkEncode::Avro, - e @ (E::Unspecified | E::Native | E::Csv | E::Bytes) => { + e @ (E::Unspecified | E::Native | E::Csv | E::Bytes | E::None) => { return Err(SinkError::Config(anyhow!( "sink encode unsupported: {}", e.as_str_name() diff --git a/src/connector/src/sink/iceberg/mod.rs b/src/connector/src/sink/iceberg/mod.rs index 68c5654533a64..326f8586d76eb 100644 --- a/src/connector/src/sink/iceberg/mod.rs +++ b/src/connector/src/sink/iceberg/mod.rs @@ -927,7 +927,7 @@ impl SinkCommitCoordinator for IcebergSinkCommitter { } /// Try to match our schema with iceberg schema. 
-fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> { +pub fn try_matches_arrow_schema(rw_schema: &Schema, arrow_schema: &ArrowSchema) -> Result<()> { if rw_schema.fields.len() != arrow_schema.fields().len() { return Err(SinkError::Iceberg(anyhow!( "Schema length not match, ours is {}, and iceberg is {}", diff --git a/src/connector/src/sink/starrocks.rs b/src/connector/src/sink/starrocks.rs index 11594133695d4..c5a0740b0736f 100644 --- a/src/connector/src/sink/starrocks.rs +++ b/src/connector/src/sink/starrocks.rs @@ -52,10 +52,10 @@ pub struct StarrocksCommon { #[serde(rename = "starrocks.host")] pub host: String, /// The port to the MySQL server of StarRocks FE. - #[serde(rename = "starrocks.mysqlport")] + #[serde(rename = "starrocks.mysqlport", alias = "starrocks.query_port")] pub mysql_port: String, /// The port to the HTTP server of StarRocks FE. - #[serde(rename = "starrocks.httpport")] + #[serde(rename = "starrocks.httpport", alias = "starrocks.http_port")] pub http_port: String, /// The user name used to access the StarRocks database. #[serde(rename = "starrocks.user")] @@ -175,7 +175,7 @@ impl StarrocksSink { Ok(starrocks_data_type.contains("varchar")) } risingwave_common::types::DataType::Time => Err(SinkError::Starrocks( - "starrocks can not support Time".to_string(), + "TIME is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(), )), risingwave_common::types::DataType::Timestamp => { Ok(starrocks_data_type.contains("datetime")) @@ -184,24 +184,24 @@ impl StarrocksSink { "TIMESTAMP WITH TIMEZONE is not supported for Starrocks sink as Starrocks doesn't store time values with timezone information. Please convert to TIMESTAMP first.".to_string(), )), risingwave_common::types::DataType::Interval => Err(SinkError::Starrocks( - "starrocks can not support Interval".to_string(), + "INTERVAL is not supported for Starrocks sink. 
Please convert to VARCHAR or other supported types.".to_string(), )), // todo! Validate the type struct and list risingwave_common::types::DataType::Struct(_) => Err(SinkError::Starrocks( - "starrocks can not support import struct".to_string(), + "STRUCT is not supported for Starrocks sink.".to_string(), )), risingwave_common::types::DataType::List(_) => { Ok(starrocks_data_type.contains("unknown")) } risingwave_common::types::DataType::Bytea => Err(SinkError::Starrocks( - "starrocks can not support Bytea".to_string(), + "BYTEA is not supported for Starrocks sink. Please convert to VARCHAR or other supported types.".to_string(), )), risingwave_common::types::DataType::Jsonb => Ok(starrocks_data_type.contains("json")), risingwave_common::types::DataType::Serial => { Ok(starrocks_data_type.contains("bigint")) } risingwave_common::types::DataType::Int256 => Err(SinkError::Starrocks( - "starrocks can not support Int256".to_string(), + "INT256 is not supported for Starrocks sink.".to_string(), )), } } diff --git a/src/connector/src/source/base.rs b/src/connector/src/source/base.rs index 5b909a2738f3c..fed8e0263aac4 100644 --- a/src/connector/src/source/base.rs +++ b/src/connector/src/source/base.rs @@ -150,7 +150,7 @@ pub struct SourceEnumeratorContext { pub connector_client: Option, } -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct SourceEnumeratorInfo { pub source_id: u32, } diff --git a/src/connector/src/source/cdc/mod.rs b/src/connector/src/source/cdc/mod.rs index ae9490bca3c56..5fc6aefdfefdd 100644 --- a/src/connector/src/source/cdc/mod.rs +++ b/src/connector/src/source/cdc/mod.rs @@ -37,6 +37,8 @@ pub const CDC_SNAPSHOT_BACKFILL: &str = "rw_cdc_backfill"; pub const CDC_SHARING_MODE_KEY: &str = "rw.sharing.mode.enable"; // User can set snapshot='false' to disable cdc backfill pub const CDC_BACKFILL_ENABLE_KEY: &str = "snapshot"; +// We enable transaction for shared cdc source by default +pub const CDC_TRANSACTIONAL_KEY: &str = 
"transactional"; pub const MYSQL_CDC_CONNECTOR: &str = Mysql::CDC_CONNECTOR_NAME; pub const POSTGRES_CDC_CONNECTOR: &str = Postgres::CDC_CONNECTOR_NAME; diff --git a/src/connector/src/source/iceberg/mod.rs b/src/connector/src/source/iceberg/mod.rs new file mode 100644 index 0000000000000..e274f639f15b2 --- /dev/null +++ b/src/connector/src/source/iceberg/mod.rs @@ -0,0 +1,128 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use async_trait::async_trait; +use risingwave_common::types::JsonbVal; +use serde::{Deserialize, Serialize}; + +use crate::parser::ParserConfig; +use crate::source::{ + BoxChunkSourceStream, Column, SourceContextRef, SourceEnumeratorContextRef, SourceProperties, + SplitEnumerator, SplitId, SplitMetaData, SplitReader, UnknownFields, +}; + +pub const ICEBERG_CONNECTOR: &str = "iceberg"; + +#[derive(Clone, Debug, Deserialize, PartialEq, with_options::WithOptions)] +pub struct IcebergProperties { + #[serde(rename = "catalog.type")] + pub catalog_type: String, + #[serde(rename = "s3.region")] + pub region_name: String, + #[serde(rename = "s3.endpoint", default)] + pub endpoint: String, + #[serde(rename = "s3.access.key", default)] + pub s3_access: String, + #[serde(rename = "s3.secret.key", default)] + pub s3_secret: String, + #[serde(rename = "warehouse.path")] + pub warehouse_path: String, + #[serde(rename = "database.name")] + pub database_name: String, + 
#[serde(rename = "table.name")] + pub table_name: String, + + #[serde(flatten)] + pub unknown_fields: HashMap, +} + +impl SourceProperties for IcebergProperties { + type Split = IcebergSplit; + type SplitEnumerator = IcebergSplitEnumerator; + type SplitReader = IcebergFileReader; + + const SOURCE_NAME: &'static str = ICEBERG_CONNECTOR; +} + +impl UnknownFields for IcebergProperties { + fn unknown_fields(&self) -> HashMap { + self.unknown_fields.clone() + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct IcebergSplit {} + +impl SplitMetaData for IcebergSplit { + fn id(&self) -> SplitId { + unimplemented!() + } + + fn restore_from_json(_value: JsonbVal) -> anyhow::Result { + unimplemented!() + } + + fn encode_to_json(&self) -> JsonbVal { + unimplemented!() + } + + fn update_with_offset(&mut self, _start_offset: String) -> anyhow::Result<()> { + unimplemented!() + } +} + +#[derive(Debug, Clone)] +pub struct IcebergSplitEnumerator {} + +#[async_trait] +impl SplitEnumerator for IcebergSplitEnumerator { + type Properties = IcebergProperties; + type Split = IcebergSplit; + + async fn new( + _properties: Self::Properties, + _context: SourceEnumeratorContextRef, + ) -> anyhow::Result { + Ok(Self {}) + } + + async fn list_splits(&mut self) -> anyhow::Result> { + Ok(vec![]) + } +} + +#[derive(Debug)] +pub struct IcebergFileReader {} + +#[async_trait] +impl SplitReader for IcebergFileReader { + type Properties = IcebergProperties; + type Split = IcebergSplit; + + async fn new( + _props: IcebergProperties, + _splits: Vec, + _parser_config: ParserConfig, + _source_ctx: SourceContextRef, + _columns: Option>, + ) -> anyhow::Result { + unimplemented!() + } + + fn into_stream(self) -> BoxChunkSourceStream { + unimplemented!() + } +} diff --git a/src/connector/src/source/mod.rs b/src/connector/src/source/mod.rs index cba63b3005c1a..3656820ed95b0 100644 --- a/src/connector/src/source/mod.rs +++ b/src/connector/src/source/mod.rs @@ -31,6 +31,7 @@ 
pub use kafka::KAFKA_CONNECTOR; pub use kinesis::KINESIS_CONNECTOR; pub use nats::NATS_CONNECTOR; mod common; +pub mod iceberg; mod manager; pub mod reader; pub mod test_source; diff --git a/src/connector/with_options_sink.yaml b/src/connector/with_options_sink.yaml index 74cb5c21e9c7f..2b23913a1fc32 100644 --- a/src/connector/with_options_sink.yaml +++ b/src/connector/with_options_sink.yaml @@ -466,10 +466,12 @@ StarrocksConfig: field_type: String comments: The port to the MySQL server of StarRocks FE. required: true + alias: starrocks.query_port - name: starrocks.httpport field_type: String comments: The port to the HTTP server of StarRocks FE. required: true + alias: starrocks.http_port - name: starrocks.user field_type: String comments: The user name used to access the StarRocks database. diff --git a/src/connector/with_options_source.yaml b/src/connector/with_options_source.yaml index 2d811ce639c96..dec3cf6a8941a 100644 --- a/src/connector/with_options_source.yaml +++ b/src/connector/with_options_source.yaml @@ -33,6 +33,35 @@ GcsProperties: field_type: String required: false default: Default::default +IcebergProperties: + fields: + - name: catalog.type + field_type: String + required: true + - name: s3.region + field_type: String + required: true + - name: s3.endpoint + field_type: String + required: false + default: Default::default + - name: s3.access.key + field_type: String + required: false + default: Default::default + - name: s3.secret.key + field_type: String + required: false + default: Default::default + - name: warehouse.path + field_type: String + required: true + - name: database.name + field_type: String + required: true + - name: table.name + field_type: String + required: true KafkaProperties: fields: - name: bytes.per.second diff --git a/src/expr/impl/benches/expr.rs b/src/expr/impl/benches/expr.rs index fc2ad441cfb96..8468ae86e241b 100644 --- a/src/expr/impl/benches/expr.rs +++ b/src/expr/impl/benches/expr.rs @@ -304,7 +304,7 @@ fn 
bench_expr(c: &mut Criterion) { } if [ "date_trunc(character varying, timestamp with time zone) -> timestamp with time zone", - "to_timestamp1(character varying, character varying) -> timestamp with time zone", + "char_to_timestamptz(character varying, character varying) -> timestamp with time zone", "to_char(timestamp with time zone, character varying) -> character varying", ] .contains(&format!("{sig:?}").as_str()) @@ -321,12 +321,12 @@ fn bench_expr(c: &mut Criterion) { for (i, t) in sig.inputs_type.iter().enumerate() { use DataType::*; let idx = match (sig.name.as_scalar(), i) { - (PbType::ToTimestamp1, 0) => TIMESTAMP_FORMATTED_STRING, - (PbType::ToChar | PbType::ToTimestamp1, 1) => { + (PbType::CharToTimestamptz, 0) => TIMESTAMP_FORMATTED_STRING, + (PbType::ToChar | PbType::CharToTimestamptz, 1) => { children.push(string_literal("YYYY/MM/DD HH:MM:SS")); continue; } - (PbType::ToChar | PbType::ToTimestamp1, 2) => { + (PbType::ToChar | PbType::CharToTimestamptz, 2) => { children.push(string_literal("Australia/Sydney")); continue; } diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index dc81e3ab77bac..bf8afc7712f93 100644 --- a/src/expr/impl/src/scalar/cast.rs +++ b/src/expr/impl/src/scalar/cast.rs @@ -87,6 +87,7 @@ pub fn jsonb_to_number>(v: JsonbRef<'_>) -> Result { #[function("cast(int4) -> int2")] #[function("cast(int8) -> int2")] #[function("cast(int8) -> int4")] +#[function("cast(serial) -> int8")] #[function("cast(float4) -> int2")] #[function("cast(float8) -> int2")] #[function("cast(float4) -> int4")] diff --git a/src/expr/impl/src/scalar/timestamptz.rs b/src/expr/impl/src/scalar/timestamptz.rs index 06433d25f2892..83e77011ec6be 100644 --- a/src/expr/impl/src/scalar/timestamptz.rs +++ b/src/expr/impl/src/scalar/timestamptz.rs @@ -28,7 +28,7 @@ pub fn time_zone_err(inner_err: String) -> ExprError { } } -#[function("to_timestamp(float8) -> timestamptz")] +#[function("sec_to_timestamptz(float8) -> timestamptz")] pub fn 
f64_sec_to_timestamptz(elem: F64) -> Result { // TODO(#4515): handle +/- infinity let micros = (elem.0 * 1e6) diff --git a/src/expr/impl/src/scalar/to_timestamp.rs b/src/expr/impl/src/scalar/to_timestamp.rs index 3b3e4eaa90db6..d15703dbb78aa 100644 --- a/src/expr/impl/src/scalar/to_timestamp.rs +++ b/src/expr/impl/src/scalar/to_timestamp.rs @@ -66,7 +66,7 @@ fn parse(s: &str, tmpl: &ChronoPattern) -> Result { } #[function( - "to_timestamp1(varchar, varchar) -> timestamp", + "char_to_timestamptz(varchar, varchar) -> timestamp", prebuild = "ChronoPattern::compile($1)", deprecated )] @@ -81,7 +81,7 @@ pub fn to_timestamp_legacy(s: &str, tmpl: &ChronoPattern) -> Result { } #[function( - "to_timestamp1(varchar, varchar, varchar) -> timestamptz", + "char_to_timestamptz(varchar, varchar, varchar) -> timestamptz", prebuild = "ChronoPattern::compile($1)" )] pub fn to_timestamp(s: &str, timezone: &str, tmpl: &ChronoPattern) -> Result { @@ -93,7 +93,7 @@ pub fn to_timestamp(s: &str, timezone: &str, tmpl: &ChronoPattern) -> Result timestamptz", rewritten)] +#[function("char_to_timestamptz(varchar, varchar) -> timestamptz", rewritten)] fn _to_timestamp1() {} #[function( diff --git a/src/frontend/macro/src/lib.rs b/src/frontend/macro/src/lib.rs index 8ba10a9f4454a..36b7f33eb99c0 100644 --- a/src/frontend/macro/src/lib.rs +++ b/src/frontend/macro/src/lib.rs @@ -117,11 +117,15 @@ fn gen_sys_table(attr: Attr, item_fn: ItemFn) -> Result { #[linkme::distributed_slice(crate::catalog::system_catalog::SYS_CATALOGS_SLICE)] #[no_mangle] // to prevent duplicate schema.table name fn #gen_fn_name() -> crate::catalog::system_catalog::BuiltinCatalog { + const _: () = { + assert!(#struct_type::PRIMARY_KEY.is_some(), "primary key is required for system table"); + }; + crate::catalog::system_catalog::BuiltinCatalog::Table(crate::catalog::system_catalog::BuiltinTable { name: #table_name, schema: #schema_name, columns: #struct_type::fields(), - pk: #struct_type::primary_key(), + pk: 
#struct_type::PRIMARY_KEY.unwrap(), function: |reader| std::boxed::Box::pin(async { let rows = #user_fn_name(reader) #_await #handle_error; let mut builder = #struct_type::data_chunk_builder(rows.len() + 1); diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index b787632846e98..22fc4ce99c45f 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -959,8 +959,8 @@ impl Binder { ( "to_timestamp", dispatch_by_len(vec![ - (1, raw_call(ExprType::ToTimestamp)), - (2, raw_call(ExprType::ToTimestamp1)), + (1, raw_call(ExprType::SecToTimestamptz)), + (2, raw_call(ExprType::CharToTimestamptz)), ]), ), ("date_trunc", raw_call(ExprType::DateTrunc)), diff --git a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs index 074b772ca0bb8..a9a0d8fc4f1b5 100644 --- a/src/frontend/src/catalog/system_catalog/information_schema/columns.rs +++ b/src/frontend/src/catalog/system_catalog/information_schema/columns.rs @@ -34,13 +34,37 @@ use risingwave_frontend_macro::system_catalog; NULL::integer AS numeric_scale, c.position AS ordinal_position, 'YES' AS is_nullable, - NULL AS collation_name, - 'pg_catalog' AS udt_schema, CASE WHEN c.data_type = 'varchar' THEN 'character varying' ELSE c.data_type END AS data_type, - c.udt_type AS udt_name + CURRENT_DATABASE() AS udt_catalog, + 'pg_catalog' AS udt_schema, + c.udt_type AS udt_name, + NULL AS character_set_catalog, + NULL AS character_set_schema, + NULL AS character_set_name, + NULL AS collation_catalog, + NULL AS collation_schema, + NULL AS collation_name, + NULL AS domain_catalog, + NULL AS domain_schema, + NULL AS domain_name, + NULL AS scope_catalog, + NULL AS scope_schema, + NULL AS scope_name, + 'NO' AS is_identity, + NULL AS identity_generation, + NULL AS identity_start, + NULL AS identity_increment, + NULL AS identity_maximum, + NULL AS identity_minimum, 
+ NULL AS identity_cycle, + CASE + WHEN c.is_generated THEN 'ALWAYS' + ELSE 'NEVER' + END AS is_generated, + c.generation_expression FROM rw_catalog.rw_columns c LEFT JOIN rw_catalog.rw_relations r ON c.relation_id = r.id JOIN rw_catalog.rw_schemas s ON s.id = r.schema_id @@ -58,8 +82,29 @@ struct Column { numeric_scale: i32, ordinal_position: i32, is_nullable: String, - collation_name: String, - udt_schema: String, data_type: String, + udt_catalog: String, + udt_schema: String, udt_name: String, + character_set_catalog: String, + character_set_schema: String, + character_set_name: String, + collation_catalog: String, + collation_schema: String, + collation_name: String, + domain_catalog: String, + domain_schema: String, + domain_name: String, + scope_catalog: String, + scope_schema: String, + scope_name: String, + is_identity: String, + identity_generation: String, + identity_start: String, + identity_increment: String, + identity_maximum: String, + identity_minimum: String, + identity_cycle: String, + is_generated: String, + generation_expression: String, } diff --git a/src/frontend/src/catalog/system_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/mod.rs index 61ec69b77ae5a..18a4757601aed 100644 --- a/src/frontend/src/catalog/system_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/mod.rs @@ -31,6 +31,7 @@ use risingwave_common::catalog::{ }; use risingwave_common::error::BoxedError; use risingwave_common::session_config::ConfigMap; +use risingwave_common::system_param::local_manager::SystemParamsReaderRef; use risingwave_common::types::DataType; use risingwave_pb::meta::list_table_fragment_states_response::TableFragmentState; use risingwave_pb::meta::table_parallelism::{PbFixedParallelism, PbParallelism}; @@ -110,6 +111,8 @@ pub struct SysCatalogReaderImpl { auth_context: Arc, // Read config. config: Arc>, + // Read system params. 
+ system_params: SystemParamsReaderRef, } impl SysCatalogReaderImpl { @@ -120,6 +123,7 @@ impl SysCatalogReaderImpl { meta_client: Arc, auth_context: Arc, config: Arc>, + system_params: SystemParamsReaderRef, ) -> Self { Self { catalog_reader, @@ -128,6 +132,7 @@ impl SysCatalogReaderImpl { meta_client, auth_context, config, + system_params, } } } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs index c1a935803f9f4..ce97aeaac552c 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs @@ -35,6 +35,7 @@ mod pg_matviews; mod pg_namespace; mod pg_opclass; mod pg_operator; +mod pg_partitioned_table; mod pg_proc; mod pg_roles; mod pg_settings; diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs index c13e87f162afe..11bcabcde0f69 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_cast.rs @@ -22,6 +22,7 @@ use crate::expr::cast_map_array; /// Ref: [`https://www.postgresql.org/docs/current/catalog-pg-cast.html`] #[derive(Fields)] struct PgCast { + #[primary_key] oid: i32, castsource: i32, casttarget: i32, diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs index 196c36ec7f1af..2dfb15f9e527b 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_index.rs @@ -28,7 +28,8 @@ use risingwave_frontend_macro::system_catalog; ARRAY[]::smallint[] as indoption, NULL AS indexprs, NULL AS indpred, - FALSE AS indisprimary + FALSE AS indisprimary, + ARRAY[]::int[] AS indclass FROM rw_catalog.rw_indexes" )] #[derive(Fields)] @@ -46,4 +47,6 @@ struct PgIndex { indpred: Option, // TODO: we return 
false as the default value. indisprimary: bool, + // Empty array. We only have a dummy implementation of `pg_opclass` yet. + indclass: Vec, } diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs new file mode 100644 index 0000000000000..e11739e2609fd --- /dev/null +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_partitioned_table.rs @@ -0,0 +1,30 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use risingwave_common::types::Fields; +use risingwave_frontend_macro::system_catalog; + +/// The catalog `pg_partitioned_table` stores information about how tables are partitioned. 
Reference: [`https://www.postgresql.org/docs/current/catalog-pg-partitioned-table.html`] +#[system_catalog(view, "pg_catalog.pg_partitioned_table")] +#[derive(Fields)] +struct PgPartitionedTable { + partrelid: i32, + partstrat: String, + partnatts: i16, + partdefid: i32, + partattrs: Vec, + partclass: Vec, + partcollation: Vec, + partexprs: Option, +} diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs index 4fc0fb057108f..58d44b1aef92b 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_settings.rs @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use risingwave_common::types::Fields; +use risingwave_common::system_param::reader::SystemParamsRead; +use risingwave_common::types::{DataType, Datum, Fields, ToOwnedDatum, WithDataType}; use risingwave_frontend_macro::system_catalog; use crate::catalog::system_catalog::SysCatalogReaderImpl; @@ -20,23 +21,75 @@ use crate::catalog::system_catalog::SysCatalogReaderImpl; /// The catalog `pg_settings` stores settings. /// Ref: [`https://www.postgresql.org/docs/current/view-pg-settings.html`] #[derive(Fields)] +#[primary_key(name, context)] struct PgSetting { name: String, setting: String, short_desc: String, + context: Context, +} + +/// Context required to set the parameter's value. +/// +/// Note that we do not strictly follow the PostgreSQL's semantics for each variant +/// but only pick the minimum set of variants required for features like tab-completion. +#[derive(Clone, Copy)] +enum Context { + /// Used for immutable system parameters. + Internal, + + /// Used for mutable system parameters. + // TODO: `postmaster` means that changes require a restart of the server. This is + // not accurate for all system parameters. 
Use lower contexts once we guarantee about + // the behavior of each parameter. + Postmaster, + + /// Used for session variables. + // TODO: There might be variables that can only be set by superusers in the future. + // Should use `superuser` context then. + User, +} + +impl WithDataType for Context { + fn default_data_type() -> DataType { + DataType::Varchar + } +} + +impl ToOwnedDatum for Context { + fn to_owned_datum(self) -> Datum { + match self { + Context::Internal => "internal", + Context::Postmaster => "postmaster", + Context::User => "user", + } + .to_owned_datum() + } } #[system_catalog(table, "pg_catalog.pg_settings")] fn read_pg_settings(reader: &SysCatalogReaderImpl) -> Vec { - let config_reader = reader.config.read(); - let all_variables = config_reader.show_all(); + let variables = (reader.config.read().show_all()) + .into_iter() + .map(|info| PgSetting { + name: info.name, + setting: info.setting, + short_desc: info.description, + context: Context::User, + }); - all_variables - .iter() + let system_params = (reader.system_params.load().get_all()) + .into_iter() .map(|info| PgSetting { - name: info.name.clone(), - setting: info.setting.clone(), - short_desc: info.description.clone(), - }) - .collect() + name: info.name.to_owned(), + setting: info.value, + short_desc: info.description.to_owned(), + context: if info.mutable { + Context::Postmaster + } else { + Context::Internal + }, + }); + + variables.chain(system_params).collect() } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs index 40760df81a492..8491da7062711 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_columns.rs @@ -17,6 +17,7 @@ use risingwave_frontend_macro::system_catalog; use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; +use crate::expr::{ExprDisplay, ExprImpl}; 
#[derive(Fields)] #[primary_key(relation_id, name)] @@ -27,6 +28,8 @@ struct RwColumn { is_hidden: bool, is_primary_key: bool, is_distribution_key: bool, + is_generated: bool, + generation_expression: Option, data_type: String, type_oid: i32, type_len: i16, @@ -51,6 +54,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: false, is_primary_key: false, is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -71,6 +76,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: sink.downstream_pk.contains(&index), is_distribution_key: sink.distribution_key.contains(&index), + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -93,6 +100,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: table.pk.contains(&index), is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -104,6 +113,7 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { let table_rows = schema .iter_valid_table() .flat_map(|table| { + let schema = table.column_schema(); table .columns .iter() @@ -115,6 +125,15 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: table.pk().iter().any(|idx| idx.column_index == index), is_distribution_key: table.distribution_key.contains(&index), + is_generated: column.is_generated(), + generation_expression: column.generated_expr().map(|expr_node| { + let expr = ExprImpl::from_expr_proto(expr_node).unwrap(); + let expr_display = ExprDisplay 
{ + expr: &expr, + input_schema: &schema, + }; + expr_display.to_string() + }), data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), @@ -138,6 +157,8 @@ fn read_rw_columns(reader: &SysCatalogReaderImpl) -> Result> { is_hidden: column.is_hidden, is_primary_key: source.pk_col_ids.contains(&column.column_id()), is_distribution_key: false, + is_generated: false, + generation_expression: None, data_type: column.data_type().to_string(), type_oid: column.data_type().to_oid(), type_len: column.data_type().type_len(), diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs index 2699503a2fdd5..443fa255f4398 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_branched_objects.rs @@ -19,6 +19,7 @@ use crate::catalog::system_catalog::SysCatalogReaderImpl; use crate::error::Result; #[derive(Fields)] +#[primary_key(object_id, sst_id)] // TODO: is this correct? 
struct RwHummockBranchedObject { object_id: i64, sst_id: i64, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs index ac2b96bdc0023..e4f18c8fecaf3 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_snapshots.rs @@ -20,6 +20,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockPinnedSnapshot { + #[primary_key] worker_node_id: i32, min_pinned_snapshot_id: i64, } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs index 45a8e23f0ecc5..c0a9dd9e7fc45 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_pinned_versions.rs @@ -20,6 +20,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockPinnedVersion { + #[primary_key] worker_node_id: i32, min_pinned_version_id: i64, } diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs index 5551170e57a6f..37d1ceb6486ea 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs @@ -22,6 +22,7 @@ use crate::error::Result; #[derive(Fields)] struct RwHummockVersion { + #[primary_key] version_id: i64, max_committed_epoch: i64, safe_epoch: i64, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs index ebb969cac462f..f31b1f7c67c5c 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs +++ 
b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_meta_snapshot.rs @@ -21,6 +21,7 @@ use crate::error::Result; #[derive(Fields)] struct RwMetaSnapshot { + #[primary_key] meta_snapshot_id: i64, hummock_version_id: i64, // the smallest epoch this meta snapshot includes diff --git a/src/frontend/src/catalog/table_catalog.rs b/src/frontend/src/catalog/table_catalog.rs index fbb77a0ca0bb5..edb458997e33f 100644 --- a/src/frontend/src/catalog/table_catalog.rs +++ b/src/frontend/src/catalog/table_catalog.rs @@ -17,7 +17,7 @@ use std::collections::{HashMap, HashSet}; use fixedbitset::FixedBitSet; use itertools::Itertools; use risingwave_common::catalog::{ - ColumnCatalog, ConflictBehavior, TableDesc, TableId, TableVersionId, + ColumnCatalog, ConflictBehavior, Field, Schema, TableDesc, TableId, TableVersionId, }; use risingwave_common::util::epoch::Epoch; use risingwave_common::util::sort_util::ColumnOrder; @@ -74,6 +74,8 @@ pub struct TableCatalog { pub name: String, + pub dependent_relations: Vec, + /// All columns in this table. 
pub columns: Vec, @@ -492,6 +494,15 @@ impl TableCatalog { pub fn has_generated_column(&self) -> bool { self.columns.iter().any(|c| c.is_generated()) } + + pub fn column_schema(&self) -> Schema { + Schema::new( + self.columns + .iter() + .map(|c| Field::from(&c.column_desc)) + .collect(), + ) + } } impl From for TableCatalog { @@ -564,6 +575,11 @@ impl From for TableCatalog { created_at_cluster_version: tb.created_at_cluster_version.clone(), initialized_at_cluster_version: tb.initialized_at_cluster_version.clone(), retention_seconds: tb.retention_seconds, + dependent_relations: tb + .dependent_relations + .into_iter() + .map(TableId::from) + .collect_vec(), } } } @@ -715,6 +731,7 @@ mod tests { incoming_sinks: vec![], created_at_cluster_version: None, initialized_at_cluster_version: None, + dependent_relations: vec![], } ); assert_eq!(table, TableCatalog::from(table.to_prost(0, 0))); diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs index 5528b4614c355..4a7fc95cfd7ba 100644 --- a/src/frontend/src/expr/pure.rs +++ b/src/frontend/src/expr/pure.rs @@ -60,13 +60,13 @@ impl ExprVisitor for ImpureAnalyzer { | expr_node::Type::Extract | expr_node::Type::DatePart | expr_node::Type::TumbleStart - | expr_node::Type::ToTimestamp + | expr_node::Type::SecToTimestamptz | expr_node::Type::AtTimeZone | expr_node::Type::DateTrunc | expr_node::Type::MakeDate | expr_node::Type::MakeTime | expr_node::Type::MakeTimestamp - | expr_node::Type::ToTimestamp1 + | expr_node::Type::CharToTimestamptz | expr_node::Type::CharToDate | expr_node::Type::CastWithTimeZone | expr_node::Type::AddWithTimeZone diff --git a/src/frontend/src/expr/session_timezone.rs b/src/frontend/src/expr/session_timezone.rs index 8382e51398419..5ab35726c176b 100644 --- a/src/frontend/src/expr/session_timezone.rs +++ b/src/frontend/src/expr/session_timezone.rs @@ -216,9 +216,9 @@ impl SessionTimezone { new_inputs.push(ExprImpl::literal_varchar(self.timezone())); Some(FunctionCall::new(func_type, 
new_inputs).unwrap().into()) } - // `to_timestamp1(input_string, format_string)` - // => `to_timestamp1(input_string, format_string, zone_string)` - ExprType::ToTimestamp1 => { + // `char_to_timestamptz(input_string, format_string)` + // => `char_to_timestamptz(input_string, format_string, zone_string)` + ExprType::CharToTimestamptz => { if !(inputs.len() == 2 && inputs[0].return_type() == DataType::Varchar && inputs[1].return_type() == DataType::Varchar) diff --git a/src/frontend/src/expr/type_inference/cast.rs b/src/frontend/src/expr/type_inference/cast.rs index aa7e1c8ee9192..1f1a96e92b826 100644 --- a/src/frontend/src/expr/type_inference/cast.rs +++ b/src/frontend/src/expr/type_inference/cast.rs @@ -216,22 +216,23 @@ pub static CAST_MAP: LazyLock = LazyLock::new(|| { use DataTypeName::*; const CAST_TABLE: &[(&str, DataTypeName)] = &[ // 123456789ABCDEF - (". e a", Boolean), // 0 - (" .iiiiii a", Int16), // 1 - ("ea.iiiii a", Int32), // 2 - (" aa.iiii a", Int64), // 3 - (" aaa.ii a", Decimal), // 4 - (" aaaa.i a", Float32), // 5 - (" aaaaa. a", Float64), // 6 - (" e. a", Int256), // 7 - (" .ii a", Date), // 8 - (" a.ia a", Timestamp), // 9 - (" aa.a a", Timestamptz), // A - (" .i a", Time), // B - (" a. a", Interval), // C - ("eeeeeee . a", Jsonb), // D - (" .a", Bytea), // E - ("eeeeeeeeeeeeeee.", Varchar), // F + (". e a ", Boolean), // 0 + (" .iiiiii a ", Int16), // 1 + ("ea.iiiii a ", Int32), // 2 + (" aa.iiii a ", Int64), // 3 + (" aaa.ii a ", Decimal), // 4 + (" aaaa.i a ", Float32), // 5 + (" aaaaa. a ", Float64), // 6 + (" e. a ", Int256), // 7 + (" .ii a ", Date), // 8 + (" a.ia a ", Timestamp), // 9 + (" aa.a a ", Timestamptz), // A + (" .i a ", Time), // B + (" a. a ", Interval), // C + ("eeeeeee . a ", Jsonb), // D + (" .a ", Bytea), // E + ("eeeeeeeeeeeeeee. 
", Varchar), // F + (" e .", Serial), ]; let mut map = BTreeMap::new(); for (row, source) in CAST_TABLE { diff --git a/src/frontend/src/expr/utils.rs b/src/frontend/src/expr/utils.rs index 259c7400dc996..9db25b3dc554e 100644 --- a/src/frontend/src/expr/utils.rs +++ b/src/frontend/src/expr/utils.rs @@ -498,11 +498,23 @@ impl WatermarkAnalyzer { _ => WatermarkDerivation::None, }, ExprType::Subtract | ExprType::TumbleStart => { - match self.visit_binary_op(func_call.inputs()) { - (Constant, Constant) => Constant, - (Watermark(idx), Constant) => Watermark(idx), - (Nondecreasing, Constant) => Nondecreasing, - _ => WatermarkDerivation::None, + if func_call.inputs().len() == 3 { + // With `offset` specified + // e.g., select * from tumble(t1, start, interval, offset); + assert_eq!(ExprType::TumbleStart, func_call.func_type()); + match self.visit_ternary_op(func_call.inputs()) { + (Constant, Constant, Constant) => Constant, + (Watermark(idx), Constant, Constant) => Watermark(idx), + (Nondecreasing, Constant, Constant) => Nondecreasing, + _ => WatermarkDerivation::None, + } + } else { + match self.visit_binary_op(func_call.inputs()) { + (Constant, Constant) => Constant, + (Watermark(idx), Constant) => Watermark(idx), + (Nondecreasing, Constant) => Nondecreasing, + _ => WatermarkDerivation::None, + } } } ExprType::Multiply | ExprType::Divide | ExprType::Modulus => { @@ -577,8 +589,8 @@ impl WatermarkAnalyzer { }, _ => unreachable!(), }, - ExprType::ToTimestamp => self.visit_unary_op(func_call.inputs()), - ExprType::ToTimestamp1 => WatermarkDerivation::None, + ExprType::SecToTimestamptz => self.visit_unary_op(func_call.inputs()), + ExprType::CharToTimestamptz => WatermarkDerivation::None, ExprType::Cast => { // TODO: need more derivation WatermarkDerivation::None diff --git a/src/frontend/src/handler/alter_source_with_sr.rs b/src/frontend/src/handler/alter_source_with_sr.rs index a8e6892e5a908..06bb2d0387479 100644 --- a/src/frontend/src/handler/alter_source_with_sr.rs +++ 
b/src/frontend/src/handler/alter_source_with_sr.rs @@ -42,6 +42,7 @@ fn format_type_to_format(from: FormatType) -> Option { FormatType::Canal => Format::Canal, FormatType::Upsert => Format::Upsert, FormatType::Plain => Format::Plain, + FormatType::None => Format::None, }) } @@ -55,6 +56,7 @@ fn encode_type_to_encode(from: EncodeType) -> Option { EncodeType::Json => Encode::Json, EncodeType::Bytes => Encode::Bytes, EncodeType::Template => Encode::Template, + EncodeType::None => Encode::None, }) } diff --git a/src/frontend/src/handler/cancel_job.rs b/src/frontend/src/handler/cancel_job.rs index f124a2a030bd1..278e01e3e1bc0 100644 --- a/src/frontend/src/handler/cancel_job.rs +++ b/src/frontend/src/handler/cancel_job.rs @@ -12,14 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use itertools::Itertools; -use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_pb::meta::cancel_creating_jobs_request::{CreatingJobIds, PbJobs}; use risingwave_sqlparser::ast::JobIdents; +use super::RwPgResponseBuilderExt; use crate::error::Result; use crate::handler::{HandlerArgs, RwPgResponse}; @@ -36,16 +34,14 @@ pub(super) async fn handle_cancel( .await?; let rows = canceled_jobs .into_iter() - .map(|id| Row::new(vec![Some(id.to_string().into())])) - .collect_vec(); + .map(|id| CancelRow { id: id.to_string() }); Ok(PgResponse::builder(StatementType::CANCEL_COMMAND) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "Id".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct CancelRow { + id: String, +} diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index de8e93e04a784..245976bd913b9 
100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -504,7 +504,7 @@ fn check_cycle_for_sink( if let Ok(table) = reader.get_table_by_id(table_id) { visit_table(session, reader, sink_index, table.as_ref(), visited_tables)? } else { - bail!("table not found: {:?}", table_id); + bail!("streaming job not found: {:?}", table_id); } } @@ -533,6 +533,14 @@ fn check_cycle_for_sink( } } + for table_id in &table.dependent_relations { + if let Ok(table) = reader.get_table_by_id(table_id) { + visit_table(session, reader, sink_index, table.as_ref(), visited_tables)? + } else { + bail!("streaming job not found: {:?}", table_id); + } + } + Ok(()) } @@ -693,7 +701,7 @@ fn bind_sink_format_desc(value: ConnectorSchema) -> Result { F::Plain => SinkFormat::AppendOnly, F::Upsert => SinkFormat::Upsert, F::Debezium => SinkFormat::Debezium, - f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal) => { + f @ (F::Native | F::DebeziumMongo | F::Maxwell | F::Canal | F::None) => { return Err(ErrorCode::BindError(format!("sink format unsupported: {f}")).into()); } }; @@ -702,7 +710,7 @@ fn bind_sink_format_desc(value: ConnectorSchema) -> Result { E::Protobuf => SinkEncode::Protobuf, E::Avro => SinkEncode::Avro, E::Template => SinkEncode::Template, - e @ (E::Native | E::Csv | E::Bytes) => { + e @ (E::Native | E::Csv | E::Bytes | E::None) => { return Err(ErrorCode::BindError(format!("sink encode unsupported: {e}")).into()); } }; diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index 0ab4d9ec3a416..bbb2d93b21790 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -16,13 +16,13 @@ use std::collections::{BTreeMap, HashMap}; use std::rc::Rc; use std::sync::LazyLock; -use anyhow::Context; +use anyhow::{anyhow, Context}; use either::Either; use itertools::Itertools; use maplit::{convert_args, hashmap}; use pgwire::pg_response::{PgResponse, 
StatementType}; use risingwave_common::catalog::{ - is_column_ids_dedup, ColumnCatalog, ColumnDesc, TableId, INITIAL_SOURCE_VERSION_ID, + is_column_ids_dedup, ColumnCatalog, ColumnDesc, Schema, TableId, INITIAL_SOURCE_VERSION_ID, KAFKA_TIMESTAMP_COLUMN_NAME, }; use risingwave_common::types::DataType; @@ -36,17 +36,20 @@ use risingwave_connector::parser::{ use risingwave_connector::schema::schema_registry::{ name_strategy_from_str, SchemaRegistryAuth, SCHEMA_REGISTRY_PASSWORD, SCHEMA_REGISTRY_USERNAME, }; +use risingwave_connector::sink::iceberg::IcebergConfig; use risingwave_connector::source::cdc::external::CdcTableType; use risingwave_connector::source::cdc::{ - CDC_SHARING_MODE_KEY, CDC_SNAPSHOT_BACKFILL, CDC_SNAPSHOT_MODE_KEY, CITUS_CDC_CONNECTOR, - MYSQL_CDC_CONNECTOR, POSTGRES_CDC_CONNECTOR, + CDC_SHARING_MODE_KEY, CDC_SNAPSHOT_BACKFILL, CDC_SNAPSHOT_MODE_KEY, CDC_TRANSACTIONAL_KEY, + CITUS_CDC_CONNECTOR, MYSQL_CDC_CONNECTOR, POSTGRES_CDC_CONNECTOR, }; use risingwave_connector::source::datagen::DATAGEN_CONNECTOR; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::nexmark::source::{get_event_data_types_with_names, EventType}; use risingwave_connector::source::test_source::TEST_CONNECTOR; use risingwave_connector::source::{ - GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR, KINESIS_CONNECTOR, NATS_CONNECTOR, - NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR, + ConnectorProperties, GCS_CONNECTOR, GOOGLE_PUBSUB_CONNECTOR, KAFKA_CONNECTOR, + KINESIS_CONNECTOR, NATS_CONNECTOR, NEXMARK_CONNECTOR, OPENDAL_S3_CONNECTOR, POSIX_FS_CONNECTOR, + PULSAR_CONNECTOR, S3_CONNECTOR, }; use risingwave_pb::catalog::{ PbSchemaRegistryNameStrategy, PbSource, StreamSourceInfo, WatermarkDesc, @@ -72,7 +75,7 @@ use crate::handler::create_table::{ ensure_table_constraints_supported, ColumnIdGenerator, }; use crate::handler::util::{ - get_connector, is_cdc_connector, is_kafka_connector, 
SourceSchemaCompatExt, + connector_need_pk, get_connector, is_cdc_connector, is_kafka_connector, SourceSchemaCompatExt, }; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::SourceNodeKind; @@ -316,6 +319,7 @@ pub(crate) async fn bind_columns_from_source( let columns = match (&source_schema.format, &source_schema.row_encode) { (Format::Native, Encode::Native) + | (Format::None, Encode::None) | (Format::Plain, Encode::Bytes) | (Format::DebeziumMongo, Encode::Json) => None, (Format::Plain, Encode::Protobuf) => { @@ -706,7 +710,9 @@ pub(crate) async fn bind_source_pk( .collect_vec(); let res = match (&source_schema.format, &source_schema.row_encode) { - (Format::Native, Encode::Native) | (Format::Plain, _) => sql_defined_pk_names, + (Format::Native, Encode::Native) | (Format::None, Encode::None) | (Format::Plain, _) => { + sql_defined_pk_names + } // For all Upsert formats, we only accept one and only key column as primary key. // Additional KEY columns must be set in this case and must be primary key. @@ -977,6 +983,9 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock hashmap!( Format::Plain => vec![Encode::Json], + ), + ICEBERG_CONNECTOR => hashmap!( + Format::None => vec![Encode::None], ) )) }); @@ -1054,12 +1063,11 @@ pub fn validate_compatibility( } /// Performs early stage checking in frontend to see if the schema of the given `columns` is -/// compatible with the connector extracted from the properties. Currently this only works for -/// `nexmark` connector since it's in chunk format. +/// compatible with the connector extracted from the properties. /// /// One should only call this function after all properties of all columns are resolved, like /// generated column descriptors. 
-pub(super) fn check_source_schema( +pub(super) async fn check_source_schema( props: &HashMap, row_id_index: Option, columns: &[ColumnCatalog], @@ -1068,10 +1076,22 @@ pub(super) fn check_source_schema( return Ok(()); }; - if connector != NEXMARK_CONNECTOR { - return Ok(()); + if connector == NEXMARK_CONNECTOR { + check_nexmark_schema(props, row_id_index, columns) + } else if connector == ICEBERG_CONNECTOR { + Ok(check_iceberg_source(props, columns) + .await + .map_err(|err| ProtocolError(err.to_string()))?) + } else { + Ok(()) } +} +pub(super) fn check_nexmark_schema( + props: &HashMap, + row_id_index: Option, + columns: &[ColumnCatalog], +) -> Result<()> { let table_type = props .get("nexmark.table.type") .map(|t| t.to_ascii_lowercase()); @@ -1121,6 +1141,68 @@ pub(super) fn check_source_schema( Ok(()) } +pub async fn check_iceberg_source( + props: &HashMap, + columns: &[ColumnCatalog], +) -> anyhow::Result<()> { + let props = ConnectorProperties::extract(props.clone(), true)?; + let ConnectorProperties::Iceberg(properties) = props else { + return Err(anyhow!(format!( + "Invalid properties for iceberg source: {:?}", + props + ))); + }; + + let iceberg_config = IcebergConfig { + database_name: properties.database_name, + table_name: properties.table_name, + catalog_type: Some(properties.catalog_type), + path: properties.warehouse_path, + endpoint: Some(properties.endpoint), + access_key: properties.s3_access, + secret_key: properties.s3_secret, + region: Some(properties.region_name), + ..Default::default() + }; + + let schema = Schema { + fields: columns + .iter() + .cloned() + .map(|c| c.column_desc.into()) + .collect(), + }; + + let table = iceberg_config.load_table().await?; + + let iceberg_schema: arrow_schema::Schema = table + .current_table_metadata() + .current_schema()? 
+ .clone() + .try_into()?; + + for f1 in schema.fields() { + if !iceberg_schema.fields.iter().any(|f2| f2.name() == &f1.name) { + return Err(anyhow::anyhow!(format!( + "Column {} not found in iceberg table", + f1.name + ))); + } + } + + let new_iceberg_field = iceberg_schema + .fields + .iter() + .filter(|f1| schema.fields.iter().any(|f2| f1.name() == &f2.name)) + .cloned() + .collect::>(); + let new_iceberg_schema = arrow_schema::Schema::new(new_iceberg_field); + + risingwave_connector::sink::iceberg::try_matches_arrow_schema(&schema, &new_iceberg_schema)?; + + Ok(()) +} + pub async fn handle_create_source( handler_args: HandlerArgs, stmt: CreateSourceStatement, @@ -1196,6 +1278,8 @@ pub async fn handle_create_source( with_properties.insert(CDC_SNAPSHOT_MODE_KEY.into(), CDC_SNAPSHOT_BACKFILL.into()); // enable cdc sharing mode, which will capture all tables in the given `database.name` with_properties.insert(CDC_SHARING_MODE_KEY.into(), "true".into()); + // enable transactional cdc + with_properties.insert(CDC_TRANSACTIONAL_KEY.into(), "true".into()); } // must behind `handle_addition_columns` @@ -1213,8 +1297,8 @@ pub async fn handle_create_source( ) .into()); } - - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + let (mut columns, pk_column_ids, row_id_index) = + bind_pk_on_relation(columns, pk_names, connector_need_pk(&with_properties))?; debug_assert!(is_column_ids_dedup(&columns)); @@ -1231,7 +1315,7 @@ pub async fn handle_create_source( &pk_column_ids, )?; - check_source_schema(&with_properties, row_id_index, &columns)?; + check_source_schema(&with_properties, row_id_index, &columns).await?; let pk_column_ids = pk_column_ids.into_iter().map(Into::into).collect(); @@ -1308,6 +1392,7 @@ fn format_to_prost(format: &Format) -> FormatType { Format::DebeziumMongo => FormatType::DebeziumMongo, Format::Maxwell => FormatType::Maxwell, Format::Canal => FormatType::Canal, + Format::None => FormatType::None, } } fn 
row_encode_to_prost(row_encode: &Encode) -> EncodeType { @@ -1319,6 +1404,7 @@ fn row_encode_to_prost(row_encode: &Encode) -> EncodeType { Encode::Csv => EncodeType::Csv, Encode::Bytes => EncodeType::Bytes, Encode::Template => EncodeType::Template, + Encode::None => EncodeType::None, } } diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 8fc30c2c30e19..7fc757b71b6b7 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -61,6 +61,7 @@ use crate::handler::create_source::{ bind_all_columns, bind_columns_from_source, bind_source_pk, bind_source_watermark, check_source_schema, handle_addition_columns, validate_compatibility, UPSTREAM_SOURCE_KEY, }; +use crate::handler::util::is_iceberg_connector; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::SourceNodeKind; use crate::optimizer::plan_node::{LogicalCdcScan, LogicalSource}; @@ -411,6 +412,7 @@ fn multiple_pk_definition_err() -> RwError { pub fn bind_pk_on_relation( mut columns: Vec, pk_names: Vec, + must_need_pk: bool, ) -> Result<(Vec, Vec, Option)> { for c in &columns { assert!(c.column_id() != ColumnId::placeholder()); @@ -431,8 +433,10 @@ pub fn bind_pk_on_relation( }) .try_collect()?; - // Add `_row_id` column if `pk_column_ids` is empty. 
- let row_id_index = pk_column_ids.is_empty().then(|| { + // Add `_row_id` column if `pk_column_ids` is empty and must_need_pk + let need_row_id = pk_column_ids.is_empty() && must_need_pk; + + let row_id_index = need_row_id.then(|| { let column = ColumnCatalog::row_id_column(); let index = columns.len(); pk_column_ids = vec![column.column_id()]; @@ -510,7 +514,12 @@ pub(crate) async fn gen_create_table_plan_with_source( c.column_desc.column_id = col_id_gen.generate(c.name()) } - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + if is_iceberg_connector(&with_properties) { + return Err( + ErrorCode::BindError("can't create table with iceberg connector".to_string()).into(), + ); + } + let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; let watermark_descs = bind_source_watermark( session, @@ -531,7 +540,7 @@ pub(crate) async fn gen_create_table_plan_with_source( &pk_column_ids, )?; - check_source_schema(&with_properties, row_id_index, &columns)?; + check_source_schema(&with_properties, row_id_index, &columns).await?; gen_table_plan_inner( context.into(), @@ -594,7 +603,7 @@ pub(crate) fn gen_create_table_plan_without_bind( ) -> Result<(PlanRef, Option, PbTable)> { ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names)?; + let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; let watermark_descs = bind_source_watermark( context.session_ctx(), @@ -774,7 +783,7 @@ pub(crate) fn gen_create_table_plan_for_cdc_source( } let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?; + let (columns, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?; let definition = context.normalized_sql().to_owned(); @@ -1275,7 
+1284,7 @@ mod tests { } ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names)?; + let (_, pk_column_ids, _) = bind_pk_on_relation(columns, pk_names, true)?; Ok(pk_column_ids) })(); match (expected, actual) { diff --git a/src/frontend/src/handler/describe.rs b/src/frontend/src/handler/describe.rs index ef1a601cca590..36cff2e20e2b6 100644 --- a/src/frontend/src/handler/describe.rs +++ b/src/frontend/src/handler/describe.rs @@ -17,17 +17,16 @@ use std::fmt::Display; use itertools::Itertools; use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::catalog::{ColumnCatalog, ColumnDesc}; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{display_comma_separated, ObjectName}; -use super::RwPgResponse; +use super::show::ShowColumnRow; +use super::{fields_to_descriptors, RwPgResponse}; use crate::binder::{Binder, Relation}; use crate::catalog::CatalogError; use crate::error::Result; -use crate::handler::util::col_descs_to_rows; -use crate::handler::HandlerArgs; +use crate::handler::{HandlerArgs, RwPgResponseBuilderExt}; pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Result { let session = handler_args.session; @@ -156,7 +155,10 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re }; // Convert all column descs to rows - let mut rows = col_descs_to_rows(columns); + let mut rows = columns + .into_iter() + .flat_map(ShowColumnRow::from_catalog) + .collect_vec(); fn concat(display_elems: impl IntoIterator) -> String where @@ -170,96 +172,68 @@ pub fn handle_describe(handler_args: HandlerArgs, object_name: ObjectName) -> Re // Convert primary key to rows if !pk_columns.is_empty() { - rows.push(Row::new(vec![ - Some("primary 
key".into()), - Some(concat(pk_columns.iter().map(|x| &x.name)).into()), - None, // Is Hidden - None, // Description - ])); + rows.push(ShowColumnRow { + name: "primary key".into(), + r#type: concat(pk_columns.iter().map(|x| &x.name)), + is_hidden: None, + description: None, + }); } // Convert distribution keys to rows if !dist_columns.is_empty() { - rows.push(Row::new(vec![ - Some("distribution key".into()), - Some(concat(dist_columns.iter().map(|x| &x.name)).into()), - None, // Is Hidden - None, // Description - ])); + rows.push(ShowColumnRow { + name: "distribution key".into(), + r#type: concat(dist_columns.iter().map(|x| &x.name)), + is_hidden: None, + description: None, + }); } // Convert all indexes to rows rows.extend(indices.iter().map(|index| { let index_display = index.display(); - Row::new(vec![ - Some(index.name.clone().into()), - if index_display.include_columns.is_empty() { - Some( - format!( - "index({}) distributed by({})", - display_comma_separated(&index_display.index_columns_with_ordering), - display_comma_separated(&index_display.distributed_by_columns), - ) - .into(), + ShowColumnRow { + name: index.name.clone(), + r#type: if index_display.include_columns.is_empty() { + format!( + "index({}) distributed by({})", + display_comma_separated(&index_display.index_columns_with_ordering), + display_comma_separated(&index_display.distributed_by_columns), ) } else { - Some( - format!( - "index({}) include({}) distributed by({})", - display_comma_separated(&index_display.index_columns_with_ordering), - display_comma_separated(&index_display.include_columns), - display_comma_separated(&index_display.distributed_by_columns), - ) - .into(), + format!( + "index({}) include({}) distributed by({})", + display_comma_separated(&index_display.index_columns_with_ordering), + display_comma_separated(&index_display.include_columns), + display_comma_separated(&index_display.distributed_by_columns), ) }, - // Is Hidden - None, - // Description + is_hidden: None, // 
TODO: index description - None, - ]) + description: None, + } })); - rows.push(Row::new(vec![ - Some("table description".into()), - Some(relname.into()), - None, // Is Hidden - description.map(Into::into), // Description - ])); + rows.push(ShowColumnRow { + name: "table description".into(), + r#type: relname, + is_hidden: None, + description: description.map(Into::into), + }); // TODO: table name and description as title of response // TODO: recover the original user statement Ok(PgResponse::builder(StatementType::DESCRIBE) - .values( - rows.into(), - vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ) + .rows(rows) .into()) } +pub fn infer_describe() -> Vec { + fields_to_descriptors(ShowColumnRow::fields()) +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs index c25bf7678bd04..b966cca8f50cf 100644 --- a/src/frontend/src/handler/explain.rs +++ b/src/frontend/src/handler/explain.rs @@ -12,12 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use itertools::Itertools; -use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; -use risingwave_common::types::DataType; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{ExplainOptions, ExplainType, Statement}; use thiserror_ext::AsReport; @@ -27,7 +24,7 @@ use super::create_sink::{gen_sink_plan, get_partition_compute_info}; use super::create_table::ColumnIdGenerator; use super::query::gen_batch_plan_by_statement; use super::util::SourceSchemaCompatExt; -use super::RwPgResponse; +use super::{RwPgResponse, RwPgResponseBuilderExt}; use crate::error::{ErrorCode, Result}; use crate::handler::create_table::handle_create_table_plan; use crate::handler::HandlerArgs; @@ -254,20 +251,17 @@ pub async fn handle_explain( } } - let rows = blocks - .iter() - .flat_map(|b| b.lines().map(|l| l.to_owned())) - .map(|l| Row::new(vec![Some(l.into())])) - .collect_vec(); + let rows = blocks.iter().flat_map(|b| b.lines()).map(|l| ExplainRow { + query_plan: l.into(), + }); Ok(PgResponse::builder(StatementType::EXPLAIN) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "QUERY PLAN".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } + +#[derive(Fields)] +#[fields(style = "TITLE CASE")] +struct ExplainRow { + query_plan: String, +} diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index 3cdc4b191da92..827f28f87319e 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -18,11 +18,15 @@ use std::task::{Context, Poll}; use futures::stream::{self, BoxStream}; use futures::{Stream, StreamExt}; +use itertools::Itertools; +use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_response::StatementType::{self, ABORT, BEGIN, COMMIT, ROLLBACK, START_TRANSACTION}; use pgwire::pg_response::{PgResponse, PgResponseBuilder, 
RowSetResult}; use pgwire::pg_server::BoxedError; use pgwire::types::{Format, Row}; use risingwave_common::bail_not_implemented; +use risingwave_common::types::Fields; +use risingwave_common::util::iter_util::ZipEqFast; use risingwave_sqlparser::ast::*; use self::util::{DataChunkToRowSetAdapter, SourceSchemaCompatExt}; @@ -59,7 +63,7 @@ pub mod create_table; pub mod create_table_as; pub mod create_user; pub mod create_view; -mod describe; +pub mod describe; mod drop_connection; mod drop_database; pub mod drop_function; @@ -78,7 +82,7 @@ pub mod handle_privilege; mod kill_process; pub mod privilege; pub mod query; -mod show; +pub mod show; mod transaction; pub mod util; pub mod variable; @@ -90,6 +94,42 @@ pub type RwPgResponseBuilder = PgResponseBuilder; /// The [`PgResponse`] used by RisingWave. pub type RwPgResponse = PgResponse; +#[easy_ext::ext(RwPgResponseBuilderExt)] +impl RwPgResponseBuilder { + /// Append rows to the response. + pub fn rows(self, rows: impl IntoIterator) -> Self { + let fields = T::fields(); + self.values( + rows.into_iter() + .map(|row| { + Row::new( + row.into_owned_row() + .into_iter() + .zip_eq_fast(&fields) + .map(|(datum, (_, ty))| { + datum.map(|scalar| { + scalar.as_scalar_ref_impl().text_format(ty).into() + }) + }) + .collect(), + ) + }) + .collect_vec() + .into(), + fields_to_descriptors(fields), + ) + } +} + +pub fn fields_to_descriptors( + fields: Vec<(&str, risingwave_common::types::DataType)>, +) -> Vec { + fields + .iter() + .map(|(name, ty)| PgFieldDescriptor::new(name.to_string(), ty.to_oid(), ty.type_len())) + .collect() +} + pub enum PgResponseStream { LocalQuery(DataChunkToRowSetAdapter), DistributedQuery(DataChunkToRowSetAdapter), diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 4a98b6c7cd33d..226a219a11887 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -19,27 +19,24 @@ use pgwire::pg_field_descriptor::PgFieldDescriptor; use 
pgwire::pg_protocol::truncated_fmt; use pgwire::pg_response::{PgResponse, StatementType}; use pgwire::pg_server::Session; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ColumnCatalog, ColumnDesc, DEFAULT_SCHEMA_NAME}; -use risingwave_common::types::DataType; +use risingwave_common::types::{DataType, Fields}; use risingwave_common::util::addr::HostAddr; use risingwave_connector::source::kafka::PRIVATELINK_CONNECTION; use risingwave_expr::scalar::like::{i_like_default, like_default}; use risingwave_pb::catalog::connection; use risingwave_sqlparser::ast::{ - Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, + display_comma_separated, Ident, ObjectName, ShowCreateType, ShowObject, ShowStatementFilter, }; use serde_json; -use super::RwPgResponse; +use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::binder::{Binder, Relation}; use crate::catalog::{CatalogError, IndexCatalog}; use crate::error::Result; -use crate::handler::util::{col_descs_to_rows, indexes_to_rows}; use crate::handler::HandlerArgs; use crate::session::SessionImpl; -use crate::utils::infer_stmt_row_desc::infer_show_object; pub fn get_columns_from_table( session: &SessionImpl, @@ -109,6 +106,136 @@ fn schema_or_default(schema: &Option) -> String { .map_or_else(|| DEFAULT_SCHEMA_NAME.to_string(), |s| s.real_value()) } +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowObjectRow { + name: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +pub struct ShowColumnRow { + pub name: String, + pub r#type: String, + pub is_hidden: Option, + pub description: Option, +} + +impl ShowColumnRow { + pub fn from_catalog(col: ColumnCatalog) -> Vec { + col.column_desc + .flatten() + .into_iter() + .map(|c| { + let type_name = if let DataType::Struct { .. 
} = c.data_type { + c.type_name.clone() + } else { + c.data_type.to_string() + }; + ShowColumnRow { + name: c.name, + r#type: type_name, + is_hidden: Some(col.is_hidden.to_string()), + description: c.description, + } + }) + .collect() + } +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowConnectionRow { + name: String, + r#type: String, + properties: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowFunctionRow { + name: String, + arguments: String, + return_type: String, + language: String, + link: Option, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowIndexRow { + name: String, + on: String, + key: String, + include: String, + distributed_by: String, +} + +impl From> for ShowIndexRow { + fn from(index: Arc) -> Self { + let index_display = index.display(); + ShowIndexRow { + name: index.name.clone(), + on: index.primary_table.name.clone(), + key: display_comma_separated(&index_display.index_columns_with_ordering).to_string(), + include: display_comma_separated(&index_display.include_columns).to_string(), + distributed_by: display_comma_separated(&index_display.distributed_by_columns) + .to_string(), + } + } +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowClusterRow { + addr: String, + state: String, + parallel_units: String, + is_streaming: String, + is_serving: String, + is_unschedulable: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowJobRow { + id: i64, + statement: String, + progress: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowProcessListRow { + id: String, + user: String, + host: String, + database: String, + time: Option, + info: Option, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowCreateObjectRow { + name: String, + create_sql: String, +} + +/// Infer the row description for different show objects. 
+pub fn infer_show_object(objects: &ShowObject) -> Vec { + fields_to_descriptors(match objects { + ShowObject::Columns { .. } => ShowColumnRow::fields(), + ShowObject::Connection { .. } => ShowConnectionRow::fields(), + ShowObject::Function { .. } => ShowFunctionRow::fields(), + ShowObject::Indexes { .. } => ShowIndexRow::fields(), + ShowObject::Cluster => ShowClusterRow::fields(), + ShowObject::Jobs => ShowJobRow::fields(), + ShowObject::ProcessList => ShowProcessListRow::fields(), + _ => ShowObjectRow::fields(), + }) +} + pub async fn handle_show_object( handler_args: HandlerArgs, command: ShowObject, @@ -119,7 +246,6 @@ pub async fn handle_show_object( if let Some(ShowStatementFilter::Where(..)) = filter { bail_not_implemented!("WHERE clause in SHOW statement"); } - let row_desc = infer_show_object(&command); let catalog_reader = session.env().catalog_reader(); @@ -178,18 +304,15 @@ pub async fn handle_show_object( .into()); }; - let rows = col_descs_to_rows(columns); - return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(columns.into_iter().flat_map(ShowColumnRow::from_catalog)) .into()); } ShowObject::Indexes { table } => { let indexes = get_indexes_from_table(&session, table)?; - let rows = indexes_to_rows(indexes); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(indexes.into_iter().map(ShowIndexRow::from)) .into()); } ShowObject::Connection { schema } => { @@ -200,7 +323,7 @@ pub async fn handle_show_object( .iter_connections() .map(|c| { let name = c.name.clone(); - let conn_type = match &c.info { + let r#type = match &c.info { connection::Info::PrivateLinkService(_) => { PRIVATELINK_CONNECTION.to_string() }, @@ -230,105 +353,81 @@ pub async fn handle_show_object( ) } }; - Row::new(vec![ - Some(name.into()), - Some(conn_type.into()), - Some(properties.into()), - ]) - }) - .collect_vec(); + ShowConnectionRow { + name, + r#type, + properties, + } + }); return 
Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Function { schema } => { - let rows = catalog_reader - .read_guard() + let reader = catalog_reader.read_guard(); + let rows = reader .get_schema_by_name(session.database(), &schema_or_default(&schema))? .iter_function() - .map(|t| { - Row::new(vec![ - Some(t.name.clone().into()), - Some(t.arg_types.iter().map(|t| t.to_string()).join(", ").into()), - Some(t.return_type.to_string().into()), - Some(t.language.clone().into()), - t.link.clone().map(Into::into), - ]) - }) - .collect_vec(); + .map(|t| ShowFunctionRow { + name: t.name.clone(), + arguments: t.arg_types.iter().map(|t| t.to_string()).join(", "), + return_type: t.return_type.to_string(), + language: t.language.clone(), + link: t.link.clone(), + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Cluster => { let workers = session.env().worker_node_manager().list_worker_nodes(); - let rows = workers - .into_iter() - .map(|worker| { - let addr: HostAddr = worker.host.as_ref().unwrap().into(); - let property = worker.property.as_ref().unwrap(); - Row::new(vec![ - Some(addr.to_string().into()), - Some(worker.get_state().unwrap().as_str_name().into()), - Some( - worker - .parallel_units - .into_iter() - .map(|pu| pu.id) - .join(", ") - .into(), - ), - Some(property.is_streaming.to_string().into()), - Some(property.is_serving.to_string().into()), - Some(property.is_unschedulable.to_string().into()), - ]) - }) - .collect_vec(); + let rows = workers.into_iter().map(|worker| { + let addr: HostAddr = worker.host.as_ref().unwrap().into(); + let property = worker.property.as_ref().unwrap(); + ShowClusterRow { + addr: addr.to_string(), + state: worker.get_state().unwrap().as_str_name().to_string(), + parallel_units: worker.parallel_units.into_iter().map(|pu| pu.id).join(", "), + is_streaming: 
property.is_streaming.to_string(), + is_serving: property.is_serving.to_string(), + is_unschedulable: property.is_unschedulable.to_string(), + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::Jobs => { let resp = session.env().meta_client().list_ddl_progress().await?; - let rows = resp - .into_iter() - .map(|job| { - Row::new(vec![ - Some(job.id.to_string().into()), - Some(job.statement.into()), - Some(job.progress.into()), - ]) - }) - .collect_vec(); + let rows = resp.into_iter().map(|job| ShowJobRow { + id: job.id as i64, + statement: job.statement, + progress: job.progress, + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } ShowObject::ProcessList => { - let rows = { - let sessions_map = session.env().sessions_map(); - sessions_map - .read() - .values() - .map(|s| { - Row::new(vec![ - // Since process id and the secret id in the session id are the same in RisingWave, just display the process id. - Some(format!("{}", s.id().0).into()), - Some(s.user_name().to_owned().into()), - Some(format!("{}", s.peer_addr()).into()), - Some(s.database().to_owned().into()), - s.elapse_since_running_sql() - .map(|mills| format!("{}ms", mills).into()), - s.running_sql().map(|sql| { - format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024)).into() - }), - ]) - }) - .collect_vec() - }; + let sessions_map = session.env().sessions_map().read(); + let rows = sessions_map.values().map(|s| { + ShowProcessListRow { + // Since process id and the secret id in the session id are the same in RisingWave, just display the process id. 
+ id: format!("{}", s.id().0), + user: s.user_name().to_owned(), + host: format!("{}", s.peer_addr()), + database: s.database().to_owned(), + time: s + .elapse_since_running_sql() + .map(|mills| format!("{}ms", mills)), + info: s + .running_sql() + .map(|sql| format!("{}", truncated_fmt::TruncatedFmt(&sql, 1024))), + } + }); return Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values(rows.into(), row_desc) + .rows(rows) .into()); } }; @@ -341,21 +440,17 @@ pub async fn handle_show_object( Some(ShowStatementFilter::Where(..)) => unreachable!(), None => true, }) - .map(|n| Row::new(vec![Some(n.into())])) - .collect_vec(); + .map(|name| ShowObjectRow { name }); Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values( - rows.into(), - vec![PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - ) + .rows(rows) .into()) } +pub fn infer_show_create_object() -> Vec { + fields_to_descriptors(ShowCreateObjectRow::fields()) +} + pub fn handle_show_create_object( handle_args: HandlerArgs, show_create_type: ShowCreateType, @@ -415,21 +510,10 @@ pub fn handle_show_create_object( let name = format!("{}.{}", schema_name, object_name); Ok(PgResponse::builder(StatementType::SHOW_COMMAND) - .values( - vec![Row::new(vec![Some(name.into()), Some(sql.into())])].into(), - vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Create Sql".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ) + .rows([ShowCreateObjectRow { + name, + create_sql: sql, + }]) .into()) } diff --git a/src/frontend/src/handler/transaction.rs b/src/frontend/src/handler/transaction.rs index 452cfe0ed9299..8ab7af36c29ca 100644 --- a/src/frontend/src/handler/transaction.rs +++ b/src/frontend/src/handler/transaction.rs @@ -13,14 +13,13 @@ // limitations under the License. 
use pgwire::pg_response::StatementType; -use pgwire::types::Row; use risingwave_common::bail_not_implemented; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{TransactionAccessMode, TransactionMode, Value}; -use super::{HandlerArgs, RwPgResponse}; +use super::{HandlerArgs, RwPgResponse, RwPgResponseBuilderExt}; use crate::error::Result; use crate::session::transaction::AccessMode; -use crate::utils::infer_stmt_row_desc::infer_show_variable; macro_rules! not_impl { ($body:expr) => { @@ -118,16 +117,20 @@ pub async fn handle_set( .into()) } +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableRow { + name: String, +} + pub fn handle_show_isolation_level(handler_args: HandlerArgs) -> Result { let config_reader = handler_args.session.config(); - let parameter_name = "transaction_isolation"; - let row_desc = infer_show_variable(parameter_name); - let rows = vec![Row::new(vec![Some( - config_reader.get(parameter_name)?.into(), - )])]; + let rows = [ShowVariableRow { + name: config_reader.get("transaction_isolation")?, + }]; Ok(RwPgResponse::builder(StatementType::SHOW_VARIABLE) - .values(rows.into(), row_desc) + .rows(rows) .into()) } diff --git a/src/frontend/src/handler/util.rs b/src/frontend/src/handler/util.rs index 6e91cf53f0b32..ab9d4fe415b33 100644 --- a/src/frontend/src/handler/util.rs +++ b/src/frontend/src/handler/util.rs @@ -27,14 +27,14 @@ use pgwire::pg_server::BoxedError; use pgwire::types::{Format, FormatIterator, Row}; use pin_project_lite::pin_project; use risingwave_common::array::DataChunk; -use risingwave_common::catalog::{ColumnCatalog, Field}; +use risingwave_common::catalog::Field; use risingwave_common::row::Row as _; use risingwave_common::types::{DataType, ScalarRefImpl, Timestamptz}; use risingwave_common::util::iter_util::ZipEqFast; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; use risingwave_connector::source::KAFKA_CONNECTOR; -use risingwave_sqlparser::ast::{display_comma_separated, 
CompatibleSourceSchema, ConnectorSchema}; +use risingwave_sqlparser::ast::{CompatibleSourceSchema, ConnectorSchema}; -use crate::catalog::IndexCatalog; use crate::error::{ErrorCode, Result as RwResult}; use crate::handler::create_source::UPSTREAM_SOURCE_KEY; use crate::session::{current, SessionImpl}; @@ -172,66 +172,6 @@ fn to_pg_rows( .try_collect() } -/// Convert column descs to rows which conclude name and type -pub fn col_descs_to_rows(columns: Vec) -> Vec { - columns - .iter() - .flat_map(|col| { - col.column_desc - .flatten() - .into_iter() - .map(|c| { - let type_name = if let DataType::Struct { .. } = c.data_type { - c.type_name.clone() - } else { - c.data_type.to_string() - }; - Row::new(vec![ - Some(c.name.into()), - Some(type_name.into()), - Some(col.is_hidden.to_string().into()), - c.description.map(Into::into), - ]) - }) - .collect_vec() - }) - .collect_vec() -} - -pub fn indexes_to_rows(indexes: Vec>) -> Vec { - indexes - .iter() - .map(|index| { - let index_display = index.display(); - Row::new(vec![ - Some(index.name.clone().into()), - Some(index.primary_table.name.clone().into()), - Some( - format!( - "{}", - display_comma_separated(&index_display.index_columns_with_ordering) - ) - .into(), - ), - Some( - format!( - "{}", - display_comma_separated(&index_display.include_columns) - ) - .into(), - ), - Some( - format!( - "{}", - display_comma_separated(&index_display.distributed_by_columns) - ) - .into(), - ), - ]) - }) - .collect_vec() -} - /// Convert from [`Field`] to [`PgFieldDescriptor`]. 
pub fn to_pg_field(f: &Field) -> PgFieldDescriptor { PgFieldDescriptor::new( @@ -241,6 +181,11 @@ pub fn to_pg_field(f: &Field) -> PgFieldDescriptor { ) } +pub fn connector_need_pk(with_properties: &HashMap) -> bool { + // Currently only iceberg connector doesn't need primary key + !is_iceberg_connector(with_properties) +} + #[inline(always)] pub fn get_connector(with_properties: &HashMap) -> Option { with_properties @@ -265,6 +210,14 @@ pub fn is_cdc_connector(with_properties: &HashMap) -> bool { connector.contains("-cdc") } +#[inline(always)] +pub fn is_iceberg_connector(with_properties: &HashMap) -> bool { + let Some(connector) = get_connector(with_properties) else { + return false; + }; + connector == ICEBERG_CONNECTOR +} + #[easy_ext::ext(SourceSchemaCompatExt)] impl CompatibleSourceSchema { /// Convert `self` to [`ConnectorSchema`] and warn the user if the syntax is deprecated. diff --git a/src/frontend/src/handler/variable.rs b/src/frontend/src/handler/variable.rs index 884947c88b763..96fd232215ccd 100644 --- a/src/frontend/src/handler/variable.rs +++ b/src/frontend/src/handler/variable.rs @@ -14,19 +14,18 @@ use anyhow::Context; use itertools::Itertools; +use pgwire::pg_field_descriptor::PgFieldDescriptor; use pgwire::pg_protocol::ParameterStatus; use pgwire::pg_response::{PgResponse, StatementType}; -use pgwire::types::Row; use risingwave_common::session_config::{ConfigReporter, SESSION_CONFIG_LIST_SEP}; -use risingwave_common::system_param::is_mutable; -use risingwave_common::types::{DataType, ScalarRefImpl}; +use risingwave_common::system_param::reader::SystemParamsRead; +use risingwave_common::types::Fields; use risingwave_sqlparser::ast::{Ident, SetTimeZoneValue, SetVariableValue, Value}; use risingwave_sqlparser::keywords::Keyword; -use super::RwPgResponse; +use super::{fields_to_descriptors, RwPgResponse, RwPgResponseBuilderExt}; use crate::error::Result; use crate::handler::HandlerArgs; -use crate::utils::infer_stmt_row_desc::infer_show_variable; 
/// convert `SetVariableValue` to string while remove the quotes on literals. pub(crate) fn set_var_to_param_str(value: &SetVariableValue) -> Option { @@ -117,40 +116,36 @@ pub(super) async fn handle_show( ) -> Result { // TODO: Verify that the name used in `show` command is indeed always case-insensitive. let name = variable.iter().map(|e| e.real_value()).join(" "); - let row_desc = infer_show_variable(&name); - let rows = if name.eq_ignore_ascii_case("PARAMETERS") { - handle_show_system_params(handler_args).await? + if name.eq_ignore_ascii_case("PARAMETERS") { + handle_show_system_params(handler_args).await } else if name.eq_ignore_ascii_case("ALL") { - handle_show_all(handler_args.clone())? + handle_show_all(handler_args.clone()) } else { let config_reader = handler_args.session.config(); - vec![Row::new(vec![Some(config_reader.get(&name)?.into())])] - }; - - Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) - .values(rows.into(), row_desc) - .into()) + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows([ShowVariableRow { + name: config_reader.get(&name)?, + }]) + .into()) + } } -fn handle_show_all(handler_args: HandlerArgs) -> Result> { +fn handle_show_all(handler_args: HandlerArgs) -> Result { let config_reader = handler_args.session.config(); let all_variables = config_reader.show_all(); - let rows = all_variables - .iter() - .map(|info| { - Row::new(vec![ - Some(info.name.clone().into()), - Some(info.setting.clone().into()), - Some(info.description.clone().into()), - ]) - }) - .collect_vec(); - Ok(rows) + let rows = all_variables.iter().map(|info| ShowVariableAllRow { + name: info.name.clone(), + setting: info.setting.clone(), + description: info.description.clone(), + }); + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows(rows) + .into()) } -async fn handle_show_system_params(handler_args: HandlerArgs) -> Result> { +async fn handle_show_system_params(handler_args: HandlerArgs) -> Result { let params = handler_args .session .env() @@ 
-158,14 +153,48 @@ async fn handle_show_system_params(handler_args: HandlerArgs) -> Result .get_system_params() .await?; let rows = params - .to_kv() + .get_all() .into_iter() - .map(|(k, v)| { - let is_mutable_bytes = ScalarRefImpl::Bool(is_mutable(&k).unwrap()) - .text_format(&DataType::Boolean) - .into(); - Row::new(vec![Some(k.into()), Some(v.into()), Some(is_mutable_bytes)]) - }) - .collect_vec(); - Ok(rows) + .map(|info| ShowVariableParamsRow { + name: info.name.into(), + value: info.value, + description: info.description.into(), + mutable: info.mutable, + }); + Ok(PgResponse::builder(StatementType::SHOW_VARIABLE) + .rows(rows) + .into()) +} + +pub fn infer_show_variable(name: &str) -> Vec { + fields_to_descriptors(if name.eq_ignore_ascii_case("ALL") { + ShowVariableAllRow::fields() + } else if name.eq_ignore_ascii_case("PARAMETERS") { + ShowVariableParamsRow::fields() + } else { + ShowVariableRow::fields() + }) +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableRow { + name: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableAllRow { + name: String, + setting: String, + description: String, +} + +#[derive(Fields)] +#[fields(style = "Title Case")] +struct ShowVariableParamsRow { + name: String, + value: String, + description: String, + mutable: bool, } diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs index fa7ad908d01d4..43ec6d2a89de8 100644 --- a/src/frontend/src/optimizer/plan_node/logical_source.rs +++ b/src/frontend/src/optimizer/plan_node/logical_source.rs @@ -23,7 +23,8 @@ use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ ColumnCatalog, ColumnDesc, Field, Schema, KAFKA_TIMESTAMP_COLUMN_NAME, }; -use risingwave_connector::source::DataType; +use risingwave_connector::source::iceberg::ICEBERG_CONNECTOR; +use risingwave_connector::source::{DataType, UPSTREAM_SOURCE_KEY}; use 
risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; use risingwave_pb::plan_common::GeneratedColumnDesc; @@ -546,6 +547,18 @@ impl ToStream for LogicalSource { } } } + if let Some(source) = &self.core.catalog { + let connector = &source + .with_properties + .get(UPSTREAM_SOURCE_KEY) + .map(|s| s.to_lowercase()) + .unwrap(); + if ICEBERG_CONNECTOR == connector { + return Err( + anyhow::anyhow!("Iceberg source is not supported in stream queries").into(), + ); + } + } Ok(plan) } diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs index 3abc7ace0e494..f2acbcf9d258c 100644 --- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs +++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs @@ -226,6 +226,7 @@ impl StreamMaterialize { id: TableId::placeholder(), associated_source_id: None, name, + dependent_relations: vec![], columns, pk: table_pk, stream_key, diff --git a/src/frontend/src/optimizer/plan_node/utils.rs b/src/frontend/src/optimizer/plan_node/utils.rs index 39d9ff5e7018d..c8cd1bb05fa83 100644 --- a/src/frontend/src/optimizer/plan_node/utils.rs +++ b/src/frontend/src/optimizer/plan_node/utils.rs @@ -141,6 +141,7 @@ impl TableCatalogBuilder { id: TableId::placeholder(), associated_source_id: None, name: String::new(), + dependent_relations: vec![], columns: self.columns.clone(), pk: self.pk, stream_key: vec![], diff --git a/src/frontend/src/scheduler/distributed/query.rs b/src/frontend/src/scheduler/distributed/query.rs index 6295d8036b566..515a83d0923ef 100644 --- a/src/frontend/src/scheduler/distributed/query.rs +++ b/src/frontend/src/scheduler/distributed/query.rs @@ -543,6 +543,7 @@ pub(crate) mod tests { id: table_id, associated_source_id: None, name: "test".to_string(), + dependent_relations: vec![], columns: vec![ ColumnCatalog { column_desc: ColumnDesc::new_atomic(DataType::Int32, "a", 0), diff --git a/src/frontend/src/scheduler/task_context.rs 
b/src/frontend/src/scheduler/task_context.rs index dfb2496dad556..dcfbf30a215a1 100644 --- a/src/frontend/src/scheduler/task_context.rs +++ b/src/frontend/src/scheduler/task_context.rs @@ -52,6 +52,7 @@ impl BatchTaskContext for FrontendBatchTaskContext { self.session.env().meta_client_ref(), self.session.auth_context(), self.session.shared_config(), + self.session.env().system_params_manager().get_params(), )) } diff --git a/src/frontend/src/session.rs b/src/frontend/src/session.rs index 9419999f8e479..67a5da01e1213 100644 --- a/src/frontend/src/session.rs +++ b/src/frontend/src/session.rs @@ -43,7 +43,9 @@ use risingwave_common::catalog::{ }; use risingwave_common::config::{load_config, BatchConfig, MetaConfig, MetricLevel}; use risingwave_common::session_config::{ConfigMap, ConfigReporter, VisibilityMode}; -use risingwave_common::system_param::local_manager::LocalSystemParamsManager; +use risingwave_common::system_param::local_manager::{ + LocalSystemParamsManager, LocalSystemParamsManagerRef, +}; use risingwave_common::telemetry::manager::TelemetryManager; use risingwave_common::telemetry::telemetry_env_enabled; use risingwave_common::types::DataType; @@ -79,11 +81,14 @@ use crate::catalog::{ check_schema_writable, CatalogError, DatabaseId, OwnedByUserCatalog, SchemaId, }; use crate::error::{ErrorCode, Result, RwError}; +use crate::handler::describe::infer_describe; use crate::handler::extended_handle::{ handle_bind, handle_execute, handle_parse, Portal, PrepareStatement, }; use crate::handler::privilege::ObjectCheckItem; +use crate::handler::show::{infer_show_create_object, infer_show_object}; use crate::handler::util::to_pg_field; +use crate::handler::variable::infer_show_variable; use crate::handler::{handle, RwPgResponse}; use crate::health_service::HealthServiceImpl; use crate::meta_client::{FrontendMetaClient, FrontendMetaClientImpl}; @@ -100,7 +105,6 @@ use crate::user::user_authentication::md5_hash_with_salt; use 
crate::user::user_manager::UserInfoManager; use crate::user::user_service::{UserInfoReader, UserInfoWriter, UserInfoWriterImpl}; use crate::user::UserId; -use crate::utils::infer_stmt_row_desc::{infer_show_object, infer_show_variable}; use crate::{FrontendOpts, PgResponseStream}; pub(crate) mod current; @@ -119,6 +123,8 @@ pub struct FrontendEnv { worker_node_manager: WorkerNodeManagerRef, query_manager: QueryManager, hummock_snapshot_manager: HummockSnapshotManagerRef, + system_params_manager: LocalSystemParamsManagerRef, + server_addr: HostAddr, client_pool: ComputeClientPoolRef, @@ -159,6 +165,7 @@ impl FrontendEnv { let worker_node_manager = Arc::new(WorkerNodeManager::mock(vec![])); let meta_client = Arc::new(MockFrontendMetaClient {}); let hummock_snapshot_manager = Arc::new(HummockSnapshotManager::new(meta_client.clone())); + let system_params_manager = Arc::new(LocalSystemParamsManager::for_test()); let compute_client_pool = Arc::new(ComputeClientPool::default()); let query_manager = QueryManager::new( worker_node_manager.clone(), @@ -191,6 +198,7 @@ impl FrontendEnv { worker_node_manager, query_manager, hummock_snapshot_manager, + system_params_manager, server_addr, client_pool, sessions_map: Arc::new(RwLock::new(HashMap::new())), @@ -383,6 +391,7 @@ impl FrontendEnv { meta_client: frontend_meta_client, query_manager, hummock_snapshot_manager, + system_params_manager, server_addr: frontend_address, client_pool, frontend_metrics, @@ -448,6 +457,10 @@ impl FrontendEnv { &self.hummock_snapshot_manager } + pub fn system_params_manager(&self) -> &LocalSystemParamsManagerRef { + &self.system_params_manager + } + pub fn server_address(&self) -> &HostAddr { &self.server_addr } @@ -1231,18 +1244,7 @@ fn infer(bound: Option, stmt: Statement) -> Result Ok(infer_show_object(&show_object)), - Statement::ShowCreateObject { .. 
} => Ok(vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Create Sql".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ]), + Statement::ShowCreateObject { .. } => Ok(infer_show_create_object()), Statement::ShowTransactionIsolationLevel => { let name = "transaction_isolation"; Ok(infer_show_variable(name)) @@ -1251,28 +1253,7 @@ fn infer(bound: Option, stmt: Statement) -> Result Ok(vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ]), + Statement::Describe { name: _ } => Ok(infer_describe()), Statement::Explain { .. } => Ok(vec![PgFieldDescriptor::new( "QUERY PLAN".to_owned(), DataType::Varchar.to_oid(), diff --git a/src/frontend/src/utils/infer_stmt_row_desc.rs b/src/frontend/src/utils/infer_stmt_row_desc.rs deleted file mode 100644 index 1ee950997720c..0000000000000 --- a/src/frontend/src/utils/infer_stmt_row_desc.rs +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright 2024 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -use pgwire::pg_field_descriptor::PgFieldDescriptor; -use risingwave_common::types::DataType; -use risingwave_sqlparser::ast::ShowObject; - -/// `infer_stmt_row_desc` is used to infer the row description for different show objects. -pub fn infer_show_object(objects: &ShowObject) -> Vec { - match objects { - ShowObject::Columns { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Hidden".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Connection { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Properties".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Function { .. } => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Arguments".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Return Type".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Language".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Link".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Indexes { .. 
} => vec![ - PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "On".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Key".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Include".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Distributed By".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Cluster => vec![ - PgFieldDescriptor::new( - "Addr".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "State".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Parallel Units".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Streaming".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Serving".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Is Unschedulable".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::Jobs => vec![ - PgFieldDescriptor::new( - "Id".to_owned(), - DataType::Int64.to_oid(), - DataType::Int64.type_len(), - ), - PgFieldDescriptor::new( - "Statement".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Progress".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - ShowObject::ProcessList => vec![ - PgFieldDescriptor::new( - "Id".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "User".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - 
PgFieldDescriptor::new( - "Host".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Database".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Time".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Info".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ], - _ => vec![PgFieldDescriptor::new( - "Name".to_owned(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )], - } -} - -pub fn infer_show_variable(name: &str) -> Vec { - if name.eq_ignore_ascii_case("ALL") { - vec![ - PgFieldDescriptor::new( - "Name".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Setting".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Description".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - ] - } else if name.eq_ignore_ascii_case("PARAMETERS") { - vec![ - PgFieldDescriptor::new( - "Name".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Value".to_string(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - ), - PgFieldDescriptor::new( - "Mutable".to_string(), - DataType::Boolean.to_oid(), - DataType::Boolean.type_len(), - ), - ] - } else { - vec![PgFieldDescriptor::new( - name.to_ascii_lowercase(), - DataType::Varchar.to_oid(), - DataType::Varchar.type_len(), - )] - } -} diff --git a/src/frontend/src/utils/mod.rs b/src/frontend/src/utils/mod.rs index bfe7cb093aad0..697b626fb3398 100644 --- a/src/frontend/src/utils/mod.rs +++ b/src/frontend/src/utils/mod.rs @@ -30,7 +30,6 @@ pub use rewrite_index::*; mod index_set; pub use index_set::*; pub(crate) mod group_by; -pub mod infer_stmt_row_desc; pub mod overwrite_options; pub use group_by::*; pub use 
overwrite_options::*; diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 2e770fb841ada..8d7c4253631d5 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -254,7 +254,7 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { const MIN_TIMEOUT_INTERVAL_SEC: u64 = 20; let compaction_task_max_progress_interval_secs = { - config + (config .storage .object_store .object_store_read_timeout_ms @@ -271,7 +271,8 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { .object_store .object_store_streaming_upload_timeout_ms, ) - .max(config.meta.compaction_task_max_progress_interval_secs) + .max(config.meta.compaction_task_max_progress_interval_secs * 1000)) + / 1000 } + MIN_TIMEOUT_INTERVAL_SEC; let (mut join_handle, leader_lost_handle, shutdown_send) = rpc_serve( diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index 6077efa7f88c1..e26e1af0f0cff 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -19,16 +19,18 @@ use std::sync::Arc; use anyhow::anyhow; use itertools::Itertools; use risingwave_common::catalog::{TableOption, DEFAULT_SCHEMA_NAME, SYSTEM_SCHEMAS}; +use risingwave_common::util::stream_graph_visitor::visit_stream_node_cont; use risingwave_common::{bail, current_cluster_version}; +use risingwave_meta_model_v2::fragment::StreamNode; use risingwave_meta_model_v2::object::ObjectType; use risingwave_meta_model_v2::prelude::*; use risingwave_meta_model_v2::table::TableType; use risingwave_meta_model_v2::{ - connection, database, function, index, object, object_dependency, schema, sink, source, - streaming_job, table, user_privilege, view, ActorId, ColumnCatalogArray, ConnectionId, - CreateType, DatabaseId, FragmentId, FunctionId, IndexId, JobStatus, ObjectId, - PrivateLinkService, SchemaId, SourceId, StreamSourceInfo, StreamingParallelism, TableId, - UserId, + actor, connection, database, fragment, function, index, object, object_dependency, schema, + sink, 
source, streaming_job, table, user_privilege, view, ActorId, ActorUpstreamActors, + ColumnCatalogArray, ConnectionId, CreateType, DatabaseId, FragmentId, FunctionId, I32Array, + IndexId, JobStatus, ObjectId, PrivateLinkService, SchemaId, SourceId, StreamSourceInfo, + StreamingParallelism, TableId, UserId, }; use risingwave_pb::catalog::table::PbTableType; use risingwave_pb::catalog::{ @@ -41,6 +43,8 @@ use risingwave_pb::meta::subscribe_response::{ Info as NotificationInfo, Info, Operation as NotificationOperation, Operation, }; use risingwave_pb::meta::{PbRelation, PbRelationGroup}; +use risingwave_pb::stream_plan::stream_node::NodeBody; +use risingwave_pb::stream_plan::FragmentTypeFlag; use risingwave_pb::user::PbUserInfo; use sea_orm::sea_query::{Expr, SimpleExpr}; use sea_orm::ActiveValue::Set; @@ -423,6 +427,7 @@ impl CatalogController { pub async fn clean_dirty_creating_jobs(&self) -> MetaResult { let inner = self.inner.write().await; let txn = inner.db.begin().await?; + let creating_job_ids: Vec = streaming_job::Entity::find() .select_only() .column(streaming_job::Column::JobId) @@ -436,7 +441,14 @@ impl CatalogController { .into_tuple() .all(&txn) .await?; + + let changed = Self::clean_dirty_sink_downstreams(&txn).await?; + if creating_job_ids.is_empty() { + if changed { + txn.commit().await?; + } + return Ok(ReleaseContext::default()); } @@ -476,6 +488,7 @@ impl CatalogController { .exec(&txn) .await?; assert!(res.rows_affected > 0); + txn.commit().await?; Ok(ReleaseContext { @@ -485,6 +498,175 @@ impl CatalogController { }) } + async fn clean_dirty_sink_downstreams(txn: &DatabaseTransaction) -> MetaResult { + // clean incoming sink from (table) + // clean upstream fragment ids from (fragment) + // clean stream node from (fragment) + // clean upstream actor ids from (actor) + let all_fragment_ids: Vec = Fragment::find() + .select_only() + .columns(vec![fragment::Column::FragmentId]) + .into_tuple() + .all(txn) + .await?; + + let all_fragment_ids: 
HashSet<_> = all_fragment_ids.into_iter().collect(); + + let table_sink_ids: Vec = Sink::find() + .select_only() + .column(sink::Column::SinkId) + .filter(sink::Column::TargetTable.is_not_null()) + .into_tuple() + .all(txn) + .await?; + + let all_table_with_incoming_sinks: Vec<(ObjectId, I32Array)> = Table::find() + .select_only() + .columns(vec![table::Column::TableId, table::Column::IncomingSinks]) + .into_tuple() + .all(txn) + .await?; + + let table_incoming_sinks_to_update = all_table_with_incoming_sinks + .into_iter() + .filter(|(_, incoming_sinks)| { + let inner_ref = incoming_sinks.inner_ref(); + !inner_ref.is_empty() + && inner_ref + .iter() + .any(|sink_id| !table_sink_ids.contains(sink_id)) + }) + .collect_vec(); + + let new_table_incoming_sinks = table_incoming_sinks_to_update + .into_iter() + .map(|(table_id, incoming_sinks)| { + let new_incoming_sinks = incoming_sinks + .into_inner() + .extract_if(|id| table_sink_ids.contains(id)) + .collect_vec(); + (table_id, I32Array::from(new_incoming_sinks)) + }) + .collect_vec(); + + // no need to update, returning + if new_table_incoming_sinks.is_empty() { + return Ok(false); + } + + for (table_id, new_incoming_sinks) in new_table_incoming_sinks { + tracing::info!("cleaning dirty table sink downstream table {}", table_id); + Table::update_many() + .col_expr(table::Column::IncomingSinks, new_incoming_sinks.into()) + .filter(table::Column::TableId.eq(table_id)) + .exec(txn) + .await?; + + let fragments: Vec<(FragmentId, I32Array, StreamNode, i32)> = Fragment::find() + .select_only() + .columns(vec![ + fragment::Column::FragmentId, + fragment::Column::UpstreamFragmentId, + fragment::Column::StreamNode, + fragment::Column::FragmentTypeMask, + ]) + .filter(fragment::Column::JobId.eq(table_id)) + .into_tuple() + .all(txn) + .await?; + + for (fragment_id, upstream_fragment_ids, stream_node, fragment_mask) in fragments { + let mut upstream_fragment_ids = upstream_fragment_ids.into_inner(); + + let 
dirty_upstream_fragment_ids = upstream_fragment_ids + .extract_if(|id| !all_fragment_ids.contains(id)) + .collect_vec(); + + if !dirty_upstream_fragment_ids.is_empty() { + // dirty downstream should be materialize fragment of table + assert!(fragment_mask & FragmentTypeFlag::Mview as i32 > 0); + + tracing::info!( + "cleaning dirty table sink fragment {:?} from downstream fragment {}", + dirty_upstream_fragment_ids, + fragment_id + ); + + let mut pb_stream_node = stream_node.to_protobuf(); + + visit_stream_node_cont(&mut pb_stream_node, |node| { + if let Some(NodeBody::Union(_)) = node.node_body { + node.input.retain_mut(|input| { + if let Some(NodeBody::Merge(merge_node)) = &mut input.node_body + && all_fragment_ids + .contains(&(merge_node.upstream_fragment_id as i32)) + { + true + } else { + false + } + }); + } + true + }); + + Fragment::update_many() + .col_expr( + fragment::Column::UpstreamFragmentId, + I32Array::from(upstream_fragment_ids).into(), + ) + .col_expr( + fragment::Column::StreamNode, + StreamNode::from_protobuf(&pb_stream_node).into(), + ) + .filter(fragment::Column::FragmentId.eq(fragment_id)) + .exec(txn) + .await?; + + let actors: Vec<(ActorId, ActorUpstreamActors)> = Actor::find() + .select_only() + .columns(vec![ + actor::Column::ActorId, + actor::Column::UpstreamActorIds, + ]) + .filter(actor::Column::FragmentId.eq(fragment_id)) + .into_tuple() + .all(txn) + .await?; + + for (actor_id, upstream_actor_ids) in actors { + let mut upstream_actor_ids = upstream_actor_ids.into_inner(); + + let dirty_actor_upstreams = upstream_actor_ids + .extract_if(|id, _| !all_fragment_ids.contains(id)) + .map(|(id, _)| id) + .collect_vec(); + + if !dirty_actor_upstreams.is_empty() { + tracing::debug!( + "cleaning dirty table sink fragment {:?} from downstream fragment {} actor {}", + dirty_actor_upstreams, + fragment_id, + actor_id, + ); + + Actor::update_many() + .col_expr( + actor::Column::UpstreamActorIds, + 
ActorUpstreamActors::from(upstream_actor_ids).into(), + ) + .filter(actor::Column::ActorId.eq(actor_id)) + .exec(txn) + .await?; + } + } + } + } + } + + Ok(true) + } + /// `finish_streaming_job` marks job related objects as `Created` and notify frontend. pub async fn finish_streaming_job(&self, job_id: ObjectId) -> MetaResult { let inner = self.inner.write().await; @@ -1487,6 +1669,52 @@ impl CatalogController { ); to_drop_objects.push(obj); + // Special handling for 'sink into table'. + if object_type != ObjectType::Sink { + // When dropping a table downstream, all incoming sinks of the table should be dropped as well. + if object_type == ObjectType::Table { + let table = Table::find_by_id(object_id) + .one(&txn) + .await? + .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; + + let incoming_sinks = table.incoming_sinks.into_inner(); + + if !incoming_sinks.is_empty() { + let objs: Vec = Object::find() + .filter(object::Column::Oid.is_in(incoming_sinks)) + .into_partial_model() + .all(&txn) + .await?; + + to_drop_objects.extend(objs); + } + } + + let to_drop_object_ids: HashSet<_> = + to_drop_objects.iter().map(|obj| obj.oid).collect(); + + // When there is a table sink in the dependency chain of drop cascade, an error message needs to be returned currently to manually drop the sink. + for obj in &to_drop_objects { + if obj.obj_type == ObjectType::Sink { + let sink = Sink::find_by_id(obj.oid) + .one(&txn) + .await? + .ok_or_else(|| MetaError::catalog_id_not_found("sink", obj.oid))?; + + // Since dropping the sink into the table requires the frontend to handle some of the logic (regenerating the plan), it’s not compatible with the current cascade dropping. 
+ if let Some(target_table) = sink.target_table + && !to_drop_object_ids.contains(&target_table) + { + bail!( + "Found sink into table with sink id {} in dependency, please drop them manually", + obj.oid, + ); + } + } + } + } + let to_drop_table_ids = to_drop_objects .iter() .filter(|obj| obj.obj_type == ObjectType::Table || obj.obj_type == ObjectType::Index) @@ -1856,22 +2084,28 @@ impl CatalogController { }); }}; } - let objs = get_referring_objects(object_id, &txn).await?; - // TODO: For sink into table. when sink into table is ready. - // if object_type == ObjectType::Table { - // let incoming_sinks: Vec<_> = Table::find_by_id(object_id) - // .select_only() - // .column(table::Column::IncomingSinks) - // .into_tuple() - // .one(&txn) - // .await? - // .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; - // objs.extend(incoming_sinks.into_iter().map(|id| PartialObject { - // oid: id as _, - // obj_type: ObjectType::Sink, - // ..Default::default() - // })); - // } + let mut objs = get_referring_objects(object_id, &txn).await?; + if object_type == ObjectType::Table { + let incoming_sinks: I32Array = Table::find_by_id(object_id) + .select_only() + .column(table::Column::IncomingSinks) + .into_tuple() + .one(&txn) + .await? + .ok_or_else(|| MetaError::catalog_id_not_found("table", object_id))?; + + objs.extend( + incoming_sinks + .into_inner() + .into_iter() + .map(|id| PartialObject { + oid: id, + obj_type: ObjectType::Sink, + schema_id: None, + database_id: None, + }), + ); + } for obj in objs { match obj.obj_type { diff --git a/src/meta/src/controller/rename.rs b/src/meta/src/controller/rename.rs index bde954a587fdf..15be4d7ef83b8 100644 --- a/src/meta/src/controller/rename.rs +++ b/src/meta/src/controller/rename.rs @@ -79,6 +79,7 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str stmt: CreateSinkStatement { sink_from: CreateSink::AsQuery(query), + into_table_name: None, .. 
}, } => { @@ -89,9 +90,27 @@ pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> Str stmt: CreateSinkStatement { sink_from: CreateSink::From(table_name), + into_table_name: None, .. }, } => replace_table_name(table_name, to), + Statement::CreateSink { + stmt: CreateSinkStatement { + sink_from, + into_table_name: Some(table_name), + .. + } + } => { + let idx = table_name.0.len() - 1; + if table_name.0[idx].real_value() == from { + table_name.0[idx] = Ident::new_unchecked(to); + } else { + match sink_from { + CreateSink::From(table_name) => replace_table_name(table_name, to), + CreateSink::AsQuery(query) => QueryRewriter::rewrite_query(query, from, to), + } + } + } _ => unreachable!(), }; stmt.to_string() diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index 9bb8af6172469..7c4360a92f285 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -16,6 +16,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::num::NonZeroUsize; use itertools::Itertools; +use risingwave_common::bail; use risingwave_common::buffer::Bitmap; use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping}; use risingwave_common::util::column_index_mapping::ColIndexMapping; @@ -64,8 +65,8 @@ use crate::barrier::Reschedule; use crate::controller::catalog::CatalogController; use crate::controller::rename::ReplaceTableExprRewriter; use crate::controller::utils::{ - check_relation_name_duplicate, ensure_object_id, ensure_user_id, get_fragment_actor_ids, - get_fragment_mappings, + check_relation_name_duplicate, check_sink_into_table_cycle, ensure_object_id, ensure_user_id, + get_fragment_actor_ids, get_fragment_mappings, }; use crate::controller::ObjectModel; use crate::manager::{NotificationVersion, SinkId, StreamingJob}; @@ -141,6 +142,21 @@ impl CatalogController { Table::insert(table).exec(&txn).await?; } StreamingJob::Sink(sink, _) => { 
+ if let Some(target_table_id) = sink.target_table { + if check_sink_into_table_cycle( + target_table_id as ObjectId, + sink.dependent_relations + .iter() + .map(|id| *id as ObjectId) + .collect(), + &txn, + ) + .await? + { + bail!("Creating such a sink will result in circular dependency."); + } + } + let job_id = Self::create_streaming_job_obj( &txn, ObjectType::Sink, diff --git a/src/meta/src/controller/system_param.rs b/src/meta/src/controller/system_param.rs index 4b2e598a2c221..855112acb7167 100644 --- a/src/meta/src/controller/system_param.rs +++ b/src/meta/src/controller/system_param.rs @@ -186,7 +186,7 @@ impl SystemParamsController { .await? else { return Err(MetaError::system_params(format!( - "unrecognized system parameter {}", + "unrecognized system parameter {:?}", name ))); }; diff --git a/src/meta/src/controller/utils.rs b/src/meta/src/controller/utils.rs index ff19892d516b5..6c7e61a316add 100644 --- a/src/meta/src/controller/utils.rs +++ b/src/meta/src/controller/utils.rs @@ -118,6 +118,107 @@ pub fn construct_obj_dependency_query(obj_id: ObjectId) -> WithQuery { .to_owned() } +/// This function will construct a query using recursive cte to find if dependent objects are already relying on the target table. 
+/// +/// # Examples +/// +/// ``` +/// use risingwave_meta::controller::utils::construct_sink_cycle_check_query; +/// use sea_orm::sea_query::*; +/// use sea_orm::*; +/// +/// let query = construct_sink_cycle_check_query(1, vec![2, 3]); +/// +/// assert_eq!( +/// query.to_string(MysqlQueryBuilder), +/// r#"WITH RECURSIVE `used_by_object_ids_with_sink` (`oid`, `used_by`) AS (SELECT `oid`, `used_by` FROM `object_dependency` WHERE `object_dependency`.`oid` = 1 UNION ALL (SELECT `obj_dependency_with_sink`.`oid`, `obj_dependency_with_sink`.`used_by` FROM (SELECT `oid`, `used_by` FROM `object_dependency` UNION ALL (SELECT `sink_id`, `target_table` FROM `sink` WHERE `sink`.`target_table` IS NOT NULL)) AS `obj_dependency_with_sink` INNER JOIN `used_by_object_ids_with_sink` ON `used_by_object_ids_with_sink`.`used_by` = `obj_dependency_with_sink`.`oid` WHERE `used_by_object_ids_with_sink`.`used_by` <> `used_by_object_ids_with_sink`.`oid`)) SELECT COUNT(`used_by_object_ids_with_sink`.`used_by`) FROM `used_by_object_ids_with_sink` WHERE `used_by_object_ids_with_sink`.`used_by` IN (2, 3)"# +/// ); +/// assert_eq!( +/// query.to_string(PostgresQueryBuilder), +/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL (SELECT "obj_dependency_with_sink"."oid", "obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL (SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL)) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid")) SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"# +/// ); +/// assert_eq!( +/// 
query.to_string(SqliteQueryBuilder), +/// r#"WITH RECURSIVE "used_by_object_ids_with_sink" ("oid", "used_by") AS (SELECT "oid", "used_by" FROM "object_dependency" WHERE "object_dependency"."oid" = 1 UNION ALL SELECT "obj_dependency_with_sink"."oid", "obj_dependency_with_sink"."used_by" FROM (SELECT "oid", "used_by" FROM "object_dependency" UNION ALL SELECT "sink_id", "target_table" FROM "sink" WHERE "sink"."target_table" IS NOT NULL) AS "obj_dependency_with_sink" INNER JOIN "used_by_object_ids_with_sink" ON "used_by_object_ids_with_sink"."used_by" = "obj_dependency_with_sink"."oid" WHERE "used_by_object_ids_with_sink"."used_by" <> "used_by_object_ids_with_sink"."oid") SELECT COUNT("used_by_object_ids_with_sink"."used_by") FROM "used_by_object_ids_with_sink" WHERE "used_by_object_ids_with_sink"."used_by" IN (2, 3)"# +/// ); +/// ``` +pub fn construct_sink_cycle_check_query( + target_table: ObjectId, + dependent_objects: Vec, +) -> WithQuery { + let cte_alias = Alias::new("used_by_object_ids_with_sink"); + let depend_alias = Alias::new("obj_dependency_with_sink"); + + let mut base_query = SelectStatement::new() + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .from(ObjectDependency) + .and_where(object_dependency::Column::Oid.eq(target_table)) + .to_owned(); + + let query_sink_deps = SelectStatement::new() + .columns([sink::Column::SinkId, sink::Column::TargetTable]) + .from(Sink) + .and_where(sink::Column::TargetTable.is_not_null()) + .to_owned(); + + let cte_referencing = Query::select() + .column((depend_alias.clone(), object_dependency::Column::Oid)) + .column((depend_alias.clone(), object_dependency::Column::UsedBy)) + .from_subquery( + SelectStatement::new() + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .from(ObjectDependency) + .union(UnionType::All, query_sink_deps) + .to_owned(), + depend_alias.clone(), + ) + .inner_join( + cte_alias.clone(), + 
Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).eq(Expr::col(( + depend_alias.clone(), + object_dependency::Column::Oid, + ))), + ) + .and_where( + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).ne(Expr::col(( + cte_alias.clone(), + object_dependency::Column::Oid, + ))), + ) + .to_owned(); + + let common_table_expr = CommonTableExpression::new() + .query(base_query.union(UnionType::All, cte_referencing).to_owned()) + .columns([ + object_dependency::Column::Oid, + object_dependency::Column::UsedBy, + ]) + .table_name(cte_alias.clone()) + .to_owned(); + + SelectStatement::new() + .expr(Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)).count()) + .from(cte_alias.clone()) + .and_where( + Expr::col((cte_alias.clone(), object_dependency::Column::UsedBy)) + .is_in(dependent_objects), + ) + .to_owned() + .with( + WithClause::new() + .recursive(true) + .cte(common_table_expr) + .to_owned(), + ) + .to_owned() +} + #[derive(Clone, DerivePartialModel, FromQueryResult)] #[sea_orm(entity = "Object")] pub struct PartialObject { @@ -175,6 +276,36 @@ where Ok(objects) } +/// Check if create a sink with given dependent objects into the target table will cause a cycle, return true if it will. +pub async fn check_sink_into_table_cycle( + target_table: ObjectId, + dependent_objs: Vec, + db: &C, +) -> MetaResult +where + C: ConnectionTrait, +{ + if dependent_objs.is_empty() { + return Ok(false); + } + + let query = construct_sink_cycle_check_query(target_table, dependent_objs); + let (sql, values) = query.build_any(&*db.get_database_backend().get_query_builder()); + + let res = db + .query_one(Statement::from_sql_and_values( + db.get_database_backend(), + sql, + values, + )) + .await? + .unwrap(); + + let cnt: i64 = res.try_get_by(0)?; + + Ok(cnt != 0) +} + /// `ensure_object_id` ensures the existence of target object in the cluster. 
pub async fn ensure_object_id( object_type: ObjectType, diff --git a/src/meta/src/stream/scale.rs b/src/meta/src/stream/scale.rs index 7f40f8e3da033..0e571a0afebf7 100644 --- a/src/meta/src/stream/scale.rs +++ b/src/meta/src/stream/scale.rs @@ -31,15 +31,19 @@ use risingwave_common::catalog::TableId; use risingwave_common::hash::{ActorMapping, ParallelUnitId, VirtualNode}; use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_meta_model_v2::StreamingParallelism; -use risingwave_pb::common::{ActorInfo, ParallelUnit, WorkerNode}; +use risingwave_pb::common::{ActorInfo, Buffer, ParallelUnit, ParallelUnitMapping, WorkerNode}; use risingwave_pb::meta::get_reschedule_plan_request::{Policy, StableResizePolicy}; use risingwave_pb::meta::subscribe_response::{Info, Operation}; use risingwave_pb::meta::table_fragments::actor_status::ActorState; -use risingwave_pb::meta::table_fragments::fragment::FragmentDistributionType; -use risingwave_pb::meta::table_fragments::{self, ActorStatus, Fragment, State}; +use risingwave_pb::meta::table_fragments::fragment::{ + FragmentDistributionType, PbFragmentDistributionType, +}; +use risingwave_pb::meta::table_fragments::{self, ActorStatus, PbFragment, State}; use risingwave_pb::meta::FragmentParallelUnitMappings; use risingwave_pb::stream_plan::stream_node::NodeBody; -use risingwave_pb::stream_plan::{DispatcherType, FragmentTypeFlag, StreamActor, StreamNode}; +use risingwave_pb::stream_plan::{ + Dispatcher, DispatcherType, FragmentTypeFlag, PbStreamActor, StreamNode, +}; use thiserror_ext::AsReport; use tokio::sync::oneshot::Receiver; use tokio::sync::{oneshot, RwLock, RwLockReadGuard, RwLockWriteGuard}; @@ -105,15 +109,85 @@ pub struct ParallelUnitReschedule { pub removed_parallel_units: BTreeSet, } +pub struct CustomFragmentInfo { + pub fragment_id: u32, + pub fragment_type_mask: u32, + pub distribution_type: PbFragmentDistributionType, + pub vnode_mapping: Option, + pub state_table_ids: Vec, + pub upstream_fragment_ids: 
Vec, + pub actor_template: PbStreamActor, + pub actors: Vec, +} + +#[derive(Default)] +pub struct CustomActorInfo { + pub actor_id: u32, + pub fragment_id: u32, + pub dispatcher: Vec, + pub upstream_actor_id: Vec, + pub vnode_bitmap: Option, +} + +impl From<&PbStreamActor> for CustomActorInfo { + fn from( + PbStreamActor { + actor_id, + fragment_id, + dispatcher, + upstream_actor_id, + vnode_bitmap, + .. + }: &PbStreamActor, + ) -> Self { + CustomActorInfo { + actor_id: *actor_id, + fragment_id: *fragment_id, + dispatcher: dispatcher.clone(), + upstream_actor_id: upstream_actor_id.clone(), + vnode_bitmap: vnode_bitmap.clone(), + } + } +} + +impl From<&PbFragment> for CustomFragmentInfo { + fn from(fragment: &PbFragment) -> Self { + CustomFragmentInfo { + fragment_id: fragment.fragment_id, + fragment_type_mask: fragment.fragment_type_mask, + distribution_type: fragment.distribution_type(), + vnode_mapping: fragment.vnode_mapping.clone(), + state_table_ids: fragment.state_table_ids.clone(), + upstream_fragment_ids: fragment.upstream_fragment_ids.clone(), + actor_template: fragment + .actors + .first() + .cloned() + .expect("no actor in fragment"), + actors: fragment.actors.iter().map(CustomActorInfo::from).collect(), + } + } +} + +impl CustomFragmentInfo { + pub fn get_fragment_type_mask(&self) -> u32 { + self.fragment_type_mask + } + + pub fn distribution_type(&self) -> FragmentDistributionType { + self.distribution_type + } +} + pub struct RescheduleContext { /// Index used to map `ParallelUnitId` to `WorkerId` parallel_unit_id_to_worker_id: BTreeMap, /// Meta information for all Actors - actor_map: HashMap, + actor_map: HashMap, /// Status of all Actors, used to find the location of the `Actor` actor_status: BTreeMap, /// Meta information of all `Fragment`, used to find the `Fragment`'s `Actor` - fragment_map: HashMap, + fragment_map: HashMap, /// Indexes for all `Worker`s worker_nodes: HashMap, /// Index of all `Actor` upstreams, specific to `Dispatcher` @@ 
-180,7 +254,7 @@ impl RescheduleContext { /// /// The return value is the bitmap distribution after scaling, which covers all virtual node indexes pub fn rebalance_actor_vnode( - actors: &[StreamActor], + actors: &[CustomActorInfo], actors_to_remove: &BTreeSet, actors_to_create: &BTreeSet, ) -> HashMap { @@ -464,16 +538,29 @@ impl ScaleController { let mut fragment_state = HashMap::new(); let mut fragment_to_table = HashMap::new(); - let all_table_fragments = self.list_all_table_fragments().await?; - - for table_fragments in all_table_fragments { + // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future. + fn fulfill_index_by_table_fragments_ref( + actor_map: &mut HashMap, + fragment_map: &mut HashMap, + actor_status: &mut BTreeMap, + fragment_state: &mut HashMap, + fragment_to_table: &mut HashMap, + table_fragments: &TableFragments, + ) { fragment_state.extend( table_fragments .fragment_ids() .map(|f| (f, table_fragments.state())), ); - fragment_map.extend(table_fragments.fragments.clone()); - actor_map.extend(table_fragments.actor_map()); + + for (fragment_id, fragment) in &table_fragments.fragments { + for actor in &fragment.actors { + actor_map.insert(actor.actor_id, CustomActorInfo::from(actor)); + } + + fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment)); + } + actor_status.extend(table_fragments.actor_status.clone()); fragment_to_table.extend( @@ -483,6 +570,37 @@ impl ScaleController { ); } + match &self.metadata_manager { + MetadataManager::V1(mgr) => { + let guard = mgr.fragment_manager.get_fragment_read_guard().await; + + for table_fragments in guard.table_fragments().values() { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut fragment_map, + &mut actor_status, + &mut fragment_state, + &mut fragment_to_table, + table_fragments, + ); + } + } + MetadataManager::V2(_) => { + let all_table_fragments = self.list_all_table_fragments().await?; + + for table_fragments in 
&all_table_fragments { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut fragment_map, + &mut actor_status, + &mut fragment_state, + &mut fragment_to_table, + table_fragments, + ); + } + } + }; + // NoShuffle relation index let mut no_shuffle_source_fragment_ids = HashSet::new(); let mut no_shuffle_target_fragment_ids = HashSet::new(); @@ -608,7 +726,7 @@ impl ScaleController { } if (fragment.get_fragment_type_mask() & FragmentTypeFlag::Source as u32) != 0 { - let stream_node = fragment.actors.first().unwrap().get_nodes().unwrap(); + let stream_node = fragment.actor_template.nodes.as_ref().unwrap(); if TableFragments::find_stream_source(stream_node).is_some() { stream_source_fragment_ids.insert(*fragment_id); } @@ -698,7 +816,7 @@ impl ScaleController { &self, worker_nodes: &HashMap, actor_infos_to_broadcast: BTreeMap, - node_actors_to_create: HashMap>, + node_actors_to_create: HashMap>, broadcast_worker_ids: HashSet, ) -> MetaResult<()> { self.stream_rpc_manager @@ -963,7 +1081,7 @@ impl ScaleController { for (actor_to_create, sample_actor) in actors_to_create .iter() - .zip_eq_debug(repeat(fragment.actors.first().unwrap()).take(actors_to_create.len())) + .zip_eq_debug(repeat(&fragment.actor_template).take(actors_to_create.len())) { let new_actor_id = actor_to_create.0; let mut new_actor = sample_actor.clone(); @@ -1407,7 +1525,7 @@ impl ScaleController { fragment_actor_bitmap: &HashMap>, no_shuffle_upstream_actor_map: &HashMap>, no_shuffle_downstream_actors_map: &HashMap>, - new_actor: &mut StreamActor, + new_actor: &mut PbStreamActor, ) -> MetaResult<()> { let fragment = &ctx.fragment_map.get(&new_actor.fragment_id).unwrap(); let mut applied_upstream_fragment_actor_ids = HashMap::new(); @@ -1953,8 +2071,6 @@ impl ScaleController { }) .collect::>(); - let all_table_fragments = self.list_all_table_fragments().await?; - // FIXME: only need actor id and dispatcher info, avoid clone it. 
let mut actor_map = HashMap::new(); let mut actor_status = HashMap::new(); @@ -1962,24 +2078,56 @@ impl ScaleController { let mut fragment_map = HashMap::new(); let mut fragment_parallelism = HashMap::new(); - for table_fragments in all_table_fragments { - for (fragment_id, fragment) in table_fragments.fragments { - fragment - .actors - .iter() - .map(|actor| (actor.actor_id, actor)) - .for_each(|(id, actor)| { - actor_map.insert(id as ActorId, actor.clone()); - }); + // We are reusing code for the metadata manager of both V1 and V2, which will be deprecated in the future. + fn fulfill_index_by_table_fragments_ref( + actor_map: &mut HashMap, + actor_status: &mut HashMap, + fragment_map: &mut HashMap, + fragment_parallelism: &mut HashMap, + table_fragments: &TableFragments, + ) { + for (fragment_id, fragment) in &table_fragments.fragments { + for actor in &fragment.actors { + actor_map.insert(actor.actor_id, CustomActorInfo::from(actor)); + } - fragment_map.insert(fragment_id, fragment); + fragment_map.insert(*fragment_id, CustomFragmentInfo::from(fragment)); - fragment_parallelism.insert(fragment_id, table_fragments.assigned_parallelism); + fragment_parallelism.insert(*fragment_id, table_fragments.assigned_parallelism); } - actor_status.extend(table_fragments.actor_status); + actor_status.extend(table_fragments.actor_status.clone()); } + match &self.metadata_manager { + MetadataManager::V1(mgr) => { + let guard = mgr.fragment_manager.get_fragment_read_guard().await; + + for table_fragments in guard.table_fragments().values() { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut actor_status, + &mut fragment_map, + &mut fragment_parallelism, + table_fragments, + ); + } + } + MetadataManager::V2(_) => { + let all_table_fragments = self.list_all_table_fragments().await?; + + for table_fragments in &all_table_fragments { + fulfill_index_by_table_fragments_ref( + &mut actor_map, + &mut actor_status, + &mut fragment_map, + &mut fragment_parallelism, + 
table_fragments, + ); + } + } + }; + let mut no_shuffle_source_fragment_ids = HashSet::new(); let mut no_shuffle_target_fragment_ids = HashSet::new(); @@ -2034,7 +2182,7 @@ impl ScaleController { }, ) in fragment_worker_changes { - let fragment = match fragment_map.get(&fragment_id).cloned() { + let fragment = match fragment_map.get(&fragment_id) { None => bail!("Fragment id {} not found", fragment_id), Some(fragment) => fragment, }; @@ -2122,7 +2270,7 @@ impl ScaleController { // then we re-add the limited parallel units from the limited workers target_parallel_unit_ids.extend(limited_worker_parallel_unit_ids.into_iter()); } - match fragment.get_distribution_type().unwrap() { + match fragment.distribution_type() { FragmentDistributionType::Unspecified => unreachable!(), FragmentDistributionType::Single => { let single_parallel_unit_id = @@ -2274,7 +2422,7 @@ impl ScaleController { } pub fn build_no_shuffle_relation_index( - actor_map: &HashMap, + actor_map: &HashMap, no_shuffle_source_fragment_ids: &mut HashSet, no_shuffle_target_fragment_ids: &mut HashSet, ) { @@ -2302,7 +2450,7 @@ impl ScaleController { } pub fn build_fragment_dispatcher_index( - actor_map: &HashMap, + actor_map: &HashMap, fragment_dispatcher_map: &mut HashMap>, ) { for actor in actor_map.values() { @@ -2324,7 +2472,7 @@ impl ScaleController { pub fn resolve_no_shuffle_upstream_tables( fragment_ids: HashSet, - fragment_map: &HashMap, + fragment_map: &HashMap, no_shuffle_source_fragment_ids: &HashSet, no_shuffle_target_fragment_ids: &HashSet, fragment_to_table: &HashMap, @@ -2394,7 +2542,7 @@ impl ScaleController { pub fn resolve_no_shuffle_upstream_fragments( reschedule: &mut HashMap, - fragment_map: &HashMap, + fragment_map: &HashMap, no_shuffle_source_fragment_ids: &HashSet, no_shuffle_target_fragment_ids: &HashSet, ) -> MetaResult<()> diff --git a/src/meta/src/stream/test_scale.rs b/src/meta/src/stream/test_scale.rs index 2db55dbddbd4d..73d59ff52f2f4 100644 --- 
a/src/meta/src/stream/test_scale.rs +++ b/src/meta/src/stream/test_scale.rs @@ -21,10 +21,10 @@ mod tests { use risingwave_common::buffer::Bitmap; use risingwave_common::hash::{ActorMapping, ParallelUnitId, ParallelUnitMapping, VirtualNode}; use risingwave_pb::common::ParallelUnit; - use risingwave_pb::stream_plan::StreamActor; use crate::model::ActorId; use crate::stream::scale::rebalance_actor_vnode; + use crate::stream::CustomActorInfo; fn simulated_parallel_unit_nums(min: Option, max: Option) -> Vec { let mut raw = vec![1, 3, 12, 42, VirtualNode::COUNT]; @@ -39,13 +39,13 @@ mod tests { raw } - fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec { + fn build_fake_actors(info: &[(ActorId, ParallelUnitId)]) -> Vec { let parallel_units = generate_parallel_units(info); let vnode_bitmaps = ParallelUnitMapping::build(&parallel_units).to_bitmaps(); info.iter() - .map(|(actor_id, parallel_unit_id)| StreamActor { + .map(|(actor_id, parallel_unit_id)| CustomActorInfo { actor_id: *actor_id, vnode_bitmap: vnode_bitmaps .get(parallel_unit_id) @@ -64,7 +64,7 @@ mod tests { .collect_vec() } - fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &StreamActor) { + fn check_affinity_for_scale_in(bitmap: &Bitmap, actor: &CustomActorInfo) { let prev_bitmap = Bitmap::from(actor.vnode_bitmap.as_ref().unwrap()); for idx in 0..VirtualNode::COUNT { diff --git a/src/object_store/Cargo.toml b/src/object_store/Cargo.toml index a7ae9a8bfb70d..5acc52937f4ba 100644 --- a/src/object_store/Cargo.toml +++ b/src/object_store/Cargo.toml @@ -27,7 +27,7 @@ hyper-rustls = { version = "0.24.2", features = ["webpki-roots"] } hyper-tls = "0.5.0" itertools = "0.12" madsim = "0.2.22" -opendal = "0.44" +opendal = "0.44.2" prometheus = { version = "0.13", features = ["process"] } risingwave_common = { workspace = true } rustls = "0.21.8" diff --git a/src/object_store/src/object/mod.rs b/src/object_store/src/object/mod.rs index 5399b6d253b2f..d9ae0bc37b868 100644 ---
a/src/object_store/src/object/mod.rs +++ b/src/object_store/src/object/mod.rs @@ -818,15 +818,27 @@ pub async fn build_remote_object_store( config: ObjectStoreConfig, ) -> ObjectStoreImpl { match url { - s3 if s3.starts_with("s3://") => ObjectStoreImpl::S3( - S3ObjectStore::new_with_config( - s3.strip_prefix("s3://").unwrap().to_string(), - metrics.clone(), - config, - ) - .await - .monitored(metrics), - ), + s3 if s3.starts_with("s3://") => { + if std::env::var("RW_USE_OPENDAL_FOR_S3").is_ok() { + let bucket = s3.strip_prefix("s3://").unwrap(); + + ObjectStoreImpl::Opendal( + OpendalObjectStore::new_s3_engine(bucket.to_string(), config) + .unwrap() + .monitored(metrics), + ) + } else { + ObjectStoreImpl::S3( + S3ObjectStore::new_with_config( + s3.strip_prefix("s3://").unwrap().to_string(), + metrics.clone(), + config, + ) + .await + .monitored(metrics), + ) + } + } #[cfg(feature = "hdfs-backend")] hdfs if hdfs.starts_with("hdfs://") => { let hdfs = hdfs.strip_prefix("hdfs://").unwrap(); diff --git a/src/object_store/src/object/opendal_engine/fs.rs b/src/object_store/src/object/opendal_engine/fs.rs index 23d7dcbd503e8..ece3555d5b777 100644 --- a/src/object_store/src/object/opendal_engine/fs.rs +++ b/src/object_store/src/object/opendal_engine/fs.rs @@ -17,15 +17,17 @@ use opendal::services::Fs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; + impl OpendalObjectStore { /// create opendal fs engine. pub fn new_fs_engine(root: String) -> ObjectResult { // Create fs backend builder. let mut builder = Fs::default(); - builder.root(&root); - + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? 
.layer(RetryLayer::default()) .finish(); diff --git a/src/object_store/src/object/opendal_engine/hdfs.rs b/src/object_store/src/object/opendal_engine/hdfs.rs index b52be4094df80..12ee292a85416 100644 --- a/src/object_store/src/object/opendal_engine/hdfs.rs +++ b/src/object_store/src/object/opendal_engine/hdfs.rs @@ -17,7 +17,9 @@ use opendal::services::Hdfs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; + impl OpendalObjectStore { /// create opendal hdfs engine. pub fn new_hdfs_engine(namenode: String, root: String) -> ObjectResult { @@ -26,7 +28,8 @@ impl OpendalObjectStore { // Set the name node for hdfs. builder.name_node(&namenode); builder.root(&root); - + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? .layer(LoggingLayer::default()) .layer(RetryLayer::default()) diff --git a/src/object_store/src/object/opendal_engine/mod.rs b/src/object_store/src/object/opendal_engine/mod.rs index 1620ee30da7d7..c1ab929d5586f 100644 --- a/src/object_store/src/object/opendal_engine/mod.rs +++ b/src/object_store/src/object/opendal_engine/mod.rs @@ -26,8 +26,11 @@ pub mod gcs; pub mod obs; -pub mod oss; - pub mod azblob; +pub mod opendal_s3; +pub mod oss; pub mod fs; + +// To make sure the operation is consistent, we should specially set `atomic_write_dir` for fs, hdfs and webhdfs services.
+const ATOMIC_WRITE_DIR: &str = "atomic_write_dir/"; diff --git a/src/object_store/src/object/opendal_engine/opendal_object_store.rs b/src/object_store/src/object/opendal_engine/opendal_object_store.rs index 19bddcfc7ac52..122506d37cdfa 100644 --- a/src/object_store/src/object/opendal_engine/opendal_object_store.rs +++ b/src/object_store/src/object/opendal_engine/opendal_object_store.rs @@ -38,6 +38,7 @@ pub enum EngineType { Memory, Hdfs, Gcs, + S3, Obs, Oss, Webhdfs, @@ -158,7 +159,7 @@ impl ObjectStore for OpendalObjectStore { .op .lister_with(prefix) .recursive(true) - .metakey(Metakey::ContentLength | Metakey::ContentType) + .metakey(Metakey::ContentLength) .await?; let stream = stream::unfold(object_lister, |mut object_lister| async move { @@ -190,6 +191,7 @@ impl ObjectStore for OpendalObjectStore { match self.engine_type { EngineType::Memory => "Memory", EngineType::Hdfs => "Hdfs", + EngineType::S3 => "S3", EngineType::Gcs => "Gcs", EngineType::Obs => "Obs", EngineType::Oss => "Oss", @@ -206,7 +208,11 @@ pub struct OpendalStreamingUploader { } impl OpendalStreamingUploader { pub async fn new(op: Operator, path: String) -> ObjectResult { - let writer = op.writer_with(&path).buffer(OPENDAL_BUFFER_SIZE).await?; + let writer = op + .writer_with(&path) + .concurrent(8) + .buffer(OPENDAL_BUFFER_SIZE) + .await?; Ok(Self { writer }) } } diff --git a/src/object_store/src/object/opendal_engine/opendal_s3.rs b/src/object_store/src/object/opendal_engine/opendal_s3.rs new file mode 100644 index 0000000000000..c10aff55d342b --- /dev/null +++ b/src/object_store/src/object/opendal_engine/opendal_s3.rs @@ -0,0 +1,63 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use opendal::layers::{LoggingLayer, RetryLayer}; +use opendal::services::S3; +use opendal::Operator; +use risingwave_common::config::ObjectStoreConfig; + +use super::{EngineType, OpendalObjectStore}; +use crate::object::ObjectResult; + +impl OpendalObjectStore { + /// create opendal s3 engine. + pub fn new_s3_engine( + bucket: String, + object_store_config: ObjectStoreConfig, + ) -> ObjectResult { + // Create s3 builder. + let mut builder = S3::default(); + builder.bucket(&bucket); + // For AWS S3, there is no need to set an endpoint; for other S3 compatible object stores, it is necessary to set this field. + if let Ok(endpoint_url) = std::env::var("RW_S3_ENDPOINT") { + builder.endpoint(&endpoint_url); + } + + if std::env::var("RW_IS_FORCE_PATH_STYLE").is_err() { + builder.enable_virtual_host_style(); + } + + let op: Operator = Operator::new(builder)? 
+ .layer(LoggingLayer::default()) + .layer( + RetryLayer::new() + .with_min_delay(Duration::from_millis( + object_store_config.s3.object_store_req_retry_interval_ms, + )) + .with_max_delay(Duration::from_millis( + object_store_config.s3.object_store_req_retry_max_delay_ms, + )) + .with_max_times(object_store_config.s3.object_store_req_retry_max_attempts) + .with_factor(1.0) + .with_jitter(), + ) + .finish(); + Ok(Self { + op, + engine_type: EngineType::S3, + }) + } +} diff --git a/src/object_store/src/object/opendal_engine/webhdfs.rs b/src/object_store/src/object/opendal_engine/webhdfs.rs index ff61b39ec9e79..1f6b87b44fd5e 100644 --- a/src/object_store/src/object/opendal_engine/webhdfs.rs +++ b/src/object_store/src/object/opendal_engine/webhdfs.rs @@ -17,6 +17,7 @@ use opendal::services::Webhdfs; use opendal::Operator; use super::{EngineType, OpendalObjectStore}; +use crate::object::opendal_engine::ATOMIC_WRITE_DIR; use crate::object::ObjectResult; impl OpendalObjectStore { @@ -30,6 +31,8 @@ impl OpendalObjectStore { // NOTE: the root must be absolute path. builder.root(&root); + let atomic_write_dir = format!("{}/{}", root, ATOMIC_WRITE_DIR); + builder.atomic_write_dir(&atomic_write_dir); let op: Operator = Operator::new(builder)? 
.layer(LoggingLayer::default()) .layer(RetryLayer::default()) diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index 3dd923b610542..e876a197c265d 100644 --- a/src/sqlparser/src/ast/statement.rs +++ b/src/sqlparser/src/ast/statement.rs @@ -94,6 +94,7 @@ pub struct CreateSourceStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum Format { Native, + None, // Keyword::NONE Debezium, // Keyword::DEBEZIUM DebeziumMongo, // Keyword::DEBEZIUM_MONGO Maxwell, // Keyword::MAXWELL @@ -116,6 +117,7 @@ impl fmt::Display for Format { Format::Canal => "CANAL", Format::Upsert => "UPSERT", Format::Plain => "PLAIN", + Format::None => "NONE", } ) } @@ -149,6 +151,7 @@ pub enum Encode { Protobuf, // Keyword::PROTOBUF Json, // Keyword::JSON Bytes, // Keyword::BYTES + None, // Keyword::None Native, Template, } @@ -167,6 +170,7 @@ impl fmt::Display for Encode { Encode::Bytes => "BYTES", Encode::Native => "NATIVE", Encode::Template => "TEMPLATE", + Encode::None => "NONE", } ) } @@ -249,6 +253,18 @@ impl Parser { } else { ConnectorSchema::native().into() }) + } else if connector.contains("iceberg") { + let expected = ConnectorSchema::none(); + if self.peek_source_schema_format() { + let schema = parse_source_schema(self)?.into_v2(); + if schema != expected { + return Err(ParserError::ParserError(format!( + "Row format for iceberg connectors should be \ + either omitted or set to `{expected}`", + ))); + } + } + Ok(expected.into()) } else { Ok(parse_source_schema(self)?) } @@ -304,6 +320,16 @@ impl ConnectorSchema { } } + /// Create a new source schema with `None` format and encoding. + /// Used for self-explanatory source like iceberg. 
+ pub const fn none() -> Self { + ConnectorSchema { + format: Format::None, + row_encode: Encode::None, + row_options: Vec::new(), + } + } + pub fn row_options(&self) -> &[SqlOption] { self.row_options.as_ref() } diff --git a/src/storage/src/hummock/file_cache/store.rs b/src/storage/src/hummock/file_cache/store.rs index 3435227bd317b..c640ba8f1db58 100644 --- a/src/storage/src/hummock/file_cache/store.rs +++ b/src/storage/src/hummock/file_cache/store.rs @@ -701,13 +701,8 @@ mod tests { builder.add_for_test(construct_full_key_struct(0, b"k3", 3), b"v03"); builder.add_for_test(construct_full_key_struct(0, b"k4", 4), b"v04"); - Box::new( - Block::decode( - builder.build().to_vec().into(), - builder.uncompressed_block_size(), - ) - .unwrap(), - ) + let uncompress = builder.uncompressed_block_size(); + Box::new(Block::decode(builder.build().to_vec().into(), uncompress).unwrap()) } fn sstable_for_test() -> Sstable { diff --git a/src/storage/src/hummock/sstable/block.rs b/src/storage/src/hummock/sstable/block.rs index 3d0b4f8c0f770..fe465bba5b41f 100644 --- a/src/storage/src/hummock/sstable/block.rs +++ b/src/storage/src/hummock/sstable/block.rs @@ -215,20 +215,20 @@ impl Block { let mut decoder = lz4::Decoder::new(compressed_data.reader()) .map_err(HummockError::decode_error)?; let mut decoded = Vec::with_capacity(uncompressed_capacity); - decoder + let read_size = decoder .read_to_end(&mut decoded) .map_err(HummockError::decode_error)?; - debug_assert_eq!(decoded.capacity(), uncompressed_capacity); + assert_eq!(read_size, uncompressed_capacity); Bytes::from(decoded) } CompressionAlgorithm::Zstd => { let mut decoder = zstd::Decoder::new(compressed_data.reader()) .map_err(HummockError::decode_error)?; let mut decoded = Vec::with_capacity(uncompressed_capacity); - decoder + let read_size = decoder .read_to_end(&mut decoded) .map_err(HummockError::decode_error)?; - debug_assert_eq!(decoded.capacity(), uncompressed_capacity); + assert_eq!(read_size, uncompressed_capacity); 
Bytes::from(decoded) } }; @@ -445,6 +445,8 @@ impl Default for BlockBuilderOptions { pub struct BlockBuilder { /// Write buffer. buf: BytesMut, + /// Compress buffer + compress_buf: BytesMut, /// Entry interval between restart points. restart_count: usize, /// Restart points. @@ -465,8 +467,9 @@ pub struct BlockBuilder { impl BlockBuilder { pub fn new(options: BlockBuilderOptions) -> Self { Self { - // add more space to avoid re-allocate space. - buf: BytesMut::with_capacity(options.capacity + 256), + // add more space to avoid re-allocate space. (for restart_points and restart_points_type_index) + buf: BytesMut::with_capacity(Self::buf_reserve_size(&options)), + compress_buf: BytesMut::default(), restart_count: options.restart_interval, restart_points: Vec::with_capacity( options.capacity / DEFAULT_ENTRY_SIZE / options.restart_interval + 1, @@ -664,22 +667,35 @@ impl BlockBuilder { ); self.buf.put_u32_le(self.table_id.unwrap()); - if self.compression_algorithm != CompressionAlgorithm::None { - self.buf = Self::compress(&self.buf[..], self.compression_algorithm); - } + let result_buf = if self.compression_algorithm != CompressionAlgorithm::None { + self.compress_buf.clear(); + self.compress_buf = Self::compress( + &self.buf[..], + self.compression_algorithm, + std::mem::take(&mut self.compress_buf), + ); + + &mut self.compress_buf + } else { + &mut self.buf + }; - self.compression_algorithm.encode(&mut self.buf); - let checksum = xxhash64_checksum(&self.buf); - self.buf.put_u64_le(checksum); + self.compression_algorithm.encode(result_buf); + let checksum = xxhash64_checksum(result_buf); + result_buf.put_u64_le(checksum); assert!( - self.buf.len() < (u32::MAX) as usize, + result_buf.len() < (u32::MAX) as usize, "buf_len {} entry_count {} table {:?}", - self.buf.len(), + result_buf.len(), self.entry_count, self.table_id ); - self.buf.as_ref() + if self.compression_algorithm != CompressionAlgorithm::None { + self.compress_buf.as_ref() + } else { + self.buf.as_ref() + 
} } pub fn compress_block( @@ -693,21 +709,29 @@ impl BlockBuilder { let compression = CompressionAlgorithm::decode(&mut &buf[buf.len() - 9..buf.len() - 8])?; let compressed_data = &buf[..buf.len() - 9]; assert_eq!(compression, CompressionAlgorithm::None); - let mut writer = Self::compress(compressed_data, target_compression); + let mut compress_writer = Self::compress( + compressed_data, + target_compression, + BytesMut::with_capacity(buf.len()), + ); - target_compression.encode(&mut writer); - let checksum = xxhash64_checksum(&writer); - writer.put_u64_le(checksum); - Ok(writer.freeze()) + target_compression.encode(&mut compress_writer); + let checksum = xxhash64_checksum(&compress_writer); + compress_writer.put_u64_le(checksum); + Ok(compress_writer.freeze()) } - pub fn compress(buf: &[u8], compression_algorithm: CompressionAlgorithm) -> BytesMut { + pub fn compress( + buf: &[u8], + compression_algorithm: CompressionAlgorithm, + compress_writer: BytesMut, + ) -> BytesMut { match compression_algorithm { CompressionAlgorithm::None => unreachable!(), CompressionAlgorithm::Lz4 => { let mut encoder = lz4::EncoderBuilder::new() .level(4) - .build(BytesMut::with_capacity(buf.len()).writer()) + .build(compress_writer.writer()) .map_err(HummockError::encode_error) .unwrap(); encoder @@ -719,10 +743,9 @@ impl BlockBuilder { writer.into_inner() } CompressionAlgorithm::Zstd => { - let mut encoder = - zstd::Encoder::new(BytesMut::with_capacity(buf.len()).writer(), 4) - .map_err(HummockError::encode_error) - .unwrap(); + let mut encoder = zstd::Encoder::new(compress_writer.writer(), 4) + .map_err(HummockError::encode_error) + .unwrap(); encoder .write_all(buf) .map_err(HummockError::encode_error) @@ -762,6 +785,10 @@ impl BlockBuilder { pub fn table_id(&self) -> Option { self.table_id } + + fn buf_reserve_size(option: &BlockBuilderOptions) -> usize { + option.capacity + 1024 + 256 + } } #[cfg(test)] diff --git a/src/storage/src/hummock/sstable/builder.rs 
b/src/storage/src/hummock/sstable/builder.rs index 4fe331f677321..ebaa60e167056 100644 --- a/src/storage/src/hummock/sstable/builder.rs +++ b/src/storage/src/hummock/sstable/builder.rs @@ -240,7 +240,6 @@ impl SstableBuilder { self.add(full_key, value).await } - /// only for test pub fn current_block_size(&self) -> usize { self.block_builder.approximate_len() } @@ -344,6 +343,12 @@ impl SstableBuilder { || !user_key(&self.raw_key).eq(user_key(&self.last_full_key)); let table_id = full_key.user_key.table_id.table_id(); let is_new_table = self.last_table_id.is_none() || self.last_table_id.unwrap() != table_id; + let current_block_size = self.current_block_size(); + let is_block_full = current_block_size >= self.options.block_capacity + || (current_block_size > self.options.block_capacity / 4 * 3 + && current_block_size + self.raw_value.len() + self.raw_key.len() + > self.options.block_capacity); + if is_new_table { assert!( could_switch_block, @@ -356,9 +361,7 @@ impl SstableBuilder { if !self.block_builder.is_empty() { self.build_block().await?; } - } else if self.block_builder.approximate_len() >= self.options.block_capacity - && could_switch_block - { + } else if is_block_full && could_switch_block { self.build_block().await?; } self.last_table_stats.total_key_count += 1; @@ -704,6 +707,15 @@ impl SstableBuilder { data_len, block_meta.offset ) }); + + if data_len as usize > self.options.capacity * 2 { + tracing::warn!( + "WARN unexpected block size {} table {:?}", + data_len, + self.block_builder.table_id() + ); + } + self.block_builder.clear(); Ok(()) } diff --git a/src/storage/src/monitor/hummock_state_store_metrics.rs b/src/storage/src/monitor/hummock_state_store_metrics.rs index 6954263010333..5932185ecd5f7 100644 --- a/src/storage/src/monitor/hummock_state_store_metrics.rs +++ b/src/storage/src/monitor/hummock_state_store_metrics.rs @@ -291,11 +291,6 @@ impl HummockStateStoreMetrics { registry ) .unwrap(); - let spill_task_counts = 
RelabeledCounterVec::with_metric_level( - MetricLevel::Debug, - spill_task_counts, - metric_level, - ); let spill_task_size = register_int_counter_vec_with_registry!( "state_store_spill_task_size", @@ -304,11 +299,6 @@ impl HummockStateStoreMetrics { registry ) .unwrap(); - let spill_task_size = RelabeledCounterVec::with_metric_level( - MetricLevel::Debug, - spill_task_size, - metric_level, - ); let uploader_uploading_task_size = GenericGauge::new( "state_store_uploader_uploading_task_size", @@ -327,10 +317,11 @@ impl HummockStateStoreMetrics { ) .unwrap(); let read_req_bloom_filter_positive_counts = - RelabeledGuardedIntCounterVec::with_metric_level( + RelabeledGuardedIntCounterVec::with_metric_level_relabel_n( MetricLevel::Info, read_req_bloom_filter_positive_counts, metric_level, + 1, ); let read_req_positive_but_non_exist_counts = register_guarded_int_counter_vec_with_registry!( diff --git a/src/stream/src/cache/managed_lru.rs b/src/stream/src/cache/managed_lru.rs index d91eb664d43a2..9773f3fb51bf0 100644 --- a/src/stream/src/cache/managed_lru.rs +++ b/src/stream/src/cache/managed_lru.rs @@ -156,6 +156,14 @@ impl(&self, k: &Q) -> Option<&V> + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + self.inner.peek(k) + } + pub fn peek_mut(&mut self, k: &K) -> Option> { let v = self.inner.peek_mut(k); v.map(|inner| { diff --git a/src/stream/src/executor/source/source_executor.rs b/src/stream/src/executor/source/source_executor.rs index 8ad653c5f8397..e2567bb141492 100644 --- a/src/stream/src/executor/source/source_executor.rs +++ b/src/stream/src/executor/source/source_executor.rs @@ -24,7 +24,7 @@ use risingwave_common::system_param::local_manager::SystemParamsReaderRef; use risingwave_common::system_param::reader::SystemParamsRead; use risingwave_connector::source::reader::desc::{SourceDesc, SourceDescBuilder}; use risingwave_connector::source::{ - BoxChunkSourceStream, ConnectorState, SourceContext, SourceCtrlOpts, SplitMetaData, + BoxChunkSourceStream, 
ConnectorState, SourceContext, SourceCtrlOpts, SplitId, SplitMetaData, }; use risingwave_connector::ConnectorParams; use risingwave_storage::StateStore; @@ -138,13 +138,21 @@ impl SourceExecutor { ] } - /// Returns `target_states` if split changed. Otherwise `None`. + /// - `should_trim_state`: whether to trim state for dropped splits. + /// + /// For scaling, the connector splits can be migrated to other actors, but + /// won't be added or removed. Actors should not trim states for splits that + /// are moved to other actors. + /// + /// For source split change, split will not be migrated and we can trim states + /// for deleted splits. async fn apply_split_change( &mut self, source_desc: &SourceDesc, stream: &mut StreamReaderWithPause, split_assignment: &HashMap>, - ) -> StreamExecutorResult>> { + should_trim_state: bool, + ) -> StreamExecutorResult<()> { self.metrics .source_split_change_count .with_label_values( @@ -156,82 +164,96 @@ impl SourceExecutor { ) .inc(); if let Some(target_splits) = split_assignment.get(&self.actor_ctx.id).cloned() { - if let Some(target_state) = self.update_state_if_changed(Some(target_splits)).await? { - tracing::info!( - actor_id = self.actor_ctx.id, - state = ?target_state, - "apply split change" - ); - - self.replace_stream_reader_with_target_state( - source_desc, - stream, - target_state.clone(), - ) - .await?; - - return Ok(Some(target_state)); + if self + .update_state_if_changed(target_splits, should_trim_state) + .await? + { + self.rebuild_stream_reader(source_desc, stream).await?; } } - Ok(None) + Ok(()) } - /// Returns `target_states` if split changed. Otherwise `None`. - /// - /// Note: `update_state_if_changed` will modify `updated_splits_in_epoch` + /// Returns `true` if split changed. Otherwise `false`. 
async fn update_state_if_changed( &mut self, - state: ConnectorState, - ) -> StreamExecutorResult { + target_splits: Vec, + should_trim_state: bool, + ) -> StreamExecutorResult { let core = self.stream_source_core.as_mut().unwrap(); - let target_splits: HashMap<_, _> = state - .unwrap() + let target_splits: HashMap<_, _> = target_splits .into_iter() .map(|split| (split.id(), split)) .collect(); - let mut target_state: Vec = Vec::with_capacity(target_splits.len()); + let mut target_state: HashMap = + HashMap::with_capacity(target_splits.len()); let mut split_changed = false; // Checks added splits - for (split_id, split) in &target_splits { - if let Some(s) = core.updated_splits_in_epoch.get(split_id) { - // existing split, no change, clone from cache - target_state.push(s.clone()) + for (split_id, split) in target_splits { + if let Some(s) = core.latest_split_info.get(&split_id) { + // For existing splits, we should use the latest offset from the cache. + // `target_splits` is from meta and contains the initial offset. + target_state.insert(split_id, s.clone()); } else { split_changed = true; // write new assigned split to state cache. snapshot is base on cache. let initial_state = if let Some(recover_state) = core .split_state_store - .try_recover_from_state_store(split) + .try_recover_from_state_store(&split) .await? 
{ recover_state } else { - split.clone() + split }; core.updated_splits_in_epoch - .entry(split.id()) + .entry(split_id.clone()) .or_insert_with(|| initial_state.clone()); - target_state.push(initial_state); + target_state.insert(split_id, initial_state); } } // Checks dropped splits for existing_split_id in core.latest_split_info.keys() { - if !target_splits.contains_key(existing_split_id) { + if !target_state.contains_key(existing_split_id) { tracing::info!("split dropping detected: {}", existing_split_id); split_changed = true; } } - Ok(split_changed.then_some(target_state)) + if split_changed { + tracing::info!( + actor_id = self.actor_ctx.id, + state = ?target_state, + "apply split change" + ); + + core.updated_splits_in_epoch + .retain(|split_id, _| target_state.get(split_id).is_some()); + + let dropped_splits = core + .latest_split_info + .extract_if(|split_id, _| target_state.get(split_id).is_none()) + .map(|(_, split)| split) + .collect_vec(); + + if should_trim_state && !dropped_splits.is_empty() { + // trim dropped splits' state + core.split_state_store.trim_state(&dropped_splits).await?; + } + + core.latest_split_info = target_state; + } + + Ok(split_changed) } /// Rebuild stream if there is a err in stream @@ -256,17 +278,17 @@ impl SourceExecutor { core.source_id.to_string(), ]); - let target_state = core.latest_split_info.values().cloned().collect(); - self.replace_stream_reader_with_target_state(source_desc, stream, target_state) - .await + self.rebuild_stream_reader(source_desc, stream).await } - async fn replace_stream_reader_with_target_state( + async fn rebuild_stream_reader( &mut self, source_desc: &SourceDesc, stream: &mut StreamReaderWithPause, - target_state: Vec, ) -> StreamExecutorResult<()> { + let core = self.stream_source_core.as_mut().unwrap(); + let target_state: Vec = core.latest_split_info.values().cloned().collect(); + tracing::info!( "actor {:?} apply source split change to {:?}", self.actor_ctx.id, @@ -284,56 +306,21 @@ impl 
SourceExecutor { Ok(()) } - /// - `target_state`: the new split info from barrier. `None` if no split update. - /// - `should_trim_state`: whether to trim state for dropped splits. - /// - /// For scaling, the connector splits can be migrated to other actors, but - /// won't be added or removed. Actors should not trim states for splits that - /// are moved to other actors. - /// - /// For source split change, split will not be migrated and we can trim states - /// for deleted splits. async fn persist_state_and_clear_cache( &mut self, epoch: EpochPair, - // target_state is Some means split change (or migration) happened. - target_state: Option>, - should_trim_state: bool, ) -> StreamExecutorResult<()> { let core = self.stream_source_core.as_mut().unwrap(); - let mut cache = core + let cache = core .updated_splits_in_epoch .values() .map(|split_impl| split_impl.to_owned()) .collect_vec(); - if let Some(target_splits) = target_state { - let target_split_ids: HashSet<_> = - target_splits.iter().map(|split| split.id()).collect(); - - cache.retain(|split| target_split_ids.contains(&split.id())); - - let dropped_splits = core - .latest_split_info - .extract_if(|split_id, _| !target_split_ids.contains(split_id)) - .map(|(_, split)| split) - .collect_vec(); - - if should_trim_state && !dropped_splits.is_empty() { - // trim dropped splits' state - core.split_state_store.trim_state(&dropped_splits).await?; - } - - core.latest_split_info = target_splits - .into_iter() - .map(|split| (split.id(), split)) - .collect(); - } - if !cache.is_empty() { tracing::debug!(actor_id = self.actor_ctx.id, state = ?cache, "take snapshot"); - core.split_state_store.set_states(cache).await? 
+ core.split_state_store.set_states(cache).await?; } // commit anyway, even if no message saved @@ -471,9 +458,6 @@ impl SourceExecutor { let epoch = barrier.epoch; - let mut target_state = None; - let mut should_trim_state = false; - if let Some(mutation) = barrier.mutation.as_deref() { match mutation { Mutation::Pause => stream.pause_stream(), @@ -485,23 +469,29 @@ impl SourceExecutor { "source change split received" ); - target_state = self - .apply_split_change(&source_desc, &mut stream, actor_splits) - .await?; - should_trim_state = true; + self.apply_split_change( + &source_desc, + &mut stream, + actor_splits, + true, + ) + .await?; } Mutation::Update(UpdateMutation { actor_splits, .. }) => { - target_state = self - .apply_split_change(&source_desc, &mut stream, actor_splits) - .await?; + self.apply_split_change( + &source_desc, + &mut stream, + actor_splits, + false, + ) + .await?; } _ => {} } } - self.persist_state_and_clear_cache(epoch, target_state, should_trim_state) - .await?; + self.persist_state_and_clear_cache(epoch).await?; self.metrics .source_row_per_barrier diff --git a/src/stream/src/executor/source/state_table_handler.rs b/src/stream/src/executor/source/state_table_handler.rs index 7bfb2bd3ec487..bcb93655c5784 100644 --- a/src/stream/src/executor/source/state_table_handler.rs +++ b/src/stream/src/executor/source/state_table_handler.rs @@ -178,14 +178,9 @@ impl SourceStateTableHandler { where SS: SplitMetaData, { - if states.is_empty() { - // TODO should be a clear Error Code - bail!("states require not null"); - } else { - for split_impl in states { - self.set(split_impl.id(), split_impl.encode_to_json()) - .await?; - } + for split_impl in states { + self.set(split_impl.id(), split_impl.encode_to_json()) + .await?; } Ok(()) } diff --git a/src/stream/src/executor/temporal_join.rs b/src/stream/src/executor/temporal_join.rs index 32a0c5747083b..da0ac7b45dbdc 100644 --- a/src/stream/src/executor/temporal_join.rs +++ 
b/src/stream/src/executor/temporal_join.rs @@ -15,14 +15,13 @@ use std::alloc::Global; use std::collections::hash_map::Entry; use std::collections::HashMap; -use std::ops::{Deref, DerefMut}; use std::pin::pin; use std::sync::Arc; use either::Either; use futures::stream::{self, PollNext}; use futures::{pin_mut, StreamExt, TryStreamExt}; -use futures_async_stream::try_stream; +use futures_async_stream::{for_await, try_stream}; use local_stats_alloc::{SharedStatsAlloc, StatsAlloc}; use lru::DefaultHasher; use risingwave_common::array::{Op, StreamChunk}; @@ -108,99 +107,84 @@ impl JoinEntry { } } -struct JoinEntryWrapper(Option); - -impl EstimateSize for JoinEntryWrapper { - fn estimated_heap_size(&self) -> usize { - self.0.estimated_heap_size() - } -} - -impl JoinEntryWrapper { - const MESSAGE: &'static str = "the state should always be `Some`"; - - /// Take the value out of the wrapper. Panic if the value is `None`. - pub fn take(&mut self) -> JoinEntry { - self.0.take().expect(Self::MESSAGE) - } -} - -impl Deref for JoinEntryWrapper { - type Target = JoinEntry; - - fn deref(&self) -> &Self::Target { - self.0.as_ref().expect(Self::MESSAGE) - } -} - -impl DerefMut for JoinEntryWrapper { - fn deref_mut(&mut self) -> &mut Self::Target { - self.0.as_mut().expect(Self::MESSAGE) - } -} - struct TemporalSide { source: StorageTable, table_stream_key_indices: Vec, table_output_indices: Vec, - cache: ManagedLruCache>, + cache: ManagedLruCache>, ctx: ActorContextRef, join_key_data_types: Vec, } impl TemporalSide { - /// Lookup the temporal side table and return a `JoinEntry` which could be empty if there are no - /// matched records. - async fn lookup(&mut self, key: &K, epoch: HummockEpoch) -> StreamExecutorResult { + /// Fetch records from temporal side table and ensure the entry in the cache. + /// If already exists, the entry will be promoted. 
+ async fn fetch_or_promote_keys( + &mut self, + keys: impl Iterator, + epoch: HummockEpoch, + ) -> StreamExecutorResult<()> { let table_id_str = self.source.table_id().to_string(); let actor_id_str = self.ctx.id.to_string(); let fragment_id_str = self.ctx.id.to_string(); - self.ctx - .streaming_metrics - .temporal_join_total_query_cache_count - .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) - .inc(); - - let res = if self.cache.contains(key) { - let mut state = self.cache.peek_mut(key).unwrap(); - state.take() - } else { - // cache miss + + let mut futs = Vec::with_capacity(keys.size_hint().1.unwrap_or(0)); + for key in keys { self.ctx .streaming_metrics - .temporal_join_cache_miss_count + .temporal_join_total_query_cache_count .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) .inc(); - let pk_prefix = key.deserialize(&self.join_key_data_types)?; - - let iter = self - .source - .batch_iter_with_pk_bounds( - HummockReadEpoch::NoWait(epoch), - &pk_prefix, - .., - false, - PrefetchOptions::default(), - ) - .await?; - - let mut entry = JoinEntry::default(); - - pin_mut!(iter); - while let Some(row) = iter.next_row().await? { - entry.insert( - row.as_ref() - .project(&self.table_stream_key_indices) - .into_owned_row(), - row.project(&self.table_output_indices).into_owned_row(), - ); + if self.cache.get(key).is_none() { + self.ctx + .streaming_metrics + .temporal_join_cache_miss_count + .with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str]) + .inc(); + + futs.push(async { + let pk_prefix = key.deserialize(&self.join_key_data_types)?; + + let iter = self + .source + .batch_iter_with_pk_bounds( + HummockReadEpoch::NoWait(epoch), + &pk_prefix, + .., + false, + PrefetchOptions::default(), + ) + .await?; + + let mut entry = JoinEntry::default(); + + pin_mut!(iter); + while let Some(row) = iter.next_row().await? 
{ + entry.insert( + row.as_ref() + .project(&self.table_stream_key_indices) + .into_owned_row(), + row.project(&self.table_output_indices).into_owned_row(), + ); + } + let key = key.clone(); + Ok((key, entry)) as StreamExecutorResult<_> + }); } + } - entry - }; + #[for_await] + for res in stream::iter(futs).buffered(16) { + let (key, entry) = res?; + self.cache.put(key, entry); + } + + Ok(()) + } - Ok(res) + fn force_peek(&self, key: &K) -> &JoinEntry { + self.cache.peek(key).expect("key should exists") } fn update( @@ -230,10 +214,6 @@ impl TemporalSide { } Ok(()) } - - pub fn insert_back(&mut self, key: K, state: JoinEntry) { - self.cache.put(key, JoinEntryWrapper(Some(state))); - } } enum InternalMessage { @@ -428,12 +408,20 @@ impl TemporalJoinExecutor ); let epoch = prev_epoch.expect("Chunk data should come after some barrier."); let keys = K::build(&self.left_join_keys, chunk.data_chunk())?; + let to_fetch_keys = chunk + .visibility() + .iter() + .zip_eq_debug(keys.iter()) + .filter_map(|(vis, key)| if vis { Some(key) } else { None }); + self.right_table + .fetch_or_promote_keys(to_fetch_keys, epoch) + .await?; for (r, key) in chunk.rows_with_holes().zip_eq_debug(keys.into_iter()) { let Some((op, left_row)) = r else { continue; }; if key.null_bitmap().is_subset(&null_matched) - && let join_entry = self.right_table.lookup(&key, epoch).await? + && let join_entry = self.right_table.force_peek(&key) && !join_entry.is_empty() { for right_row in join_entry.cached.values() { @@ -455,8 +443,6 @@ impl TemporalJoinExecutor } } } - // Insert back the state taken from ht. 
- self.right_table.insert_back(key.clone(), join_entry); } else if T == JoinType::LeftOuter { if let Some(chunk) = builder.append_row_update(op, left_row) { yield Message::Chunk(chunk); diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml index 690fed8acc47a..9c640926f14f6 100644 --- a/src/workspace-hack/Cargo.toml +++ b/src/workspace-hack/Cargo.toml @@ -96,7 +96,7 @@ parking_lot_core = { version = "0.9", default-features = false, features = ["dea petgraph = { version = "0.6" } phf = { version = "0.11", features = ["uncased"] } phf_shared = { version = "0.11", features = ["uncased"] } -postgres-types = { version = "0.2", default-features = false, features = ["derive", "with-chrono-0_4", "with-serde_json-1"] } +postgres-types = { version = "0.2", default-features = false, features = ["derive", "with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } proc-macro2 = { version = "1", features = ["span-locations"] } prometheus = { version = "0.13", features = ["process"] } prost = { version = "0.12", features = ["no-recursion-limit"] } @@ -135,7 +135,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-trai time = { version = "0.3", features = ["local-offset", "macros", "serde-well-known"] } tinyvec = { version = "1", features = ["alloc", "grab_spare_slice", "rustc_1_55"] } tokio = { version = "1", features = ["full", "stats", "tracing"] } -tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "ac00d88", features = ["with-chrono-0_4"] } +tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "ac00d88", features = ["with-chrono-0_4", "with-uuid-1"] } tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "fe39bb8e", features = ["fs", "net"] } tokio-util = { version = "0.7", features = ["codec", "io"] } toml_datetime = { version = "0.6", default-features = false, features = ["serde"] }