diff --git a/.github/workflows/auto-create-doc-issue-by-issue.yml b/.github/workflows/auto-create-doc-issue-by-issue.yml new file mode 100644 index 0000000000000..0c8d78062977a --- /dev/null +++ b/.github/workflows/auto-create-doc-issue-by-issue.yml @@ -0,0 +1,31 @@ +name: Issue Documentation Checker + +on: + issues: + types: + - closed + - labeled + +jobs: + create-issue: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Log the event payload + run: echo "${{ toJSON(github.event) }}" + - name: Check if issue is done and labeled 'user-facing-changes' + uses: dacbd/create-issue-action@main + if: ${{ github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user-facing-changes') }} + with: + token: ${{ secrets.ACCESS_TOKEN }} + owner: risingwavelabs + repo: risingwave-docs + title: | + Document: ${{ github.event.issue.title }} + body: | + ## Context + Source Issue URL: ${{ github.event.issue.html_url }} + Created At: ${{ github.event.issue.created_at }} + Created By: ${{ github.event.issue.user.login }} + Closed At: ${{ github.event.issue.closed_at }} diff --git a/.github/workflows/auto-create-docs-pr.yml b/.github/workflows/auto-create-doc-issue-by-pr.yml similarity index 100% rename from .github/workflows/auto-create-docs-pr.yml rename to .github/workflows/auto-create-doc-issue-by-pr.yml diff --git a/.gitignore b/.gitignore index 19fb6643dd8a6..375738f67093e 100644 --- a/.gitignore +++ b/.gitignore @@ -74,4 +74,7 @@ simulation-it-test.tar.zst # hummock-trace .trace +# spark binary +e2e_test/iceberg/spark-*-bin* + **/poetry.lock \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index d8977f920c990..85b0330b84367 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1825,13 +1825,13 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.0" +version = "7.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" +checksum = "9ab77dbd8adecaf3f0db40581631b995f312a8a5ae3aa9993188bb8f23d83a5b" dependencies = [ - "crossterm 0.27.0", - "strum 0.25.0", - "strum_macros 0.25.2", + "crossterm 0.26.1", + "strum 0.24.1", + "strum_macros 0.24.3", "unicode-width", ] @@ -2140,14 +2140,17 @@ dependencies = [ [[package]] name = "crossterm" -version = "0.27.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +checksum = "a84cda67535339806297f1b331d6dd6320470d2a0fe65381e79ee9e156dd3d13" dependencies = [ - "bitflags 2.4.0", + "bitflags 1.3.2", "crossterm_winapi", "libc", + "mio", "parking_lot 0.12.1", + "signal-hook", + "signal-hook-mio", "winapi", ] @@ -2445,10 +2448,11 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" dependencies = [ + "powerfmt", "serde", ] @@ -2551,8 +2555,7 @@ checksum = "86e3bdc80eee6e16b2b6b0f87fbc98c04bee3455e35174c0de1a125d0688c632" [[package]] name = "dlv-list" version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8aead04dc46b5f263c25721cf25c9e595951d15055f8063f92392fa0d7f64cf4" +source = "git+https://github.com/sgodwincs/dlv-list-rs.git?rev=5bbc5d0#5bbc5d0cc84f257e173d851f8dc1674fb6e46f95" dependencies = [ 
"const-random", ] @@ -3035,7 +3038,7 @@ dependencies = [ [[package]] name = "foyer" version = "0.1.0" -source = "git+https://github.com/mrcroxx/foyer?rev=5d0134b#5d0134b28c0edb03277b01ce08b035ef52c1b783" +source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10" dependencies = [ "foyer-common", "foyer-intrusive", @@ -3046,7 +3049,7 @@ dependencies = [ [[package]] name = "foyer-common" version = "0.1.0" -source = "git+https://github.com/mrcroxx/foyer?rev=5d0134b#5d0134b28c0edb03277b01ce08b035ef52c1b783" +source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10" dependencies = [ "bytes", "foyer-workspace-hack", @@ -3061,7 +3064,7 @@ dependencies = [ [[package]] name = "foyer-intrusive" version = "0.1.0" -source = "git+https://github.com/mrcroxx/foyer?rev=5d0134b#5d0134b28c0edb03277b01ce08b035ef52c1b783" +source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10" dependencies = [ "bytes", "cmsketch", @@ -3078,7 +3081,7 @@ dependencies = [ [[package]] name = "foyer-storage" version = "0.1.0" -source = "git+https://github.com/mrcroxx/foyer?rev=5d0134b#5d0134b28c0edb03277b01ce08b035ef52c1b783" +source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10" dependencies = [ "anyhow", "async-channel", @@ -3107,7 +3110,7 @@ dependencies = [ [[package]] name = "foyer-workspace-hack" version = "0.1.0" -source = "git+https://github.com/mrcroxx/foyer?rev=5d0134b#5d0134b28c0edb03277b01ce08b035ef52c1b783" +source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10" dependencies = [ "crossbeam-utils", "either", @@ -3245,9 +3248,9 @@ dependencies = [ [[package]] name = "futures-async-stream" -version = "0.2.7" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f529ccdeacfa2446a9577041686cf1abb839b1b3e15fee4c1b1232ab3b7d799f" +checksum = "379790776b0d953337df4ab7ecc51936c66ea112484cad7912907b1d34253ebf" dependencies = [ "futures-async-stream-macro", "futures-core", @@ -3256,13 +3259,13 @@ dependencies = [ [[package]] name = "futures-async-stream-macro" -version = "0.2.7" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b48ee06dc8d2808ba5ebad075d06c3406085bb19deaac33be64c39113bf80" +checksum = "5df2c13d48c8cb8a3ec093ede6f0f4482f327d7bb781120c5fb483ef0f17e758" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.37", ] [[package]] @@ -5512,8 +5515,7 @@ dependencies = [ [[package]] name = "ordered-multimap" version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ed8acf08e98e744e5384c8bc63ceb0364e68a6854187221c18df61c4797690e" +source = "git+https://github.com/risingwavelabs/ordered-multimap-rs.git?rev=19c743f#19c743f3e3d106c99ba37628f06a2ca6faa2284f" dependencies = [ "dlv-list", "hashbrown 0.13.2", @@ -6043,6 +6045,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "pprof" version = "0.13.0" @@ -7634,6 +7642,7 @@ dependencies = [ name = "risingwave_jni_core" version = "0.1.0" dependencies = [ + "anyhow", "bytes", "cfg-or-panic", "futures", @@ -9824,14 +9833,15 @@ dependencies = [ [[package]] name = "time" -version = "0.3.28" +version = "0.3.30" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" dependencies = [ "deranged", "itoa", "libc", "num_threads", + "powerfmt", "serde", "time-core", "time-macros", @@ -9839,15 +9849,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" dependencies = [ "time-core", ] diff --git a/Cargo.toml b/Cargo.toml index ef09221b818a2..f8a9b7d0e2fa5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,7 +97,7 @@ aws-smithy-types = "0.55" aws-endpoint = "0.55" aws-types = "0.55" etcd-client = { package = "madsim-etcd-client", version = "0.4" } -futures-async-stream = "0.2" +futures-async-stream = "0.2.9" hytra = "0.1" rdkafka = { package = "madsim-rdkafka", version = "0.3.0", features = [ "cmake-build", @@ -165,6 +165,8 @@ unused_must_use = "forbid" future_incompatible = "warn" nonstandard_style = "warn" rust_2018_idioms = "warn" +# Backward compatibility is not important for an application. +async_fn_in_trait = "allow" [workspace.lints.clippy] uninlined_format_args = "allow" @@ -229,8 +231,8 @@ opt-level = 2 incremental = false debug = 1 -# Patch third-party crates for deterministic simulation. [patch.crates-io] +# Patch third-party crates for deterministic simulation. quanta = { git = "https://github.com/madsim-rs/quanta.git", rev = "948bdc3" } getrandom = { git = "https://github.com/madsim-rs/getrandom.git", rev = "8daf97e" } tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "fe39bb8e" } @@ -238,3 +240,8 @@ tokio-retry = { git = "https://github.com/madsim-rs/rust-tokio-retry.git", rev = tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "ac00d88" } # patch: unlimit 4MB message size for grpc client etcd-client = { git = "https://github.com/risingwavelabs/etcd-client.git", rev = "4e84d40" } + +# Patch for coverage_attribute. +# https://github.com/sgodwincs/dlv-list-rs/pull/19#issuecomment-1774786289 +dlv-list = { git = "https://github.com/sgodwincs/dlv-list-rs.git", rev = "5bbc5d0" } +ordered-multimap = { git = "https://github.com/risingwavelabs/ordered-multimap-rs.git", rev = "19c743f" } diff --git a/ci/build-ci-image.sh b/ci/build-ci-image.sh index 43ff81ade2b85..59c88e5e9a9ae 100755 --- a/ci/build-ci-image.sh +++ b/ci/build-ci-image.sh @@ -13,7 +13,7 @@ cat ../rust-toolchain # !!! CHANGE THIS WHEN YOU WANT TO BUMP CI IMAGE !!! 
# # AND ALSO docker-compose.yml # ###################################################### -export BUILD_ENV_VERSION=v20230919 +export BUILD_ENV_VERSION=v20231022 export BUILD_TAG="public.ecr.aws/x5u3w5h6/rw-build-env:${BUILD_ENV_VERSION}" diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 6fe7cfbfdeca2..66dd2d175e675 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -71,7 +71,7 @@ services: retries: 5 source-test-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022 depends_on: - mysql - db @@ -81,10 +81,11 @@ services: - ..:/risingwave sink-test-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022 depends_on: - mysql - db + - message_queue - elasticsearch - clickhouse-server - pulsar @@ -92,12 +93,12 @@ services: - ..:/risingwave rw-build-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022 volumes: - ..:/risingwave ci-flamegraph-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022 # NOTE(kwannoel): This is used in order to permit # syscalls for `nperf` (perf_event_open), # so it can do CPU profiling. @@ -108,7 +109,7 @@ services: - ..:/risingwave regress-test-env: - image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919 + image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022 depends_on: db: condition: service_healthy diff --git a/ci/rust-toolchain b/ci/rust-toolchain index ebc0b6c285a4e..fe2a026f6e40f 100644 --- a/ci/rust-toolchain +++ b/ci/rust-toolchain @@ -1,2 +1,2 @@ [toolchain] -channel = "nightly-2023-09-09" +channel = "nightly-2023-10-21" diff --git a/ci/scripts/e2e-iceberg-cdc.sh b/ci/scripts/e2e-iceberg-cdc.sh new file mode 100755 index 0000000000000..081f5bbd2afcb --- /dev/null +++ b/ci/scripts/e2e-iceberg-cdc.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +# Exits as soon as any line fails. +set -euo pipefail + +source ci/scripts/common.sh + +# prepare environment +export CONNECTOR_RPC_ENDPOINT="localhost:50051" +export CONNECTOR_LIBS_PATH="./connector-node/libs" + +while getopts 'p:' opt; do + case ${opt} in + p ) + profile=$OPTARG + ;; + \? ) + echo "Invalid Option: -$OPTARG" 1>&2 + exit 1 + ;; + : ) + echo "Invalid option: $OPTARG requires an argument" 1>&2 + ;; + esac +done +shift $((OPTIND -1)) + +download_and_prepare_rw "$profile" source + +echo "--- Download connector node package" +buildkite-agent artifact download risingwave-connector.tar.gz ./ +mkdir ./connector-node +tar xf ./risingwave-connector.tar.gz -C ./connector-node + +echo "--- e2e, ci-1cn-1fe, iceberg cdc" + +node_port=50051 +node_timeout=10 + +wait_for_connector_node_start() { + start_time=$(date +%s) + while : + do + if nc -z localhost $node_port; then + echo "Port $node_port is listened! Connector Node is up!" + break + fi + + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + if [ $elapsed_time -ge $node_timeout ]; then + echo "Timeout waiting for port $node_port to be listened!" 
+ exit 1 + fi + sleep 0.1 + done + sleep 2 +} + +echo "--- starting risingwave cluster with connector node" + +RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ +cargo make ci-start ci-1cn-1fe-with-recovery +./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-node.log 2>&1 & +echo "waiting for connector node to start" +wait_for_connector_node_start + +# prepare minio iceberg sink +echo "--- preparing iceberg" +.risingwave/bin/mcli -C .risingwave/config/mcli mb hummock-minio/icebergdata + +cd e2e_test/iceberg +bash ./start_spark_connect_server.sh + +# Don't remove the `--quiet` option since poetry has a bug when printing output, see +# https://github.com/python-poetry/poetry/issues/3412 +"$HOME"/.local/bin/poetry update --quiet + +# 1. import data to mysql +mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc.sql + +# 2. create table and sink +"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc_init.toml + +# 3. insert new data to mysql +mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc_insert.sql + +sleep 20 + +# 4. check change +"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc.toml \ No newline at end of file diff --git a/ci/scripts/e2e-kafka-sink-test.sh b/ci/scripts/e2e-kafka-sink-test.sh index 06ef185f46e8b..71a91f2d8fba9 100755 --- a/ci/scripts/e2e-kafka-sink-test.sh +++ b/ci/scripts/e2e-kafka-sink-test.sh @@ -3,10 +3,10 @@ # Exits as soon as any line fails. set -euo pipefail -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --create > /dev/null 2>&1 -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --create > /dev/null 2>&1 -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --create > /dev/null 2>&1 -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --create > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --create > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --create > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --create > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --create > /dev/null 2>&1 sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/create_sink.slt' sleep 2 @@ -14,7 +14,7 @@ sleep 2 # test append-only kafka sink echo "testing append-only kafka sink" diff ./e2e_test/sink/kafka/append_only1.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 10 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 10 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for append-only sink is not as expected." 
exit 1 @@ -23,7 +23,7 @@ fi # test upsert kafka sink echo "testing upsert kafka sink" diff ./e2e_test/sink/kafka/upsert1.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for upsert sink is not as expected." exit 1 @@ -32,7 +32,7 @@ fi # test upsert kafka sink with schema echo "testing upsert kafka sink with schema" diff ./e2e_test/sink/kafka/upsert_schema1.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for upsert sink with schema is not as expected." exit 1 @@ -40,7 +40,7 @@ fi # test debezium kafka sink echo "testing debezium kafka sink" -(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 10 | sort) > ./e2e_test/sink/kafka/debezium1.tmp.result 2> /dev/null +(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 10 | sort) > ./e2e_test/sink/kafka/debezium1.tmp.result 2> /dev/null python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium1.result e2e_test/sink/kafka/debezium1.tmp.result if [ $? -ne 0 ]; then echo "The output for debezium sink is not as expected." @@ -57,7 +57,7 @@ psql -h localhost -p 4566 -d dev -U root -c "update t_kafka set v_varchar = '', # test append-only kafka sink after update echo "testing append-only kafka sink after updating data" diff ./e2e_test/sink/kafka/append_only2.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 11 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 11 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for append-only sink after update is not as expected." exit 1 @@ -66,7 +66,7 @@ fi # test upsert kafka sink after update echo "testing upsert kafka sink after updating data" diff ./e2e_test/sink/kafka/upsert2.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for upsert sink after update is not as expected." 
exit 1 @@ -75,7 +75,7 @@ fi # test upsert kafka sink with schema after update echo "testing upsert kafka sink with schema after updating data" diff ./e2e_test/sink/kafka/upsert_schema2.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for upsert sink with schema is not as expected." exit 1 @@ -83,7 +83,7 @@ fi # test debezium kafka sink after update echo "testing debezium kafka sink after updating data" -(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 11 | sort) > ./e2e_test/sink/kafka/debezium2.tmp.result 2> /dev/null +(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 11 | sort) > ./e2e_test/sink/kafka/debezium2.tmp.result 2> /dev/null python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium2.result e2e_test/sink/kafka/debezium2.tmp.result if [ $? -ne 0 ]; then echo "The output for debezium sink after update is not as expected." @@ -100,7 +100,7 @@ psql -h localhost -p 4566 -d dev -U root -c "delete from t_kafka where id = 1;" # test upsert kafka sink after delete echo "testing upsert kafka sink after deleting data" diff ./e2e_test/sink/kafka/upsert3.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for upsert sink after update is not as expected." exit 1 @@ -109,7 +109,7 @@ fi # test upsert kafka sink with schema after delete echo "testing upsert kafka sink with schema after deleting data" diff ./e2e_test/sink/kafka/upsert_schema3.result \ -<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null) +<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null) if [ $? -ne 0 ]; then echo "The output for upsert sink with schema is not as expected." 
exit 1 @@ -117,7 +117,7 @@ fi # test debezium kafka sink after delete echo "testing debezium kafka sink after deleting data" -(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 13 | sort) > ./e2e_test/sink/kafka/debezium3.tmp.result 2> /dev/null +(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 13 | sort) > ./e2e_test/sink/kafka/debezium3.tmp.result 2> /dev/null python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium3.result e2e_test/sink/kafka/debezium3.tmp.result if [ $? -ne 0 ]; then echo "The output for debezium sink after delete is not as expected." @@ -128,13 +128,13 @@ else fi sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/drop_sink.slt' -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --delete > /dev/null 2>&1 -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --delete > /dev/null 2>&1 -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --delete > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --delete > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --delete > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --delete > /dev/null 2>&1 # test different encoding echo "testing protobuf" cp src/connector/src/test_data/proto_recursive/recursive.pb ./proto-recursive -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only-protobuf --create > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only-protobuf --create > /dev/null 2>&1 sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/protobuf.slt' -./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only-protobuf --delete > /dev/null 2>&1 +./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only-protobuf --delete > /dev/null 2>&1 diff --git a/ci/scripts/e2e-sink-test.sh b/ci/scripts/e2e-sink-test.sh index 2dc02f0eada7a..ce2cc46381eba 100755 --- a/ci/scripts/e2e-sink-test.sh +++ b/ci/scripts/e2e-sink-test.sh @@ -57,7 +57,7 @@ node_port=50051 node_timeout=10 echo "--- starting risingwave cluster with connector node" -cargo make ci-start ci-kafka +cargo make ci-start ci-1cn-1fe ./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-node.log 2>&1 & echo "waiting for connector node to start" diff --git a/ci/workflows/integration-tests.yml b/ci/workflows/integration-tests.yml index 4bd0ec1a000b1..455f29b210ec1 100644 --- a/ci/workflows/integration-tests.yml +++ b/ci/workflows/integration-tests.yml @@ -29,6 +29,7 @@ steps: - "postgres-cdc" - "mysql-sink" - "postgres-sink" + - "iceberg-cdc" # - "iceberg-sink" - "debezium-mysql" format: @@ -79,6 +80,10 @@ steps: # testcase: "iceberg-sink" # format: "protobuf" # skip: true + - with: + testcase: "iceberg-cdc" + format: "protobuf" + skip: true - with: 
testcase: "debezium-mysql" format: "protobuf" diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml index 985bd0be4b822..3aaa09f0d7716 100644 --- a/ci/workflows/pull-request.yml +++ b/ci/workflows/pull-request.yml @@ -209,6 +209,21 @@ steps: timeout_in_minutes: 10 retry: *auto-retry + - label: "end-to-end iceberg cdc test" + if: build.pull_request.labels includes "ci/run-e2e-iceberg-sink-tests" + command: "ci/scripts/e2e-iceberg-cdc.sh -p ci-dev" + depends_on: + - "build" + - "build-other" + plugins: + - docker-compose#v4.9.0: + run: sink-test-env + config: ci/docker-compose.yml + mount-buildkite-agent: true + - ./ci/plugins/upload-failure-logs + timeout_in_minutes: 10 + retry: *auto-retry + - label: "end-to-end pulsar sink test" if: build.pull_request.labels includes "ci/run-e2e-pulsar-sink-tests" command: "ci/scripts/e2e-pulsar-sink-test.sh -p ci-dev" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index d25c94daf2670..4dbd5fe5bb28d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -260,6 +260,7 @@ services: MINIO_PROMETHEUS_URL: "http://prometheus-0:9500" MINIO_ROOT_PASSWORD: hummockadmin MINIO_ROOT_USER: hummockadmin + MINIO_DOMAIN: "minio-0" container_name: minio-0 healthcheck: test: diff --git a/e2e_test/iceberg/main.py b/e2e_test/iceberg/main.py index fa07aa367a9b3..3f3120227e6e7 100644 --- a/e2e_test/iceberg/main.py +++ b/e2e_test/iceberg/main.py @@ -42,14 +42,16 @@ def init_iceberg_table(args,init_sqls): spark.sql(sql) -def init_risingwave_mv(args,slt): +def execute_slt(args,slt): + if slt is None or slt == "": + return rw_config = args['risingwave'] cmd = f"sqllogictest -p {rw_config['port']} -d {rw_config['db']} {slt}" print(f"Command line is [{cmd}]") subprocess.run(cmd, shell=True, check=True) - time.sleep(10) + time.sleep(30) def verify_result(args,verify_sql,verify_schema,verify_data): @@ -110,6 +112,6 @@ def drop_table(args,drop_sqls): print({section: dict(config[section]) for section in config.sections()}) init_iceberg_table(config,init_sqls) - init_risingwave_mv(config,slt) + execute_slt(config,slt) verify_result(config,verify_sql,verify_schema,verify_data) drop_table(config,drop_sqls) diff --git a/e2e_test/iceberg/test_case/cdc/load.slt b/e2e_test/iceberg/test_case/cdc/load.slt new file mode 100644 index 0000000000000..caefd1326bbda --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/load.slt @@ -0,0 +1,46 @@ +# CDC source basic test + +# enable cdc backfill in ci +statement ok +set cdc_backfill='true'; + +statement ok +create table products ( id INT, + name STRING, + description STRING, + PRIMARY KEY (id) +) with ( + connector = 'mysql-cdc', + hostname = 'mysql', + port = '3306', + username = 'root', + password = '123456', + database.name = 'my@db', + table.name = 'products', + server.id = '5085' +); + + +statement ok +CREATE SINK s1 AS select * from products WITH ( + connector = 'iceberg', + type = 'upsert', + force_append_only = 'false', + database.name = 'demo', + table.name = 'demo_db.demo_table', + catalog.type = 'storage', + warehouse.path = 's3://icebergdata/demo', + s3.endpoint = 'http://127.0.0.1:9301', + s3.region = 'us-east-1', + s3.access.key = 'hummockadmin', + s3.secret.key = 'hummockadmin', + primary_key = 'id' +); + +query I +select count(*) from products; +---- +8 + +statement ok +flush; diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql new file mode 100644 index 0000000000000..b7b6f13af83cf --- /dev/null +++ 
b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql @@ -0,0 +1,21 @@ +DROP DATABASE IF EXISTS `my@db`; +CREATE DATABASE `my@db`; + +USE `my@db`; + +CREATE TABLE products ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description VARCHAR(512) +); + +ALTER TABLE products AUTO_INCREMENT = 101; + +INSERT INTO products VALUES (default,"101","101"), +(default,"102","102"), +(default,"103","103"), +(default,"104","104"), +(default,"105","105"), +(default,"106","106"), +(default,"107","107"), +(default,"108","108") diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql new file mode 100644 index 0000000000000..641d6220ea8dc --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql @@ -0,0 +1,7 @@ +USE `my@db`; + +INSERT INTO products VALUES (default,"109","109"), +(default,"110","110"), +(default,"111","111"), +(default,"112","112"), +(default,"113","113"); diff --git a/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml b/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml new file mode 100644 index 0000000000000..5ab9647b12eb0 --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml @@ -0,0 +1,25 @@ +init_sqls = [] + +slt = '' + +verify_schema = ['int','string','string'] + +verify_sql = 'SELECT * FROM demo_db.demo_table ORDER BY id ASC' + +verify_data = """ +101,101,101 +102,102,102 +103,103,103 +104,104,104 +105,105,105 +106,106,106 +107,107,107 +108,108,108 +109,109,109 +110,110,110 +111,111,111 +112,112,112 +113,113,113 +""" + +drop_sqls = [] diff --git a/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml b/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml new file mode 100644 index 0000000000000..17e5f7497aae5 --- /dev/null +++ b/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml @@ -0,0 +1,31 @@ +init_sqls = [ + 'CREATE SCHEMA IF NOT EXISTS demo_db', + 'DROP TABLE IF EXISTS demo_db.demo_table', + ''' + CREATE TABLE demo_db.demo_table ( + id int, + name string, + description string + ) USING iceberg + TBLPROPERTIES ('format-version'='2'); + ''' +] + +slt = 'test_case/cdc/load.slt' + +verify_schema = ['int','string','string'] + +verify_sql = 'SELECT * FROM demo_db.demo_table ORDER BY id ASC' + +verify_data = """ +101,101,101 +102,102,102 +103,103,103 +104,104,104 +105,105,105 +106,106,106 +107,107,107 +108,108,108 +""" + +drop_sqls = [] diff --git a/e2e_test/sink/kafka/create_sink.slt b/e2e_test/sink/kafka/create_sink.slt index 25e3a59fdff3a..a1f296774f526 100644 --- a/e2e_test/sink/kafka/create_sink.slt +++ b/e2e_test/sink/kafka/create_sink.slt @@ -31,7 +31,7 @@ create connection mock with ( statement error create sink si_kafka_append_only_conn from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-append-only', type = 'append-only', force_append_only = 'true', @@ -42,7 +42,7 @@ create sink si_kafka_append_only_conn from t_kafka with ( statement ok create sink si_kafka_append_only_conn from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-append-only', type = 'append-only', force_append_only = 'true', @@ -66,7 +66,7 @@ drop connection mock; statement error sink cannot be append-only create sink si_kafka_append_only from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + 
properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-append-only', type = 'append-only', ); @@ -74,7 +74,7 @@ create sink si_kafka_append_only from t_kafka with ( statement ok create sink si_kafka_append_only from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-append-only', type = 'append-only', force_append_only = 'true' @@ -83,7 +83,7 @@ create sink si_kafka_append_only from t_kafka with ( statement error primary key not defined create sink si_kafka_upsert from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-upsert', type = 'upsert', ); @@ -91,7 +91,7 @@ create sink si_kafka_upsert from t_kafka with ( statement ok create sink si_kafka_upsert from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-upsert', type = 'upsert', primary_key = 'id', @@ -100,7 +100,7 @@ create sink si_kafka_upsert from t_kafka with ( statement ok create sink si_kafka_upsert_schema from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-upsert-schema', primary_key = 'id', ) format upsert encode json ( @@ -110,7 +110,7 @@ create sink si_kafka_upsert_schema from t_kafka with ( statement ok create sink si_kafka_debezium from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-debezium', type = 'debezium', primary_key = 'id', @@ -119,7 +119,7 @@ create sink si_kafka_debezium from t_kafka with ( statement error primary key not defined create sink debezium_without_pk from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-debezium', type = 'debezium', ); @@ -127,7 +127,7 @@ create sink debezium_without_pk from t_kafka with ( statement ok create sink multiple_pk from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-debezium', type = 'debezium', primary_key = 'id,v_varchar' @@ -139,7 +139,7 @@ drop sink multiple_pk; statement error Sink primary key column not found: invalid. 
create sink invalid_pk_column from t_kafka with ( connector = 'kafka', - properties.bootstrap.server = '127.0.0.1:29092', + properties.bootstrap.server = 'message_queue:29092', topic = 'test-rw-sink-debezium', type = 'debezium', primary_key = 'id,invalid' diff --git a/e2e_test/sink/kafka/protobuf.slt b/e2e_test/sink/kafka/protobuf.slt index f69c4a9d07110..87ab884eddbde 100644 --- a/e2e_test/sink/kafka/protobuf.slt +++ b/e2e_test/sink/kafka/protobuf.slt @@ -2,7 +2,7 @@ statement ok create table from_kafka with ( connector = 'kafka', topic = 'test-rw-sink-append-only-protobuf', - properties.bootstrap.server = '127.0.0.1:29092') + properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( schema.location = 'file:///risingwave/proto-recursive', message = 'recursive.AllTypes'); @@ -37,7 +37,7 @@ statement ok create sink sink0 from into_kafka with ( connector = 'kafka', topic = 'test-rw-sink-append-only-protobuf', - properties.bootstrap.server = '127.0.0.1:29092') + properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( force_append_only = true, schema.location = 'file:///risingwave/proto-recursive', @@ -70,7 +70,7 @@ statement error failed to read file create sink sink_err from into_kafka with ( connector = 'kafka', topic = 'test-rw-sink-append-only-protobuf', - properties.bootstrap.server = '127.0.0.1:29092') + properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( force_append_only = true, schema.location = 'file:///risingwave/proto-recursiv', @@ -80,7 +80,7 @@ statement error encode extra_column error: field not in proto create sink sink_err as select 1 as extra_column with ( connector = 'kafka', topic = 'test-rw-sink-append-only-protobuf', - properties.bootstrap.server = '127.0.0.1:29092') + properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( force_append_only = true, schema.location = 'file:///risingwave/proto-recursive', @@ -90,7 +90,7 @@ statement error s3 URL not supported yet create sink sink_err from into_kafka with ( connector = 'kafka', topic = 'test-rw-sink-append-only-protobuf', - properties.bootstrap.server = '127.0.0.1:29092') + properties.bootstrap.server = 'message_queue:29092') format plain encode protobuf ( force_append_only = true, schema.location = 's3:///risingwave/proto-recursive', diff --git a/integration_tests/iceberg-cdc/README.md b/integration_tests/iceberg-cdc/README.md new file mode 100644 index 0000000000000..56f40172c3dfa --- /dev/null +++ b/integration_tests/iceberg-cdc/README.md @@ -0,0 +1,5 @@ +# Iceberg CDC Integration Tests +`mysql -> rw -> iceberg` + +# How to run +./run_test.sh \ No newline at end of file diff --git a/integration_tests/iceberg-cdc/docker-compose.yaml b/integration_tests/iceberg-cdc/docker-compose.yaml new file mode 100644 index 0000000000000..8e9ad1062ef38 --- /dev/null +++ b/integration_tests/iceberg-cdc/docker-compose.yaml @@ -0,0 +1,142 @@ +version: '3.8' + +services: + compactor-0: + extends: + file: ../../docker/docker-compose.yml + service: compactor-0 + compute-node-0: + extends: + file: ../../docker/docker-compose.yml + service: compute-node-0 + etcd-0: + extends: + file: ../../docker/docker-compose.yml + service: etcd-0 + frontend-node-0: + extends: + file: ../../docker/docker-compose.yml + service: frontend-node-0 + meta-node-0: + extends: + file: ../../docker/docker-compose.yml + service: meta-node-0 + grafana-0: + extends: + file: ../../docker/docker-compose.yml + service: grafana-0 + prometheus-0: + extends: + 
file: ../../docker/docker-compose.yml + service: prometheus-0 + minio-0: + extends: + file: ../../docker/docker-compose.yml + service: minio-0 + mc: + depends_on: + - minio-0 + image: minio/mc + environment: + - AWS_ACCESS_KEY_ID=hummockadmin + - AWS_SECRET_ACCESS_KEY=hummockadmin + - AWS_REGION=us-east-1 + entrypoint: > + /bin/sh -c " + until (/usr/bin/mc config host add minio http://minio-0:9301 hummockadmin hummockadmin) do echo '...waiting...' && sleep 1; done; + /usr/bin/mc rm -r --force minio/icebergdata; + /usr/bin/mc mb minio/icebergdata; + /usr/bin/mc anonymous set public minio/icebergdata; + tail -f /dev/null + " + + mysql: + image: mysql:8.0 + expose: + - 3306 + ports: + - "3306:3306" + environment: + - MYSQL_ROOT_PASSWORD=123456 + - MYSQL_USER=mysqluser + - MYSQL_PASSWORD=mysqlpw + - MYSQL_DATABASE=mydb + healthcheck: + test: [ "CMD-SHELL", "mysqladmin ping -h 127.0.0.1 -u root -p123456" ] + interval: 5s + timeout: 5s + retries: 5 + container_name: mysql + prepare_mysql: + image: mysql:8.0 + depends_on: + - mysql + command: + - /bin/sh + - -c + - "mysql -p123456 -h mysql mydb < mysql_prepare.sql" + volumes: + - "./mysql_prepare.sql:/mysql_prepare.sql" + container_name: prepare_mysql + restart: on-failure + + rest: + image: tabulario/iceberg-rest:0.6.0 + environment: + - AWS_ACCESS_KEY_ID=hummockadmin + - AWS_SECRET_ACCESS_KEY=hummockadmin + - AWS_REGION=us-east-1 + - CATALOG_CATOLOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog + - CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory + - CATALOG_WAREHOUSE=s3://icebergdata/demo + - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO + - CATALOG_S3_ENDPOINT=http://minio-0:9301 + depends_on: + - minio-0 + # let the rest access minio through: hummock001.minio-0 + links: + - minio-0:icebergdata.minio-0 + expose: + - 8181 + ports: + - "8181:8181" + + spark: + depends_on: + - minio-0 + - rest + image: ghcr.io/icelake-io/icelake-spark:latest + environment: + - AWS_ACCESS_KEY_ID=hummockadmin + - AWS_SECRET_ACCESS_KEY=hummockadmin + - AWS_REGION=us-east-1 + - SPARK_HOME=/opt/spark + - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin:/opt/spark/sbin + user: root + links: + - minio-0:icebergdata.minio-0 + expose: + - 15002 + ports: + - "15002:15002" + healthcheck: + test: netstat -ltn | grep -c 15002 + interval: 1s + retries: 1200 + volumes: + - ./spark:/spark + command: [ "bash", "/spark/spark-connect-server.sh" ] + +volumes: + compute-node-0: + external: false + etcd-0: + external: false + grafana-0: + external: false + minio-0: + external: false + prometheus-0: + external: false + spark: + external: false diff --git a/integration_tests/iceberg-cdc/mysql_prepare.sql b/integration_tests/iceberg-cdc/mysql_prepare.sql new file mode 100644 index 0000000000000..3e5a236a41205 --- /dev/null +++ b/integration_tests/iceberg-cdc/mysql_prepare.sql @@ -0,0 +1,15 @@ +-- mysql -p123456 -uroot -h 127.0.0.1 mydb < mysql_prepare.sql +-- +-- Mysql +USE mydb; + +CREATE TABLE user_behaviors ( + user_id VARCHAR(60), + target_id VARCHAR(60), + target_type VARCHAR(60), + event_timestamp VARCHAR(100), + behavior_type VARCHAR(60), + parent_target_type VARCHAR(60), + parent_target_id VARCHAR(60), + PRIMARY KEY(user_id, target_id, event_timestamp) +); diff --git a/integration_tests/iceberg-cdc/python/check.py b/integration_tests/iceberg-cdc/python/check.py new file mode 100644 index 0000000000000..699fa4df29c30 --- /dev/null +++ b/integration_tests/iceberg-cdc/python/check.py @@ -0,0 +1,25 @@ +from pyspark.sql import SparkSession 
+import configparser +import psycopg2 + +def check_spark_table(args): + expect_row_count = 0 + rw_config = args['risingwave'] + with psycopg2.connect(database=rw_config['db'], user=rw_config['user'], host=rw_config['host'], + port=rw_config['port']) as conn: + with conn.cursor() as cursor: + cursor.execute("SELECT COUNT(*) FROM user_behaviors") + expect_row_count = cursor.fetchone()[0] + print(f"expect_row_count is {expect_row_count}") + spark_config = args['spark'] + spark = SparkSession.builder.remote(spark_config['url']).getOrCreate() + actual_row_count = spark.sql("SELECT COUNT(*) FROM s1.t1").collect()[0][0] + print(f"actual_row_count is {actual_row_count}") + assert actual_row_count==expect_row_count + + +if __name__ == "__main__": + config = configparser.ConfigParser() + config.read("config.ini") + print({section: dict(config[section]) for section in config.sections()}) + check_spark_table(config) diff --git a/integration_tests/iceberg-cdc/python/config.ini b/integration_tests/iceberg-cdc/python/config.ini new file mode 100644 index 0000000000000..bd95eddc5b80e --- /dev/null +++ b/integration_tests/iceberg-cdc/python/config.ini @@ -0,0 +1,8 @@ +[spark] +url=sc://localhost:15002 + +[risingwave] +db=dev +user=root +host=127.0.0.1 +port=4566 diff --git a/integration_tests/iceberg-cdc/python/init.py b/integration_tests/iceberg-cdc/python/init.py new file mode 100644 index 0000000000000..289fa2f161889 --- /dev/null +++ b/integration_tests/iceberg-cdc/python/init.py @@ -0,0 +1,103 @@ +from pyspark.sql import SparkSession +import configparser +import psycopg2 + + +def init_spark_table(args): + spark_config = args['spark'] + spark = SparkSession.builder.remote(spark_config['url']).getOrCreate() + + init_table_sqls = [ + "CREATE SCHEMA IF NOT EXISTS s1", + "DROP TABLE IF EXISTS s1.t1", + """ + CREATE TABLE s1.t1 + ( + user_id string, + target_id string, + target_type string, + event_timestamp string, + behavior_type string, + parent_target_type string, + parent_target_id string + ) USING iceberg + TBLPROPERTIES ('format-version'='2'); + """, + ] + + for sql in init_table_sqls: + print(f"Executing sql: {sql}") + spark.sql(sql) + + +def init_risingwave_mv(args): + rw_config = args['risingwave'] + sqls = [ + "set streaming_parallelism = 4", + """ + CREATE TABLE user_behaviors ( + user_id VARCHAR, + target_id VARCHAR, + target_type VARCHAR, + event_timestamp VARCHAR, + behavior_type VARCHAR, + parent_target_type VARCHAR, + parent_target_id VARCHAR, + PRIMARY KEY(user_id, target_id, event_timestamp) + ) with ( + connector = 'mysql-cdc', + hostname = 'mysql', + port = '3306', + username = 'root', + password = '123456', + database.name = 'mydb', + table.name = 'user_behaviors', + server.id = '1' + ); + """, + # f""" + # CREATE SINK s1 + # AS SELECT * FROM user_behaviors + # WITH ( + # connector='iceberg', + # type='upsert', + # primary_key = 'user_id, target_id, event_timestamp', + # catalog.type = 'storage', + # s3.endpoint = 'http://minio-0:9301', + # s3.access.key = 'hummockadmin', + # s3.secret.key = 'hummockadmin', + # database.name='demo', + # table.name='s1.t1',warehouse.path = 's3://hummock001/icebergdata/demo',s3.region = 'us-east-1' + # ); + # """ + f""" + CREATE SINK s1 + AS SELECT * FROM user_behaviors + WITH ( + connector='iceberg', + type='upsert', + primary_key = 'user_id, target_id, event_timestamp', + catalog.type = 'rest', + catalog.uri = 'http://rest:8181', + s3.endpoint = 'http://minio-0:9301', + s3.access.key = 'hummockadmin', + s3.secret.key = 'hummockadmin', + 
database.name='demo', + table.name='s1.t1',warehouse.path = 's3://icebergdata/demo/s1/t1',s3.region = 'us-east-1' + ); + """ + ] + with psycopg2.connect(database=rw_config['db'], user=rw_config['user'], host=rw_config['host'], + port=rw_config['port']) as conn: + with conn.cursor() as cursor: + for sql in sqls: + print(f"Executing sql {sql}") + cursor.execute(sql) + + +if __name__ == "__main__": + config = configparser.ConfigParser() + config.read("config.ini") + print({section: dict(config[section]) for section in config.sections()}) + init_spark_table(config) + init_risingwave_mv(config) diff --git a/integration_tests/iceberg-cdc/python/pyproject.toml b/integration_tests/iceberg-cdc/python/pyproject.toml new file mode 100644 index 0000000000000..4c7bce1165796 --- /dev/null +++ b/integration_tests/iceberg-cdc/python/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "icelake-integration-tests" +version = "0.0.9" +description = "" +authors = ["Renjie Liu "] +readme = "README.md" +packages = [{include = "icelake_integration_tests"}] + +[tool.poetry.dependencies] +python = "^3.11" +pyspark = { version = "3.4.1", extras = ["sql", "connect"] } +psycopg2-binary = "^2.9" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/integration_tests/iceberg-cdc/run_test.sh b/integration_tests/iceberg-cdc/run_test.sh new file mode 100755 index 0000000000000..2d8b691bc7284 --- /dev/null +++ b/integration_tests/iceberg-cdc/run_test.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Start test environment. +docker-compose up -d --wait + +# To avoid exiting by unhealth, set it after start environment. +set -ex + +# Generate data +docker build -t iceberg-cdc-datagen ../datagen +timeout 20 docker run --network=iceberg-cdc_default iceberg-cdc-datagen /datagen --mode clickstream --qps 1 mysql --user mysqluser --password mysqlpw --host mysql --port 3306 --db mydb & + +cd python +poetry update --quiet +# Init source, mv, and sink. +poetry run python init.py +# Wait for sink to be finished. 
+sleep 40; +poetry run python check.py diff --git a/integration_tests/iceberg-cdc/spark/.gitignore b/integration_tests/iceberg-cdc/spark/.gitignore new file mode 100644 index 0000000000000..51dcf07222856 --- /dev/null +++ b/integration_tests/iceberg-cdc/spark/.gitignore @@ -0,0 +1,3 @@ +derby.log +metastore_db +.ivy \ No newline at end of file diff --git a/integration_tests/iceberg-cdc/spark/spark-connect-server.sh b/integration_tests/iceberg-cdc/spark/spark-connect-server.sh new file mode 100755 index 0000000000000..7c1cd64f1a2f2 --- /dev/null +++ b/integration_tests/iceberg-cdc/spark/spark-connect-server.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +set -ex + +JARS=$(find /opt/spark/deps -type f -name "*.jar" | tr '\n' ':') + +/opt/spark/sbin/start-connect-server.sh \ + --master local[3] \ + --driver-class-path $JARS \ + --conf spark.driver.bindAddress=0.0.0.0 \ + --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \ + --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \ + --conf spark.sql.catalog.demo.catalog-impl=org.apache.iceberg.rest.RESTCatalog \ + --conf spark.sql.catalog.demo.uri=http://rest:8181 \ + --conf spark.sql.catalog.demo.s3.endpoint=http://minio-0:9301 \ + --conf spark.sql.catalog.demo.s3.path.style.access=true \ + --conf spark.sql.catalog.demo.s3.access.key=hummockadmin \ + --conf spark.sql.catalog.demo.s3.secret.key=hummockadmin \ + --conf spark.sql.defaultCatalog=demo + +tail -f /opt/spark/logs/spark*.out diff --git a/integration_tests/redis-sink/create_sink.sql b/integration_tests/redis-sink/create_sink.sql index 03bfc2d0b0df1..2ba9ba67feb39 100644 --- a/integration_tests/redis-sink/create_sink.sql +++ b/integration_tests/redis-sink/create_sink.sql @@ -3,19 +3,13 @@ FROM bhv_mv WITH ( primary_key = 'user_id', connector = 'redis', - type = 'append-only', - force_append_only='true', redis.url= 'redis://127.0.0.1:6379/', -); +)FORMAT PLAIN ENCODE JSON(force_append_only='true'); CREATE SINK bhv_redis_sink_2 FROM bhv_mv WITH ( primary_key = 'user_id', connector = 'redis', - type = 'append-only', - force_append_only='true', redis.url= 'redis://127.0.0.1:6379/', - redis.keyformat='user_id:{user_id}', - redis.valueformat='username:{username},event_timestamp{event_timestamp}' -); \ No newline at end of file +)FORMAT PLAIN ENCODE TEMPLATE(force_append_only='true', key_format = 'UserID:{user_id}', value_format = 'TargetID:{target_id},EventTimestamp{event_timestamp}'); \ No newline at end of file diff --git a/integration_tests/scripts/run_demos.py b/integration_tests/scripts/run_demos.py index 28623f7ddc4a7..da2519e18db44 100644 --- a/integration_tests/scripts/run_demos.py +++ b/integration_tests/scripts/run_demos.py @@ -42,6 +42,13 @@ def run_demo(demo: str, format: str, wait_time = 40): run_sql_file(sql_file, demo_dir) sleep(10) +def iceberg_cdc_demo(): + demo = "iceberg-cdc" + file_dir = dirname(abspath(__file__)) + project_dir = dirname(file_dir) + demo_dir = os.path.join(project_dir, demo) + print("Running demo: iceberg-cdc") + subprocess.run(["bash","./run_test.sh"], cwd=demo_dir, check=True) def run_iceberg_demo(): demo = "iceberg-sink" @@ -149,5 +156,7 @@ def run_clickhouse_demo(): run_iceberg_demo() elif args.case == "clickhouse-sink": run_clickhouse_demo() +elif args.case == "iceberg-cdc": + iceberg_cdc_demo() else: run_demo(args.case, args.format) diff --git a/proto/ddl_service.proto b/proto/ddl_service.proto index 27c9f2ee82f83..1efc933a7d033 100644 --- a/proto/ddl_service.proto +++ b/proto/ddl_service.proto @@ -314,6 
+314,10 @@ message GetTablesResponse { map tables = 1; } +message WaitRequest {} + +message WaitResponse {} + service DdlService { rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse); rpc DropDatabase(DropDatabaseRequest) returns (DropDatabaseResponse); @@ -343,4 +347,5 @@ service DdlService { rpc ListConnections(ListConnectionsRequest) returns (ListConnectionsResponse); rpc DropConnection(DropConnectionRequest) returns (DropConnectionResponse); rpc GetTables(GetTablesRequest) returns (GetTablesResponse); + rpc Wait(WaitRequest) returns (WaitResponse); } diff --git a/proto/plan_common.proto b/proto/plan_common.proto index a88242a572693..d4c7a2e04f138 100644 --- a/proto/plan_common.proto +++ b/proto/plan_common.proto @@ -106,6 +106,7 @@ enum EncodeType { ENCODE_TYPE_PROTOBUF = 4; ENCODE_TYPE_JSON = 5; ENCODE_TYPE_BYTES = 6; + ENCODE_TYPE_TEMPLATE = 7; } enum RowFormatType { diff --git a/risedev.yml b/risedev.yml index a5ba8a7b43f97..135a33f602a6a 100644 --- a/risedev.yml +++ b/risedev.yml @@ -685,40 +685,6 @@ profile: - use: pubsub persist-data: true - ci-kafka: - config-path: src/config/ci.toml - steps: - - use: minio - - use: etcd - unsafe-no-fsync: true - - use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - use: zookeeper - persist-data: true - - use: kafka - persist-data: true - - ci-kafka-plus-pubsub: - config-path: src/config/ci.toml - steps: - - use: minio - - use: etcd - unsafe-no-fsync: true - - use: meta-node - - use: compute-node - enable-tiered-cache: true - - use: frontend - - use: compactor - - use: zookeeper - persist-data: true - - use: kafka - persist-data: true - - use: pubsub - persist-data: true - ci-redis: config-path: src/config/ci.toml steps: diff --git a/src/batch/src/lib.rs b/src/batch/src/lib.rs index 9104c96c951f5..809c096eb49df 100644 --- a/src/batch/src/lib.rs +++ b/src/batch/src/lib.rs @@ -17,8 +17,8 @@ #![feature(trait_alias)] #![feature(exact_size_is_empty)] #![feature(type_alias_impl_trait)] -#![cfg_attr(coverage, feature(no_coverage))] -#![feature(generators)] +#![cfg_attr(coverage, feature(coverage_attribute))] +#![feature(coroutines)] #![feature(proc_macro_hygiene, stmt_expr_attributes)] #![feature(iterator_try_collect)] #![feature(lint_reasons)] @@ -27,13 +27,11 @@ #![feature(let_chains)] #![feature(bound_map)] #![feature(int_roundings)] -#![feature(async_fn_in_trait)] #![feature(allocator_api)] #![feature(impl_trait_in_assoc_type)] #![feature(result_option_inspect)] #![feature(assert_matches)] #![feature(lazy_cell)] -#![feature(return_position_impl_trait_in_trait)] mod error; pub mod exchange_source; diff --git a/src/batch/src/rpc/service/task_service.rs b/src/batch/src/rpc/service/task_service.rs index b49a023acb22b..fb60e352ec293 100644 --- a/src/batch/src/rpc/service/task_service.rs +++ b/src/batch/src/rpc/service/task_service.rs @@ -53,7 +53,7 @@ impl TaskService for BatchServiceImpl { type CreateTaskStream = ReceiverStream; type ExecuteStream = ReceiverStream; - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn create_task( &self, request: Request, @@ -97,7 +97,7 @@ impl TaskService for BatchServiceImpl { } } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn cancel_task( &self, req: Request, @@ -109,7 +109,7 @@ impl TaskService for BatchServiceImpl { Ok(Response::new(CancelTaskResponse { status: None })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn execute( &self, 
req: Request, diff --git a/src/cmd/src/bin/compactor.rs b/src/cmd/src/bin/compactor.rs index 21b7db2405e2d..554168d8a6683 100644 --- a/src/cmd/src/bin/compactor.rs +++ b/src/cmd/src/bin/compactor.rs @@ -12,6 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] risingwave_cmd::main!(compactor); diff --git a/src/cmd/src/bin/compute_node.rs b/src/cmd/src/bin/compute_node.rs index 0bb1e5211ac57..a24d132b70b94 100644 --- a/src/cmd/src/bin/compute_node.rs +++ b/src/cmd/src/bin/compute_node.rs @@ -12,6 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] risingwave_cmd::main!(compute); diff --git a/src/cmd/src/bin/ctl.rs b/src/cmd/src/bin/ctl.rs index 38345c7a3fc2e..7b4c3132e747d 100644 --- a/src/cmd/src/bin/ctl.rs +++ b/src/cmd/src/bin/ctl.rs @@ -12,6 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] risingwave_cmd::main!(ctl); diff --git a/src/cmd/src/bin/frontend_node.rs b/src/cmd/src/bin/frontend_node.rs index 32d563be109fc..546bacbf1a901 100644 --- a/src/cmd/src/bin/frontend_node.rs +++ b/src/cmd/src/bin/frontend_node.rs @@ -12,6 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] risingwave_cmd::main!(frontend); diff --git a/src/cmd/src/bin/meta_node.rs b/src/cmd/src/bin/meta_node.rs index 032cc6bc28285..4bebfc5f915a2 100644 --- a/src/cmd/src/bin/meta_node.rs +++ b/src/cmd/src/bin/meta_node.rs @@ -12,6 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] risingwave_cmd::main!(meta); diff --git a/src/cmd/src/lib.rs b/src/cmd/src/lib.rs index 12de26657bd33..93df94a63816a 100644 --- a/src/cmd/src/lib.rs +++ b/src/cmd/src/lib.rs @@ -30,7 +30,7 @@ macro_rules! main { #[cfg(not(enable_task_local_alloc))] risingwave_common::enable_jemalloc!(); - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] fn main() { let opts = clap::Parser::parse(); $crate::$component(opts); diff --git a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs index 3e9088e16b9e2..b7693c6fa06a2 100644 --- a/src/cmd_all/src/bin/risingwave.rs +++ b/src/cmd_all/src/bin/risingwave.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] use std::str::FromStr; @@ -158,7 +158,7 @@ impl Component { } } -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] fn main() -> Result<()> { let risingwave = || { command!(BINARY_NAME) diff --git a/src/common/proc_macro/src/config.rs b/src/common/proc_macro/src/config.rs index 285834eb123cf..6e369fbad33eb 100644 --- a/src/common/proc_macro/src/config.rs +++ b/src/common/proc_macro/src/config.rs @@ -41,7 +41,7 @@ fn type_is_option(ty: &syn::Type) -> bool { false } -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] pub fn produce_override_config(input: DeriveInput) -> TokenStream { let syn::Data::Struct(syn::DataStruct { fields, .. }) = input.data else { abort!(input, "Only struct is supported"); diff --git a/src/common/proc_macro/src/lib.rs b/src/common/proc_macro/src/lib.rs index 060ee1950624e..a11e407c6c053 100644 --- a/src/common/proc_macro/src/lib.rs +++ b/src/common/proc_macro/src/lib.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] use estimate_size::{ add_trait_bounds, extract_ignored_generics_list, has_nested_flag_attribute_list, @@ -52,7 +52,7 @@ mod estimate_size; /// } /// } /// ``` -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] #[proc_macro_derive(OverrideConfig, attributes(override_opts))] #[proc_macro_error] pub fn override_config(input: TokenStream) -> TokenStream { diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs index 589780a25668e..408c8823d397f 100644 --- a/src/common/src/lib.rs +++ b/src/common/src/lib.rs @@ -24,12 +24,11 @@ #![feature(trusted_len)] #![feature(allocator_api)] #![feature(lint_reasons)] -#![feature(generators)] +#![feature(coroutines)] #![feature(map_try_insert)] #![feature(lazy_cell)] #![feature(error_generic_member_access)] #![feature(let_chains)] -#![feature(return_position_impl_trait_in_trait)] #![feature(portable_simd)] #![feature(array_chunks)] #![feature(inline_const_pat)] @@ -43,7 +42,6 @@ #![feature(result_option_inspect)] #![feature(map_entry_replace)] #![feature(negative_impls)] -#![feature(async_fn_in_trait)] #![feature(bound_map)] #![feature(array_methods)] diff --git a/src/common/src/types/ordered.rs b/src/common/src/types/ordered.rs index 75b07e529d7b9..68cd6329287e2 100644 --- a/src/common/src/types/ordered.rs +++ b/src/common/src/types/ordered.rs @@ -138,7 +138,7 @@ impl From for DefaultOrdered { } } -#[allow(clippy::incorrect_partial_ord_impl_on_ord_type)] +#[allow(clippy::non_canonical_partial_ord_impl)] impl PartialOrd for DefaultOrdered { fn partial_cmp(&self, other: &Self) -> Option { self.0.default_partial_cmp(other.as_inner()) diff --git a/src/common/src/util/future_utils.rs b/src/common/src/util/future_utils.rs index 75c38488457ac..20844d8cd15d4 100644 --- a/src/common/src/util/future_utils.rs +++ b/src/common/src/util/future_utils.rs @@ -13,9 +13,11 @@ // limitations under the License. use std::future::pending; +use std::pin::{pin, Pin}; -use futures::future::Either; -use futures::{Future, FutureExt, Stream}; +use futures::future::{select, Either}; +use futures::stream::Peekable; +use futures::{Future, FutureExt, Stream, StreamExt}; /// Convert a list of streams into a [`Stream`] of results from the streams. 
pub fn select_all( @@ -43,3 +45,34 @@ pub fn drop_either_future( Either::Right((right, _)) => Either::Right(right), } } + +/// Await on a future while monitoring on a peekable stream that may return error. +/// The peekable stream is polled at a higher priority than the future. +/// +/// When the peekable stream returns with a error and end of stream, the future will +/// return the error immediately. Otherwise, it will keep polling the given future. +/// +/// Return: +/// - Ok(output) as the output of the given future. +/// - Err(None) to indicate that the stream has reached the end. +/// - Err(e) to indicate that the stream returns an error. +pub async fn await_future_with_monitor_error_stream( + peek_stream: &mut Peekable> + Unpin>, + future: F, +) -> Result> { + // Poll the response stream to early see the error + match select(pin!(Pin::new(&mut *peek_stream).peek()), pin!(future)).await { + Either::Left((response_result, send_future)) => match response_result { + None => Err(None), + Some(Err(_)) => { + let err = match peek_stream.next().now_or_never() { + Some(Some(Err(err))) => err, + _ => unreachable!("peek has output, peek output not None, have check err"), + }; + Err(Some(err)) + } + Some(Ok(_)) => Ok(send_future.await), + }, + Either::Right((output, _)) => Ok(output), + } +} diff --git a/src/common/src/util/mod.rs b/src/common/src/util/mod.rs index f4140b558faa7..e1f85263e1415 100644 --- a/src/common/src/util/mod.rs +++ b/src/common/src/util/mod.rs @@ -45,7 +45,9 @@ pub mod tracing; pub mod value_encoding; pub mod worker_util; -pub use future_utils::{drop_either_future, pending_on_none, select_all}; +pub use future_utils::{ + await_future_with_monitor_error_stream, drop_either_future, pending_on_none, select_all, +}; #[macro_use] pub mod match_util; diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs index 65bf59eedf19e..fc5ae9ff19854 100644 --- a/src/compute/src/lib.rs +++ b/src/compute/src/lib.rs @@ -13,14 +13,14 @@ // limitations under the License. 
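The `await_future_with_monitor_error_stream` helper added to `future_utils` above (and re-exported from `risingwave_common::util` in the `mod.rs` hunk) is what the remote sink further down uses to watch the JNI response stream while other work is in flight. A rough usage sketch; the generic parameters elided in this extract are assumed to be a peekable `Stream` of `Result` items plus an arbitrary future:

    use futures::StreamExt;
    use risingwave_common::util::await_future_with_monitor_error_stream;

    #[tokio::main]
    async fn main() {
        // A response stream that has not produced an error yet.
        let mut responses = futures::stream::iter(vec![Ok::<u32, String>(1)]).peekable();

        // The stream is peeked first; since its next item is `Ok`, the helper falls
        // through to the provided future and returns that future's output.
        match await_future_with_monitor_error_stream(&mut responses, async { "sent" }).await {
            Ok(output) => println!("future completed: {output}"),
            Err(None) => println!("stream ended before the future completed"),
            Err(Some(err)) => println!("stream reported an error: {err}"),
        }
    }
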
#![feature(trait_alias)] -#![feature(generators)] +#![feature(coroutines)] #![feature(type_alias_impl_trait)] #![feature(let_chains)] #![feature(result_option_inspect)] #![feature(lint_reasons)] #![feature(impl_trait_in_assoc_type)] #![feature(lazy_cell)] -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] #[macro_use] extern crate tracing; diff --git a/src/compute/src/rpc/service/exchange_service.rs b/src/compute/src/rpc/service/exchange_service.rs index b59cc39587c2f..6225cef2a7e30 100644 --- a/src/compute/src/rpc/service/exchange_service.rs +++ b/src/compute/src/rpc/service/exchange_service.rs @@ -49,7 +49,7 @@ impl ExchangeService for ExchangeServiceImpl { type GetDataStream = BatchDataStream; type GetStreamStream = StreamDataStream; - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn get_data( &self, request: Request, diff --git a/src/compute/src/rpc/service/monitor_service.rs b/src/compute/src/rpc/service/monitor_service.rs index 97a0b80773791..8fc24664ec016 100644 --- a/src/compute/src/rpc/service/monitor_service.rs +++ b/src/compute/src/rpc/service/monitor_service.rs @@ -53,7 +53,7 @@ impl MonitorServiceImpl { #[async_trait::async_trait] impl MonitorService for MonitorServiceImpl { - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn stack_trace( &self, request: Request, @@ -85,7 +85,7 @@ impl MonitorService for MonitorServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn profiling( &self, request: Request, @@ -115,7 +115,7 @@ impl MonitorService for MonitorServiceImpl { } } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn heap_profiling( &self, request: Request, @@ -166,7 +166,7 @@ impl MonitorService for MonitorServiceImpl { } } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn list_heap_profiling( &self, _request: Request, @@ -206,7 +206,7 @@ impl MonitorService for MonitorServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn analyze_heap( &self, request: Request, diff --git a/src/compute/src/rpc/service/stream_service.rs b/src/compute/src/rpc/service/stream_service.rs index 525364b60dc1c..1c1448b3d1e45 100644 --- a/src/compute/src/rpc/service/stream_service.rs +++ b/src/compute/src/rpc/service/stream_service.rs @@ -45,7 +45,7 @@ impl StreamServiceImpl { #[async_trait::async_trait] impl StreamService for StreamServiceImpl { - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn update_actors( &self, request: Request, @@ -61,7 +61,7 @@ impl StreamService for StreamServiceImpl { } } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn build_actors( &self, request: Request, @@ -85,7 +85,7 @@ impl StreamService for StreamServiceImpl { } } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn broadcast_actor_info_table( &self, request: Request, @@ -104,7 +104,7 @@ impl StreamService for StreamServiceImpl { } } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn drop_actors( &self, request: Request, @@ -118,7 +118,7 @@ impl StreamService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn force_stop_actors( &self, request: Request, @@ -132,7 +132,7 @@ impl StreamService for StreamServiceImpl { })) } - 
#[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn inject_barrier( &self, request: Request, @@ -173,7 +173,7 @@ impl StreamService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn barrier_complete( &self, request: Request, @@ -243,7 +243,7 @@ impl StreamService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn wait_epoch_commit( &self, request: Request, diff --git a/src/compute/tests/cdc_tests.rs b/src/compute/tests/cdc_tests.rs index b3e39ece95002..6a50b8410bbd4 100644 --- a/src/compute/tests/cdc_tests.rs +++ b/src/compute/tests/cdc_tests.rs @@ -13,7 +13,7 @@ // limitations under the License. #![feature(let_chains)] -#![feature(generators)] +#![feature(coroutines)] use std::sync::atomic::AtomicU64; use std::sync::Arc; diff --git a/src/compute/tests/integration_tests.rs b/src/compute/tests/integration_tests.rs index a43ae2e5762da..6d7e93365c275 100644 --- a/src/compute/tests/integration_tests.rs +++ b/src/compute/tests/integration_tests.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![feature(generators)] +#![feature(coroutines)] #![feature(proc_macro_hygiene, stmt_expr_attributes)] use std::sync::atomic::AtomicU64; diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index 4886b1b52fcc5..87d2a0bdef689 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -113,7 +113,7 @@ strum = "0.25" strum_macros = "0.25" tempfile = "3" thiserror = "1" -time = "0.3.28" +time = "0.3.30" tokio = { version = "0.2", package = "madsim-tokio", features = [ "rt", "rt-multi-thread", diff --git a/src/connector/src/lib.rs b/src/connector/src/lib.rs index 8ccf62486ce65..aa613b4043c23 100644 --- a/src/connector/src/lib.rs +++ b/src/connector/src/lib.rs @@ -14,7 +14,7 @@ #![expect(dead_code)] #![allow(clippy::derive_partial_eq_without_eq)] -#![feature(generators)] +#![feature(coroutines)] #![feature(proc_macro_hygiene)] #![feature(stmt_expr_attributes)] #![feature(box_patterns)] @@ -25,11 +25,9 @@ #![feature(let_chains)] #![feature(box_into_inner)] #![feature(type_alias_impl_trait)] -#![feature(return_position_impl_trait_in_trait)] -#![feature(async_fn_in_trait)] #![feature(associated_type_defaults)] #![feature(impl_trait_in_assoc_type)] -#![feature(iter_from_generator)] +#![feature(iter_from_coroutine)] #![feature(if_let_guard)] #![feature(iterator_try_collect)] diff --git a/src/connector/src/sink/blackhole.rs b/src/connector/src/sink/blackhole.rs index 1f1ace3b0d104..60b0506604c97 100644 --- a/src/connector/src/sink/blackhole.rs +++ b/src/connector/src/sink/blackhole.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
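Another mechanical rename runs through the crate roots above (batch, common, compute, connector and the compute tests): the nightly `generators` feature is now `coroutines`, and `iter_from_generator` / `std::iter::from_generator` become `iter_from_coroutine` / `std::iter::from_coroutine`, as the sink formatter hunks further below show. A minimal sketch of the renamed API on the nightly toolchain this changeset targets:

    #![feature(coroutines)]
    #![feature(iter_from_coroutine)]

    // Build a lazy iterator from a coroutine, the same shape the sink formatters use.
    fn first_n(n: i32) -> impl Iterator<Item = i32> {
        std::iter::from_coroutine(move || {
            for i in 0..n {
                yield i;
            }
        })
    }

    fn main() {
        assert_eq!(first_n(3).collect::<Vec<_>>(), vec![0, 1, 2]);
    }
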
+use async_trait::async_trait; + use crate::sink::log_store::{LogReader, LogStoreReadItem, TruncateOffset}; use crate::sink::{ DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkError, SinkParam, SinkWriterParam, @@ -45,6 +47,7 @@ impl Sink for BlackHoleSink { } } +#[async_trait] impl LogSinker for BlackHoleSink { async fn consume_log_and_sink(self, mut log_reader: impl LogReader) -> Result<()> { log_reader.init().await?; diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs index c18dd7d10a92c..ca3a09e7f2eda 100644 --- a/src/connector/src/sink/catalog/mod.rs +++ b/src/connector/src/sink/catalog/mod.rs @@ -132,6 +132,7 @@ pub enum SinkEncode { Json, Protobuf, Avro, + Template, } impl SinkFormatDesc { @@ -177,6 +178,7 @@ impl SinkFormatDesc { SinkEncode::Json => E::Json, SinkEncode::Protobuf => E::Protobuf, SinkEncode::Avro => E::Avro, + SinkEncode::Template => E::Template, }; let options = self .options @@ -212,6 +214,7 @@ impl TryFrom for SinkFormatDesc { let encode = match value.encode() { E::Json => SinkEncode::Json, E::Protobuf => SinkEncode::Protobuf, + E::Template => SinkEncode::Template, E::Avro => SinkEncode::Avro, e @ (E::Unspecified | E::Native | E::Csv | E::Bytes) => { return Err(SinkError::Config(anyhow!( diff --git a/src/connector/src/sink/clickhouse.rs b/src/connector/src/sink/clickhouse.rs index 2bddf8026216f..f4fdf9b761f38 100644 --- a/src/connector/src/sink/clickhouse.rs +++ b/src/connector/src/sink/clickhouse.rs @@ -29,7 +29,11 @@ use serde_derive::Deserialize; use serde_with::serde_as; use super::{DummySinkCommitCoordinator, SinkWriterParam}; -use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt}; +use crate::sink::catalog::desc::SinkDesc; +use crate::sink::log_store::DeliveryFutureManagerAddFuture; +use crate::sink::writer::{ + AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, +}; use crate::sink::{ Result, Sink, SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, }; @@ -243,10 +247,14 @@ impl ClickHouseSink { } impl Sink for ClickHouseSink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = LogSinkerOf; + type LogSinker = AsyncTruncateLogSinkerOf; const SINK_NAME: &'static str = CLICKHOUSE_SINK; + fn default_sink_decouple(desc: &SinkDesc) -> bool { + desc.sink_type.is_append_only() + } + async fn validate(&self) -> Result<()> { // For upsert clickhouse sink, the primary key must be defined. if !self.is_append_only && self.pk_indices.is_empty() { @@ -277,7 +285,7 @@ impl Sink for ClickHouseSink { Ok(()) } - async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { + async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result { Ok(ClickHouseSinkWriter::new( self.config.clone(), self.schema.clone(), @@ -285,7 +293,7 @@ impl Sink for ClickHouseSink { self.is_append_only, ) .await? 
- .into_log_sinker(writer_param.sink_metrics)) + .into_log_sinker(usize::MAX)) } } pub struct ClickHouseSinkWriter { @@ -496,24 +504,18 @@ impl ClickHouseSinkWriter { } } -#[async_trait::async_trait] -impl SinkWriter for ClickHouseSinkWriter { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { +impl AsyncTruncateSinkWriter for ClickHouseSinkWriter { + async fn write_chunk<'a>( + &'a mut self, + chunk: StreamChunk, + _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>, + ) -> Result<()> { if self.is_append_only { self.append_only(chunk).await } else { self.upsert(chunk).await } } - - async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> { - // clickhouse no transactional guarantees, so we do nothing here. - Ok(()) - } - - async fn barrier(&mut self, _is_checkpoint: bool) -> Result<()> { - Ok(()) - } } #[derive(ClickHouseRow, Deserialize, Clone)] diff --git a/src/connector/src/sink/encoder/template.rs b/src/connector/src/sink/encoder/template.rs index 85f085989b6c4..97d8271f9e83a 100644 --- a/src/connector/src/sink/encoder/template.rs +++ b/src/connector/src/sink/encoder/template.rs @@ -12,11 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; + +use regex::Regex; use risingwave_common::catalog::Schema; use risingwave_common::row::Row; use risingwave_common::types::ToText; use super::{Result, RowEncoder}; +use crate::sink::SinkError; /// Encode a row according to a specified string template `user_id:{user_id}` pub struct TemplateEncoder { @@ -34,6 +38,24 @@ impl TemplateEncoder { template, } } + + pub fn check_string_format(format: &str, set: &HashSet) -> Result<()> { + // We will check if the string inside {} corresponds to a column name in rw. + // In other words, the content within {} should exclusively consist of column names from rw, + // which means '{{column_name}}' or '{{column_name1},{column_name2}}' would be incorrect. 
+ let re = Regex::new(r"\{([^}]*)\}").unwrap(); + if !re.is_match(format) { + return Err(SinkError::Redis( + "Can't find {} in key_format or value_format".to_string(), + )); + } + for capture in re.captures_iter(format) { + if let Some(inner_content) = capture.get(1) && !set.contains(inner_content.as_str()){ + return Err(SinkError::Redis(format!("Can't find field({:?}) in key_format or value_format",inner_content.as_str()))) + } + } + Ok(()) + } } impl RowEncoder for TemplateEncoder { diff --git a/src/connector/src/sink/formatter/append_only.rs b/src/connector/src/sink/formatter/append_only.rs index 523a52dab91bb..f0efcc21d9009 100644 --- a/src/connector/src/sink/formatter/append_only.rs +++ b/src/connector/src/sink/formatter/append_only.rs @@ -40,7 +40,7 @@ impl SinkFormatter for AppendOnlyFormatter impl Iterator, Option)>> { - std::iter::from_generator(|| { + std::iter::from_coroutine(|| { for (op, row) in chunk.rows() { if op != Op::Insert { continue; diff --git a/src/connector/src/sink/formatter/debezium_json.rs b/src/connector/src/sink/formatter/debezium_json.rs index 637aa23f06410..ce98daab88756 100644 --- a/src/connector/src/sink/formatter/debezium_json.rs +++ b/src/connector/src/sink/formatter/debezium_json.rs @@ -85,7 +85,7 @@ impl SinkFormatter for DebeziumJsonFormatter { &self, chunk: &StreamChunk, ) -> impl Iterator, Option)>> { - std::iter::from_generator(|| { + std::iter::from_coroutine(|| { let DebeziumJsonFormatter { schema, pk_indices, diff --git a/src/connector/src/sink/formatter/mod.rs b/src/connector/src/sink/formatter/mod.rs index a7463f7e3b306..17cb708292890 100644 --- a/src/connector/src/sink/formatter/mod.rs +++ b/src/connector/src/sink/formatter/mod.rs @@ -29,6 +29,7 @@ pub use upsert::UpsertFormatter; use super::catalog::{SinkEncode, SinkFormat, SinkFormatDesc}; use super::encoder::template::TemplateEncoder; use super::encoder::KafkaConnectParams; +use super::redis::{KEY_FORMAT, VALUE_FORMAT}; use crate::sink::encoder::{JsonEncoder, ProtoEncoder, TimestampHandlingMode}; /// Transforms a `StreamChunk` into a sequence of key-value pairs according a specific format, @@ -92,7 +93,7 @@ impl SinkFormatterImpl { let key_encoder = (!pk_indices.is_empty()).then(|| { JsonEncoder::new( schema.clone(), - Some(pk_indices), + Some(pk_indices.clone()), TimestampHandlingMode::Milli, ) }); @@ -115,6 +116,28 @@ impl SinkFormatterImpl { Ok(SinkFormatterImpl::AppendOnlyProto(formatter)) } SinkEncode::Avro => err_unsupported(), + SinkEncode::Template => { + let key_format = format_desc.options.get(KEY_FORMAT).ok_or_else(|| { + SinkError::Config(anyhow!( + "Cannot find 'key_format',please set it or use JSON" + )) + })?; + let value_format = + format_desc.options.get(VALUE_FORMAT).ok_or_else(|| { + SinkError::Config(anyhow!( + "Cannot find 'redis_value_format',please set it or use JSON" + )) + })?; + let key_encoder = TemplateEncoder::new( + schema.clone(), + Some(pk_indices), + key_format.clone(), + ); + let val_encoder = TemplateEncoder::new(schema, None, value_format.clone()); + Ok(SinkFormatterImpl::AppendOnlyTemplate( + AppendOnlyFormatter::new(Some(key_encoder), val_encoder), + )) + } } } SinkFormat::Debezium => { @@ -131,85 +154,66 @@ impl SinkFormatterImpl { ))) } SinkFormat::Upsert => { - if format_desc.encode != SinkEncode::Json { - return err_unsupported(); - } + match format_desc.encode { + SinkEncode::Json => { + let mut key_encoder = JsonEncoder::new( + schema.clone(), + Some(pk_indices), + TimestampHandlingMode::Milli, + ); + let mut val_encoder = + 
JsonEncoder::new(schema, None, TimestampHandlingMode::Milli); - let mut key_encoder = JsonEncoder::new( - schema.clone(), - Some(pk_indices), - TimestampHandlingMode::Milli, - ); - let mut val_encoder = JsonEncoder::new(schema, None, TimestampHandlingMode::Milli); - - if let Some(s) = format_desc.options.get("schemas.enable") { - match s.to_lowercase().parse::() { - Ok(true) => { - let kafka_connect = KafkaConnectParams { - schema_name: format!("{}.{}", db_name, sink_from_name), - }; - key_encoder = key_encoder.with_kafka_connect(kafka_connect.clone()); - val_encoder = val_encoder.with_kafka_connect(kafka_connect); - } - Ok(false) => {} - _ => { - return Err(SinkError::Config(anyhow!( - "schemas.enable is expected to be `true` or `false`, got {}", - s - ))); - } + if let Some(s) = format_desc.options.get("schemas.enable") { + match s.to_lowercase().parse::() { + Ok(true) => { + let kafka_connect = KafkaConnectParams { + schema_name: format!("{}.{}", db_name, sink_from_name), + }; + key_encoder = + key_encoder.with_kafka_connect(kafka_connect.clone()); + val_encoder = val_encoder.with_kafka_connect(kafka_connect); + } + Ok(false) => {} + _ => { + return Err(SinkError::Config(anyhow!( + "schemas.enable is expected to be `true` or `false`, got {}", + s + ))); + } + } + }; + + // Initialize the upsert_stream + let formatter = UpsertFormatter::new(key_encoder, val_encoder); + Ok(SinkFormatterImpl::UpsertJson(formatter)) } - }; - - // Initialize the upsert_stream - let formatter = UpsertFormatter::new(key_encoder, val_encoder); - Ok(SinkFormatterImpl::UpsertJson(formatter)) - } - } - } - - pub fn new_with_redis( - schema: Schema, - pk_indices: Vec, - is_append_only: bool, - key_format: Option, - value_format: Option, - ) -> Result { - match (key_format, value_format) { - (Some(k), Some(v)) => { - let key_encoder = TemplateEncoder::new( - schema.clone(), - Some(pk_indices), - k, - ); - let val_encoder = - TemplateEncoder::new(schema, None, v); - if is_append_only { - Ok(SinkFormatterImpl::AppendOnlyTemplate(AppendOnlyFormatter::new(Some(key_encoder), val_encoder))) - } else { - Ok(SinkFormatterImpl::UpsertTemplate(UpsertFormatter::new(key_encoder, val_encoder))) - } - } - (None, None) => { - let key_encoder = JsonEncoder::new( - schema.clone(), - Some(pk_indices), - TimestampHandlingMode::Milli, - ); - let val_encoder = JsonEncoder::new( - schema, - None, - TimestampHandlingMode::Milli, - ); - if is_append_only { - Ok(SinkFormatterImpl::AppendOnlyJson(AppendOnlyFormatter::new(Some(key_encoder), val_encoder))) - } else { - Ok(SinkFormatterImpl::UpsertJson(UpsertFormatter::new(key_encoder, val_encoder))) + SinkEncode::Template => { + let key_format = format_desc.options.get(KEY_FORMAT).ok_or_else(|| { + SinkError::Config(anyhow!( + "Cannot find 'key_format',please set it or use JSON" + )) + })?; + let value_format = + format_desc.options.get(VALUE_FORMAT).ok_or_else(|| { + SinkError::Config(anyhow!( + "Cannot find 'redis_value_format',please set it or use JSON" + )) + })?; + let key_encoder = TemplateEncoder::new( + schema.clone(), + Some(pk_indices), + key_format.clone(), + ); + let val_encoder = TemplateEncoder::new(schema, None, value_format.clone()); + Ok(SinkFormatterImpl::UpsertTemplate(UpsertFormatter::new( + key_encoder, + val_encoder, + ))) + } + _ => err_unsupported(), } } - _ => { - Err(SinkError::Encode("Please provide template formats for both key and value, or choose the JSON format.".to_string())) - } } } } diff --git a/src/connector/src/sink/formatter/upsert.rs 
b/src/connector/src/sink/formatter/upsert.rs index 6ef2b5f2ca333..af8e70ff92850 100644 --- a/src/connector/src/sink/formatter/upsert.rs +++ b/src/connector/src/sink/formatter/upsert.rs @@ -40,7 +40,7 @@ impl SinkFormatter for UpsertFormatter { &self, chunk: &StreamChunk, ) -> impl Iterator, Option)>> { - std::iter::from_generator(|| { + std::iter::from_coroutine(|| { for (op, row) in chunk.rows() { let event_key_object = Some(tri!(self.key_encoder.encode(row))); diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs index a204a8d121706..f77b2b0a88c36 100644 --- a/src/connector/src/sink/kafka.rs +++ b/src/connector/src/sink/kafka.rs @@ -14,20 +14,18 @@ use std::collections::HashMap; use std::fmt::Debug; -use std::pin::pin; use std::sync::Arc; use std::time::Duration; use anyhow::anyhow; -use futures::future::{select, Either}; use futures::{Future, FutureExt, TryFuture}; use rdkafka::error::KafkaError; use rdkafka::message::ToBytes; use rdkafka::producer::{DeliveryFuture, FutureProducer, FutureRecord}; use rdkafka::types::RDKafkaErrorCode; use rdkafka::ClientConfig; +use risingwave_common::array::StreamChunk; use risingwave_common::catalog::Schema; -use risingwave_common::util::drop_either_future; use serde_derive::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; use strum_macros::{Display, EnumString}; @@ -37,11 +35,11 @@ use super::{Sink, SinkError, SinkParam}; use crate::common::KafkaCommon; use crate::sink::catalog::desc::SinkDesc; use crate::sink::formatter::SinkFormatterImpl; -use crate::sink::log_store::{ - DeliveryFutureManager, DeliveryFutureManagerAddFuture, LogReader, LogStoreReadItem, +use crate::sink::log_store::DeliveryFutureManagerAddFuture; +use crate::sink::writer::{ + AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, FormattedSink, }; -use crate::sink::writer::FormattedSink; -use crate::sink::{DummySinkCommitCoordinator, LogSinker, Result, SinkWriterParam}; +use crate::sink::{DummySinkCommitCoordinator, Result, SinkWriterParam}; use crate::source::kafka::{KafkaProperties, KafkaSplitEnumerator, PrivateLinkProducerContext}; use crate::source::{SourceEnumeratorContext, SplitEnumerator}; use crate::{ @@ -299,7 +297,7 @@ impl TryFrom for KafkaSink { impl Sink for KafkaSink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = KafkaLogSinker; + type LogSinker = AsyncTruncateLogSinkerOf; const SINK_NAME: &'static str = KAFKA_SINK; @@ -316,7 +314,18 @@ impl Sink for KafkaSink { self.sink_from_name.clone(), ) .await?; - KafkaLogSinker::new(self.config.clone(), formatter).await + let max_delivery_buffer_size = (self + .config + .rdkafka_properties + .queue_buffering_max_messages + .as_ref() + .cloned() + .unwrap_or(KAFKA_WRITER_MAX_QUEUE_SIZE) as f32 + * KAFKA_WRITER_MAX_QUEUE_SIZE_RATIO) as usize; + + Ok(KafkaSinkWriter::new(self.config.clone(), formatter) + .await? 
+ .into_log_sinker(max_delivery_buffer_size)) } async fn validate(&self) -> Result<()> { @@ -370,16 +379,15 @@ struct KafkaPayloadWriter<'a> { config: &'a KafkaConfig, } -type KafkaSinkDeliveryFuture = impl TryFuture + Unpin + 'static; +pub type KafkaSinkDeliveryFuture = impl TryFuture + Unpin + 'static; -pub struct KafkaLogSinker { +pub struct KafkaSinkWriter { formatter: SinkFormatterImpl, inner: FutureProducer, - future_manager: DeliveryFutureManager, config: KafkaConfig, } -impl KafkaLogSinker { +impl KafkaSinkWriter { async fn new(config: KafkaConfig, formatter: SinkFormatterImpl) -> Result { let inner: FutureProducer = { let mut c = ClientConfig::new(); @@ -403,19 +411,29 @@ impl KafkaLogSinker { c.create_with_context(producer_ctx).await? }; - let max_delivery_buffer_size = (config - .rdkafka_properties - .queue_buffering_max_messages - .as_ref() - .cloned() - .unwrap_or(KAFKA_WRITER_MAX_QUEUE_SIZE) as f32 - * KAFKA_WRITER_MAX_QUEUE_SIZE_RATIO) as usize; - - Ok(KafkaLogSinker { + Ok(KafkaSinkWriter { formatter, inner, config: config.clone(), - future_manager: DeliveryFutureManager::new(max_delivery_buffer_size), + }) + } +} + +impl AsyncTruncateSinkWriter for KafkaSinkWriter { + type DeliveryFuture = KafkaSinkDeliveryFuture; + + async fn write_chunk<'a>( + &'a mut self, + chunk: StreamChunk, + add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>, + ) -> Result<()> { + let mut payload_writer = KafkaPayloadWriter { + inner: &mut self.inner, + add_future, + config: &self.config, + }; + dispatch_sink_formatter_impl!(&self.formatter, formatter, { + payload_writer.write_chunk(chunk, formatter).await }) } } @@ -537,50 +555,6 @@ impl<'a> FormattedSink for KafkaPayloadWriter<'a> { } } -impl LogSinker for KafkaLogSinker { - async fn consume_log_and_sink(mut self, mut log_reader: impl LogReader) -> Result<()> { - log_reader.init().await?; - loop { - let select_result = drop_either_future( - select( - pin!(log_reader.next_item()), - pin!(self.future_manager.next_truncate_offset()), - ) - .await, - ); - match select_result { - Either::Left(item_result) => { - let (epoch, item) = item_result?; - match item { - LogStoreReadItem::StreamChunk { chunk_id, chunk } => { - dispatch_sink_formatter_impl!(&self.formatter, formatter, { - let mut writer = KafkaPayloadWriter { - inner: &self.inner, - add_future: self - .future_manager - .start_write_chunk(epoch, chunk_id), - config: &self.config, - }; - writer.write_chunk(chunk, formatter).await?; - }) - } - LogStoreReadItem::Barrier { - is_checkpoint: _is_checkpoint, - } => { - self.future_manager.add_barrier(epoch); - } - LogStoreReadItem::UpdateVnodeBitmap(_) => {} - } - } - Either::Right(offset_result) => { - let offset = offset_result?; - log_reader.truncate(offset).await?; - } - } - } - } -} - #[cfg(test)] mod test { use maplit::hashmap; @@ -748,7 +722,7 @@ mod test { let kafka_config = KafkaConfig::from_hashmap(properties)?; // Create the actual sink writer to Kafka - let mut sink = KafkaLogSinker::new( + let sink = KafkaSinkWriter::new( kafka_config.clone(), SinkFormatterImpl::AppendOnlyJson(AppendOnlyFormatter::new( // We do not specify primary key for this schema @@ -759,12 +733,16 @@ mod test { .await .unwrap(); + use crate::sink::log_store::DeliveryFutureManager; + + let mut future_manager = DeliveryFutureManager::new(usize::MAX); + for i in 0..10 { println!("epoch: {}", i); for j in 0..100 { let mut writer = KafkaPayloadWriter { inner: &sink.inner, - add_future: sink.future_manager.start_write_chunk(i, j), + add_future: 
future_manager.start_write_chunk(i, j), config: &sink.config, }; match writer diff --git a/src/connector/src/sink/kinesis.rs b/src/connector/src/sink/kinesis.rs index dd8518af39948..605edde3b1eb0 100644 --- a/src/connector/src/sink/kinesis.rs +++ b/src/connector/src/sink/kinesis.rs @@ -30,8 +30,12 @@ use super::catalog::SinkFormatDesc; use super::SinkParam; use crate::common::KinesisCommon; use crate::dispatch_sink_formatter_impl; +use crate::sink::catalog::desc::SinkDesc; use crate::sink::formatter::SinkFormatterImpl; -use crate::sink::writer::{FormattedSink, LogSinkerOf, SinkWriter, SinkWriterExt}; +use crate::sink::log_store::DeliveryFutureManagerAddFuture; +use crate::sink::writer::{ + AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, FormattedSink, +}; use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkError, SinkWriterParam}; pub const KINESIS_SINK: &str = "kinesis"; @@ -67,10 +71,14 @@ impl TryFrom for KinesisSink { impl Sink for KinesisSink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = LogSinkerOf; + type LogSinker = AsyncTruncateLogSinkerOf; const SINK_NAME: &'static str = KINESIS_SINK; + fn default_sink_decouple(desc: &SinkDesc) -> bool { + desc.sink_type.is_append_only() + } + async fn validate(&self) -> Result<()> { // Kinesis requires partition key. There is no builtin support for round-robin as in kafka/pulsar. // https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html#Streams-PutRecord-request-PartitionKey @@ -103,7 +111,7 @@ impl Sink for KinesisSink { Ok(()) } - async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { + async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result { Ok(KinesisSinkWriter::new( self.config.clone(), self.schema.clone(), @@ -113,7 +121,7 @@ impl Sink for KinesisSink { self.sink_from_name.clone(), ) .await? - .into_log_sinker(writer_param.sink_metrics)) + .into_log_sinker(usize::MAX)) } } @@ -214,20 +222,16 @@ impl FormattedSink for KinesisSinkPayloadWriter { } } -#[async_trait::async_trait] -impl SinkWriter for KinesisSinkWriter { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { - dispatch_sink_formatter_impl!(&self.formatter, formatter, { +impl AsyncTruncateSinkWriter for KinesisSinkWriter { + async fn write_chunk<'a>( + &'a mut self, + chunk: StreamChunk, + _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>, + ) -> Result<()> { + dispatch_sink_formatter_impl!( + &self.formatter, + formatter, self.payload_writer.write_chunk(chunk, formatter).await - }) - } - - async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> { - // Kinesis offers no transactional guarantees, so we do nothing here. 
- Ok(()) - } - - async fn barrier(&mut self, _is_checkpoint: bool) -> Result<()> { - Ok(()) + ) } } diff --git a/src/connector/src/sink/mod.rs b/src/connector/src/sink/mod.rs index 7769a87f4e715..6afd08778cd96 100644 --- a/src/connector/src/sink/mod.rs +++ b/src/connector/src/sink/mod.rs @@ -34,7 +34,6 @@ pub mod utils; pub mod writer; use std::collections::HashMap; -use std::future::Future; use ::clickhouse::error::Error as ClickHouseError; use ::redis::RedisError; @@ -278,11 +277,9 @@ pub trait Sink: TryFrom { } } -pub trait LogSinker: Send + 'static { - fn consume_log_and_sink( - self, - log_reader: impl LogReader, - ) -> impl Future> + Send + 'static; +#[async_trait] +pub trait LogSinker: 'static { + async fn consume_log_and_sink(self, log_reader: impl LogReader) -> Result<()>; } #[async_trait] diff --git a/src/connector/src/sink/nats.rs b/src/connector/src/sink/nats.rs index 8e3f3e2c18022..2f810eed786a9 100644 --- a/src/connector/src/sink/nats.rs +++ b/src/connector/src/sink/nats.rs @@ -25,10 +25,14 @@ use tokio_retry::strategy::{jitter, ExponentialBackoff}; use tokio_retry::Retry; use super::utils::chunk_to_json; -use super::{DummySinkCommitCoordinator, SinkWriter, SinkWriterParam}; +use super::{DummySinkCommitCoordinator, SinkWriterParam}; use crate::common::NatsCommon; +use crate::sink::catalog::desc::SinkDesc; use crate::sink::encoder::{JsonEncoder, TimestampHandlingMode}; -use crate::sink::writer::{LogSinkerOf, SinkWriterExt}; +use crate::sink::log_store::DeliveryFutureManagerAddFuture; +use crate::sink::writer::{ + AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, +}; use crate::sink::{Result, Sink, SinkError, SinkParam, SINK_TYPE_APPEND_ONLY}; pub const NATS_SINK: &str = "nats"; @@ -88,10 +92,14 @@ impl TryFrom for NatsSink { impl Sink for NatsSink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = LogSinkerOf; + type LogSinker = AsyncTruncateLogSinkerOf; const SINK_NAME: &'static str = NATS_SINK; + fn default_sink_decouple(desc: &SinkDesc) -> bool { + desc.sink_type.is_append_only() + } + async fn validate(&self) -> Result<()> { if !self.is_append_only { return Err(SinkError::Nats(anyhow!( @@ -110,11 +118,11 @@ impl Sink for NatsSink { Ok(()) } - async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { + async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result { Ok( NatsSinkWriter::new(self.config.clone(), self.schema.clone()) .await? - .into_log_sinker(writer_param.sink_metrics), + .into_log_sinker(usize::MAX), ) } } @@ -153,17 +161,12 @@ impl NatsSinkWriter { } } -#[async_trait::async_trait] -impl SinkWriter for NatsSinkWriter { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { +impl AsyncTruncateSinkWriter for NatsSinkWriter { + async fn write_chunk<'a>( + &'a mut self, + chunk: StreamChunk, + _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>, + ) -> Result<()> { self.append_only(chunk).await } - - async fn begin_epoch(&mut self, _epoch_id: u64) -> Result<()> { - Ok(()) - } - - async fn barrier(&mut self, _is_checkpoint: bool) -> Result<()> { - Ok(()) - } } diff --git a/src/connector/src/sink/pulsar.rs b/src/connector/src/sink/pulsar.rs index f980b2ad9f9b1..9eb57c1ae0771 100644 --- a/src/connector/src/sink/pulsar.rs +++ b/src/connector/src/sink/pulsar.rs @@ -12,14 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
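The ClickHouse, Kinesis and NATS hunks above and the Pulsar, Kafka and Redis hunks below all perform the same migration: the epoch/barrier-driven `SinkWriter` impl is dropped in favor of `AsyncTruncateSinkWriter`, whose only obligation is to write a chunk and, for sinks that produce delivery futures, register them through the provided `DeliveryFutureManagerAddFuture`; log-store truncation is then handled by the `AsyncTruncateLogSinkerOf` wrapper returned from `into_log_sinker(..)`. A condensed sketch of the target shape, where `MySinkWriter` and its `append_only` helper are placeholders and the associated `DeliveryFuture` is assumed to have a default for sinks that do not buffer deliveries:

    impl AsyncTruncateSinkWriter for MySinkWriter {
        async fn write_chunk<'a>(
            &'a mut self,
            chunk: StreamChunk,
            _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
        ) -> Result<()> {
            // No begin_epoch/barrier hooks remain on the writer; it only needs to
            // know how to emit one chunk downstream.
            self.append_only(chunk).await
        }
    }
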
-use std::collections::{HashMap, VecDeque}; +use std::collections::HashMap; use std::fmt::Debug; use std::time::Duration; use anyhow::anyhow; -use async_trait::async_trait; -use futures::future::try_join_all; -use futures::TryFutureExt; +use futures::{FutureExt, TryFuture, TryFutureExt}; use pulsar::producer::{Message, SendFuture}; use pulsar::{Producer, ProducerOptions, Pulsar, TokioExecutor}; use risingwave_common::array::StreamChunk; @@ -28,10 +26,15 @@ use serde::Deserialize; use serde_with::{serde_as, DisplayFromStr}; use super::catalog::{SinkFormat, SinkFormatDesc}; -use super::{Sink, SinkError, SinkParam, SinkWriter, SinkWriterParam}; +use super::{Sink, SinkError, SinkParam, SinkWriterParam}; use crate::common::PulsarCommon; -use crate::sink::formatter::SinkFormatterImpl; -use crate::sink::writer::{FormattedSink, LogSinkerOf, SinkWriterExt}; +use crate::sink::catalog::desc::SinkDesc; +use crate::sink::encoder::SerTo; +use crate::sink::formatter::{SinkFormatter, SinkFormatterImpl}; +use crate::sink::log_store::DeliveryFutureManagerAddFuture; +use crate::sink::writer::{ + AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, FormattedSink, +}; use crate::sink::{DummySinkCommitCoordinator, Result}; use crate::{deserialize_duration_from_string, dispatch_sink_formatter_impl}; @@ -155,11 +158,15 @@ impl TryFrom for PulsarSink { impl Sink for PulsarSink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = LogSinkerOf; + type LogSinker = AsyncTruncateLogSinkerOf; const SINK_NAME: &'static str = PULSAR_SINK; - async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { + fn default_sink_decouple(desc: &SinkDesc) -> bool { + desc.sink_type.is_append_only() + } + + async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result { Ok(PulsarSinkWriter::new( self.config.clone(), self.schema.clone(), @@ -169,7 +176,7 @@ impl Sink for PulsarSink { self.sink_from_name.clone(), ) .await? 
- .into_log_sinker(writer_param.sink_metrics)) + .into_log_sinker(PULSAR_SEND_FUTURE_BUFFER_MAX_SIZE)) } async fn validate(&self) -> Result<()> { @@ -199,15 +206,26 @@ impl Sink for PulsarSink { } pub struct PulsarSinkWriter { - payload_writer: PulsarPayloadWriter, formatter: SinkFormatterImpl, -} - -struct PulsarPayloadWriter { pulsar: Pulsar, producer: Producer, config: PulsarConfig, - send_future_buffer: VecDeque, +} + +struct PulsarPayloadWriter<'w> { + producer: &'w mut Producer, + config: &'w PulsarConfig, + add_future: DeliveryFutureManagerAddFuture<'w, PulsarDeliveryFuture>, +} + +pub type PulsarDeliveryFuture = impl TryFuture + Unpin + 'static; + +fn may_delivery_future(future: SendFuture) -> PulsarDeliveryFuture { + future.map(|result| { + result + .map(|_| ()) + .map_err(|e: pulsar::Error| SinkError::Pulsar(anyhow!(e))) + }) } impl PulsarSinkWriter { @@ -226,17 +244,14 @@ impl PulsarSinkWriter { let producer = build_pulsar_producer(&pulsar, &config).await?; Ok(Self { formatter, - payload_writer: PulsarPayloadWriter { - pulsar, - producer, - config, - send_future_buffer: VecDeque::new(), - }, + pulsar, + producer, + config, }) } } -impl PulsarPayloadWriter { +impl<'w> PulsarPayloadWriter<'w> { async fn send_message(&mut self, message: Message) -> Result<()> { let mut success_flag = false; let mut connection_err = None; @@ -247,17 +262,10 @@ impl PulsarPayloadWriter { // a SendFuture holding the message receipt // or error after sending is returned Ok(send_future) => { - // Check if send_future_buffer is greater than the preset limit - while self.send_future_buffer.len() >= PULSAR_SEND_FUTURE_BUFFER_MAX_SIZE { - self.send_future_buffer - .pop_front() - .expect("Expect the SendFuture not to be None") - .map_err(|e| SinkError::Pulsar(anyhow!(e))) - .await?; - } - + self.add_future + .add_future_may_await(may_delivery_future(send_future)) + .await?; success_flag = true; - self.send_future_buffer.push_back(send_future); break; } // error upon sending @@ -295,24 +303,9 @@ impl PulsarPayloadWriter { self.send_message(message).await?; Ok(()) } - - async fn commit_inner(&mut self) -> Result<()> { - self.producer - .send_batch() - .map_err(pulsar_to_sink_err) - .await?; - try_join_all( - self.send_future_buffer - .drain(..) - .map(|send_future| send_future.map_err(|e| SinkError::Pulsar(anyhow!(e)))), - ) - .await?; - - Ok(()) - } } -impl FormattedSink for PulsarPayloadWriter { +impl<'w> FormattedSink for PulsarPayloadWriter<'w> { type K = String; type V = Vec; @@ -321,23 +314,33 @@ impl FormattedSink for PulsarPayloadWriter { } } -#[async_trait] -impl SinkWriter for PulsarSinkWriter { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { +impl AsyncTruncateSinkWriter for PulsarSinkWriter { + type DeliveryFuture = PulsarDeliveryFuture; + + async fn write_chunk<'a>( + &'a mut self, + chunk: StreamChunk, + add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>, + ) -> Result<()> { dispatch_sink_formatter_impl!(&self.formatter, formatter, { - self.payload_writer.write_chunk(chunk, formatter).await + let mut payload_writer = PulsarPayloadWriter { + producer: &mut self.producer, + add_future, + config: &self.config, + }; + // TODO: we can call `payload_writer.write_chunk(chunk, formatter)`, + // but for an unknown reason, this will greatly increase the compile time, + // by nearly 4x. May investigate it later. 
+ for r in formatter.format_chunk(&chunk) { + let (key, value) = r?; + payload_writer + .write_inner( + key.map(SerTo::ser_to).transpose()?, + value.map(SerTo::ser_to).transpose()?, + ) + .await?; + } + Ok(()) }) } - - async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> { - Ok(()) - } - - async fn barrier(&mut self, is_checkpoint: bool) -> Result { - if is_checkpoint { - self.payload_writer.commit_inner().await?; - } - - Ok(()) - } } diff --git a/src/connector/src/sink/redis.rs b/src/connector/src/sink/redis.rs index cc8ff74d0c9c5..af3ec3b981620 100644 --- a/src/connector/src/sink/redis.rs +++ b/src/connector/src/sink/redis.rs @@ -18,29 +18,30 @@ use anyhow::anyhow; use async_trait::async_trait; use redis::aio::Connection; use redis::{Client as RedisClient, Pipeline}; -use regex::Regex; use risingwave_common::array::StreamChunk; use risingwave_common::catalog::Schema; use serde_derive::{Deserialize, Serialize}; use serde_with::serde_as; +use super::catalog::SinkFormatDesc; +use super::encoder::template::TemplateEncoder; use super::formatter::SinkFormatterImpl; use super::writer::FormattedSink; -use super::{SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT}; +use super::{SinkError, SinkParam}; use crate::dispatch_sink_formatter_impl; -use crate::sink::writer::{LogSinkerOf, SinkWriterExt}; -use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkWriter, SinkWriterParam}; +use crate::sink::log_store::DeliveryFutureManagerAddFuture; +use crate::sink::writer::{ + AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, +}; +use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkWriterParam}; pub const REDIS_SINK: &str = "redis"; - +pub const KEY_FORMAT: &str = "key_format"; +pub const VALUE_FORMAT: &str = "value_format"; #[derive(Deserialize, Serialize, Debug, Clone)] pub struct RedisCommon { #[serde(rename = "redis.url")] pub url: String, - #[serde(rename = "redis.keyformat")] - pub key_format: Option, - #[serde(rename = "redis.valueformat")] - pub value_format: Option, } impl RedisCommon { @@ -54,23 +55,13 @@ impl RedisCommon { pub struct RedisConfig { #[serde(flatten)] pub common: RedisCommon, - - pub r#type: String, // accept "append-only" or "upsert" } impl RedisConfig { pub fn from_hashmap(properties: HashMap) -> Result { let config = serde_json::from_value::(serde_json::to_value(properties).unwrap()) - .map_err(|e| SinkError::Config(anyhow!(e)))?; - if config.r#type != SINK_TYPE_APPEND_ONLY && config.r#type != SINK_TYPE_UPSERT { - return Err(SinkError::Config(anyhow!( - "`{}` must be {}, or {}", - SINK_TYPE_OPTION, - SINK_TYPE_APPEND_ONLY, - SINK_TYPE_UPSERT - ))); - } + .map_err(|e| SinkError::Config(anyhow!("{:?}", e)))?; Ok(config) } } @@ -79,28 +70,10 @@ impl RedisConfig { pub struct RedisSink { config: RedisConfig, schema: Schema, - is_append_only: bool, pk_indices: Vec, -} - -fn check_string_format(format: &Option, set: &HashSet) -> Result<()> { - if let Some(format) = format { - // We will check if the string inside {} corresponds to a column name in rw. - // In other words, the content within {} should exclusively consist of column names from rw, - // which means '{{column_name}}' or '{{column_name1},{column_name2}}' would be incorrect. 
- let re = Regex::new(r"\{([^}]*)\}").unwrap(); - if !re.is_match(format) { - return Err(SinkError::Redis( - "Can't find {} in key_format or value_format".to_string(), - )); - } - for capture in re.captures_iter(format) { - if let Some(inner_content) = capture.get(1) && !set.contains(inner_content.as_str()){ - return Err(SinkError::Redis(format!("Can't find field({:?}) in key_format or value_format",inner_content.as_str()))) - } - } - } - Ok(()) + format_desc: SinkFormatDesc, + db_name: String, + sink_from_name: String, } #[async_trait] @@ -117,27 +90,33 @@ impl TryFrom for RedisSink { Ok(Self { config, schema: param.schema(), - is_append_only: param.sink_type.is_append_only(), pk_indices: param.downstream_pk, + format_desc: param + .format_desc + .ok_or_else(|| SinkError::Config(anyhow!("missing FORMAT ... ENCODE ...")))?, + db_name: param.db_name, + sink_from_name: param.sink_from_name, }) } } impl Sink for RedisSink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = LogSinkerOf; + type LogSinker = AsyncTruncateLogSinkerOf; const SINK_NAME: &'static str = "redis"; - async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { + async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result { Ok(RedisSinkWriter::new( self.config.clone(), self.schema.clone(), self.pk_indices.clone(), - self.is_append_only, + &self.format_desc, + self.db_name.clone(), + self.sink_from_name.clone(), ) .await? - .into_log_sinker(writer_param.sink_metrics)) + .into_log_sinker(usize::MAX)) } async fn validate(&self) -> Result<()> { @@ -157,8 +136,23 @@ impl Sink for RedisSink { .filter(|(k, _)| self.pk_indices.contains(k)) .map(|(_, v)| v.name.clone()) .collect(); - check_string_format(&self.config.common.key_format, &pk_set)?; - check_string_format(&self.config.common.value_format, &all_set)?; + if matches!( + self.format_desc.encode, + super::catalog::SinkEncode::Template + ) { + let key_format = self.format_desc.options.get(KEY_FORMAT).ok_or_else(|| { + SinkError::Config(anyhow!( + "Cannot find 'key_format',please set it or use JSON" + )) + })?; + let value_format = self.format_desc.options.get(VALUE_FORMAT).ok_or_else(|| { + SinkError::Config(anyhow!( + "Cannot find 'value_format',please set it or use JSON" + )) + })?; + TemplateEncoder::check_string_format(key_format, &pk_set)?; + TemplateEncoder::check_string_format(value_format, &all_set)?; + } Ok(()) } } @@ -166,7 +160,6 @@ impl Sink for RedisSink { pub struct RedisSinkWriter { epoch: u64, schema: Schema, - is_append_only: bool, pk_indices: Vec, formatter: SinkFormatterImpl, payload_writer: RedisSinkPayloadWriter, @@ -220,21 +213,23 @@ impl RedisSinkWriter { config: RedisConfig, schema: Schema, pk_indices: Vec, - is_append_only: bool, + format_desc: &SinkFormatDesc, + db_name: String, + sink_from_name: String, ) -> Result { let payload_writer = RedisSinkPayloadWriter::new(config.clone()).await?; - let formatter = SinkFormatterImpl::new_with_redis( + let formatter = SinkFormatterImpl::new( + format_desc, schema.clone(), pk_indices.clone(), - is_append_only, - config.common.key_format, - config.common.value_format, - )?; + db_name, + sink_from_name, + ) + .await?; Ok(Self { schema, pk_indices, - is_append_only, epoch: 0, formatter, payload_writer, @@ -242,24 +237,22 @@ impl RedisSinkWriter { } #[cfg(test)] - pub fn mock( + pub async fn mock( schema: Schema, pk_indices: Vec, - is_append_only: bool, - key_format: Option, - value_format: Option, + format_desc: &SinkFormatDesc, ) -> Result { - let formatter = 
SinkFormatterImpl::new_with_redis( + let formatter = SinkFormatterImpl::new( + format_desc, schema.clone(), pk_indices.clone(), - is_append_only, - key_format, - value_format, - )?; + "d1".to_string(), + "t1".to_string(), + ) + .await?; Ok(Self { schema, pk_indices, - is_append_only, epoch: 0, formatter, payload_writer: RedisSinkPayloadWriter::mock(), @@ -267,29 +260,22 @@ impl RedisSinkWriter { } } -#[async_trait] -impl SinkWriter for RedisSinkWriter { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> { +impl AsyncTruncateSinkWriter for RedisSinkWriter { + async fn write_chunk<'a>( + &'a mut self, + chunk: StreamChunk, + _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>, + ) -> Result<()> { dispatch_sink_formatter_impl!(&self.formatter, formatter, { self.payload_writer.write_chunk(chunk, formatter).await }) } - - async fn begin_epoch(&mut self, epoch: u64) -> Result<()> { - self.epoch = epoch; - Ok(()) - } - - async fn barrier(&mut self, is_checkpoint: bool) -> Result<()> { - if is_checkpoint { - self.payload_writer.commit().await?; - } - Ok(()) - } } #[cfg(test)] mod test { + use std::collections::BTreeMap; + use rdkafka::message::FromBytes; use risingwave_common::array::{Array, I32Array, Op, StreamChunk, Utf8Array}; use risingwave_common::catalog::{Field, Schema}; @@ -297,6 +283,8 @@ mod test { use risingwave_common::util::iter_util::ZipEqDebug; use super::*; + use crate::sink::catalog::{SinkEncode, SinkFormat}; + use crate::sink::log_store::DeliveryFutureManager; #[tokio::test] async fn test_write() { @@ -315,8 +303,15 @@ mod test { }, ]); - let mut redis_sink_writer = - RedisSinkWriter::mock(schema, vec![0], true, None, None).unwrap(); + let format_desc = SinkFormatDesc { + format: SinkFormat::AppendOnly, + encode: SinkEncode::Json, + options: BTreeMap::default(), + }; + + let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc) + .await + .unwrap(); let chunk_a = StreamChunk::new( vec![Op::Insert, Op::Insert, Op::Insert], @@ -326,8 +321,10 @@ mod test { ], ); + let mut manager = DeliveryFutureManager::new(0); + redis_sink_writer - .write_batch(chunk_a) + .write_chunk(chunk_a, manager.start_write_chunk(0, 0)) .await .expect("failed to write batch"); let expected_a = @@ -367,14 +364,23 @@ mod test { }, ]); - let mut redis_sink_writer = RedisSinkWriter::mock( - schema, - vec![0], - true, - Some("key-{id}".to_string()), - Some("values:{id:{id},name:{name}}".to_string()), - ) - .unwrap(); + let mut btree_map = BTreeMap::default(); + btree_map.insert(KEY_FORMAT.to_string(), "key-{id}".to_string()); + btree_map.insert( + VALUE_FORMAT.to_string(), + "values:{id:{id},name:{name}}".to_string(), + ); + let format_desc = SinkFormatDesc { + format: SinkFormat::AppendOnly, + encode: SinkEncode::Template, + options: btree_map, + }; + + let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc) + .await + .unwrap(); + + let mut future_manager = DeliveryFutureManager::new(0); let chunk_a = StreamChunk::new( vec![Op::Insert, Op::Insert, Op::Insert], @@ -385,7 +391,7 @@ mod test { ); redis_sink_writer - .write_batch(chunk_a) + .write_chunk(chunk_a, future_manager.start_write_chunk(0, 0)) .await .expect("failed to write batch"); let expected_a = vec![ diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs index ad182e734a33a..3c52cb720dbd4 100644 --- a/src/connector/src/sink/remote.rs +++ b/src/connector/src/sink/remote.rs @@ -13,17 +13,23 @@ // limitations under the License. 
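One detail worth noting before the remote (JNI) sink changes below: the raw tokio `Receiver` handed back from the JNI bridge is now wrapped as `ReceiverStream::new(response_rx).peekable()`, which is what lets the peek-based monitor helper introduced earlier watch for Java-side errors without consuming responses. A standalone sketch of that wrapping, with a stand-in payload type and the same channel size of 16 used by the sink:

    use futures::StreamExt;
    use tokio_stream::wrappers::ReceiverStream;

    #[tokio::main]
    async fn main() {
        let (response_tx, response_rx) = tokio::sync::mpsc::channel::<Result<String, ()>>(16);

        // Adapt the tokio receiver into a futures `Stream`, then make it peekable so
        // callers can look at the next response without removing it from the stream.
        let mut response_stream = ReceiverStream::new(response_rx).peekable();

        response_tx.send(Ok("started".to_string())).await.unwrap();
        drop(response_tx);

        while let Some(item) = response_stream.next().await {
            println!("{item:?}");
        }
    }
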
use std::collections::HashMap; +use std::fmt::Formatter; +use std::future::Future; use std::marker::PhantomData; use std::ops::Deref; +use std::time::Instant; use anyhow::anyhow; use async_trait::async_trait; +use futures::stream::Peekable; +use futures::{StreamExt, TryFutureExt, TryStreamExt}; use itertools::Itertools; use jni::objects::{JByteArray, JValue, JValueOwned}; use prost::Message; use risingwave_common::array::StreamChunk; use risingwave_common::error::anyhow_error; use risingwave_common::types::DataType; +use risingwave_common::util::await_future_with_monitor_error_stream; use risingwave_jni_core::jvm_runtime::JVM; use risingwave_pb::connector_service::sink_coordinator_stream_request::{ CommitMetadata, StartCoordinator, @@ -43,15 +49,17 @@ use risingwave_pb::connector_service::{ }; use tokio::sync::mpsc; use tokio::sync::mpsc::{Receiver, Sender}; +use tokio_stream::wrappers::ReceiverStream; use tracing::warn; use super::encoder::{JsonEncoder, RowEncoder}; use crate::sink::coordinate::CoordinatedSinkWriter; use crate::sink::encoder::TimestampHandlingMode; +use crate::sink::log_store::{LogReader, LogStoreReadItem, TruncateOffset}; use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt}; use crate::sink::{ - DummySinkCommitCoordinator, Result, Sink, SinkCommitCoordinator, SinkError, SinkMetrics, - SinkParam, SinkWriterParam, + DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkCommitCoordinator, SinkError, + SinkMetrics, SinkParam, SinkWriterParam, }; use crate::ConnectorParams; @@ -101,18 +109,12 @@ impl TryFrom for RemoteSink { impl Sink for RemoteSink { type Coordinator = DummySinkCommitCoordinator; - type LogSinker = LogSinkerOf>; + type LogSinker = RemoteLogSinker; const SINK_NAME: &'static str = R::SINK_NAME; async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result { - Ok(RemoteSinkWriter::new( - self.param.clone(), - writer_param.connector_params, - writer_param.sink_metrics.clone(), - ) - .await? - .into_log_sinker(writer_param.sink_metrics)) + RemoteLogSinker::new(self.param.clone(), writer_param).await } async fn validate(&self) -> Result<()> { @@ -192,6 +194,140 @@ impl Sink for RemoteSink { } } +pub struct RemoteLogSinker { + writer: RemoteSinkWriter, + sink_metrics: SinkMetrics, +} + +impl RemoteLogSinker { + async fn new(sink_param: SinkParam, writer_param: SinkWriterParam) -> Result { + let writer = RemoteSinkWriter::new( + sink_param, + writer_param.connector_params, + writer_param.sink_metrics.clone(), + ) + .await?; + let sink_metrics = writer_param.sink_metrics; + Ok(RemoteLogSinker { + writer, + sink_metrics, + }) + } +} + +/// Await the given future while monitoring on error of the receiver stream. +async fn await_future_with_monitor_receiver_err>>( + receiver: &mut SinkWriterStreamJniReceiver, + future: F, +) -> Result { + match await_future_with_monitor_error_stream(&mut receiver.response_stream, future).await { + Ok(result) => result, + Err(None) => Err(SinkError::Remote(anyhow!("end of remote receiver stream"))), + Err(Some(err)) => Err(SinkError::Internal(err)), + } +} + +#[async_trait] +impl LogSinker for RemoteLogSinker { + async fn consume_log_and_sink(self, mut log_reader: impl LogReader) -> Result<()> { + // Note: this is a total copy of the implementation of LogSinkerOf, + // except that we monitor the future of `log_reader.next_item` with await_future_with_monitor_receiver_err + // to monitor the error in the response stream. 
+ + let mut sink_writer = self.writer; + let sink_metrics = self.sink_metrics; + #[derive(Debug)] + enum LogConsumerState { + /// Mark that the log consumer is not initialized yet + Uninitialized, + + /// Mark that a new epoch has begun. + EpochBegun { curr_epoch: u64 }, + + /// Mark that the consumer has just received a barrier + BarrierReceived { prev_epoch: u64 }, + } + + let mut state = LogConsumerState::Uninitialized; + + log_reader.init().await?; + + loop { + let (epoch, item): (u64, LogStoreReadItem) = await_future_with_monitor_receiver_err( + &mut sink_writer.stream_handle.response_rx, + log_reader.next_item().map_err(SinkError::Internal), + ) + .await?; + if let LogStoreReadItem::UpdateVnodeBitmap(_) = &item { + match &state { + LogConsumerState::BarrierReceived { .. } => {} + _ => unreachable!( + "update vnode bitmap can be accepted only right after \ + barrier, but current state is {:?}", + state + ), + } + } + // begin_epoch when not previously began + state = match state { + LogConsumerState::Uninitialized => { + sink_writer.begin_epoch(epoch).await?; + LogConsumerState::EpochBegun { curr_epoch: epoch } + } + LogConsumerState::EpochBegun { curr_epoch } => { + assert!( + epoch >= curr_epoch, + "new epoch {} should not be below the current epoch {}", + epoch, + curr_epoch + ); + LogConsumerState::EpochBegun { curr_epoch: epoch } + } + LogConsumerState::BarrierReceived { prev_epoch } => { + assert!( + epoch > prev_epoch, + "new epoch {} should be greater than prev epoch {}", + epoch, + prev_epoch + ); + sink_writer.begin_epoch(epoch).await?; + LogConsumerState::EpochBegun { curr_epoch: epoch } + } + }; + match item { + LogStoreReadItem::StreamChunk { chunk, .. } => { + if let Err(e) = sink_writer.write_batch(chunk).await { + sink_writer.abort().await?; + return Err(e); + } + } + LogStoreReadItem::Barrier { is_checkpoint } => { + let prev_epoch = match state { + LogConsumerState::EpochBegun { curr_epoch } => curr_epoch, + _ => unreachable!("epoch must have begun before handling barrier"), + }; + if is_checkpoint { + let start_time = Instant::now(); + sink_writer.barrier(true).await?; + sink_metrics + .sink_commit_duration_metrics + .observe(start_time.elapsed().as_millis() as f64); + log_reader + .truncate(TruncateOffset::Barrier { epoch }) + .await?; + } else { + sink_writer.barrier(false).await?; + } + state = LogConsumerState::BarrierReceived { prev_epoch } + } + LogStoreReadItem::UpdateVnodeBitmap(vnode_bitmap) => { + sink_writer.update_vnode_bitmap(vnode_bitmap).await?; + } + } + } + } +} + #[derive(Debug)] pub struct CoordinatedRemoteSink(pub RemoteSink); @@ -286,14 +422,11 @@ impl SinkCoordinatorStreamJniHandle { } } -const DEFAULT_CHANNEL_SIZE: usize = 16; -#[derive(Debug)] -pub struct SinkWriterStreamJniHandle { +struct SinkWriterStreamJniSender { request_tx: Sender, - response_rx: Receiver, } -impl SinkWriterStreamJniHandle { +impl SinkWriterStreamJniSender { pub async fn start_epoch(&mut self, epoch: u64) -> Result<()> { self.request_tx .send(SinkWriterStreamRequest { @@ -316,33 +449,29 @@ impl SinkWriterStreamJniHandle { .map_err(|err| SinkError::Internal(err.into())) } - pub async fn barrier(&mut self, epoch: u64) -> Result<()> { + pub async fn barrier(&mut self, epoch: u64, is_checkpoint: bool) -> Result<()> { self.request_tx .send(SinkWriterStreamRequest { request: Some(SinkRequest::Barrier(Barrier { epoch, - is_checkpoint: false, + is_checkpoint, })), }) .await .map_err(|err| SinkError::Internal(err.into())) } +} - pub async fn commit(&mut self, epoch: u64) -> 
Result { - self.request_tx - .send(SinkWriterStreamRequest { - request: Some(SinkRequest::Barrier(Barrier { - epoch, - is_checkpoint: true, - })), - }) - .await - .map_err(|err| SinkError::Internal(err.into()))?; +struct SinkWriterStreamJniReceiver { + response_stream: Peekable>>, +} - match self.response_rx.recv().await { - Some(SinkWriterStreamResponse { +impl SinkWriterStreamJniReceiver { + async fn next_commit_response(&mut self) -> Result { + match self.response_stream.try_next().await { + Ok(Some(SinkWriterStreamResponse { response: Some(sink_writer_stream_response::Response::Commit(rsp)), - }) => Ok(rsp), + })) => Ok(rsp), msg => Err(SinkError::Internal(anyhow!( "should get Sync response but get {:?}", msg @@ -351,6 +480,53 @@ impl SinkWriterStreamJniHandle { } } +const DEFAULT_CHANNEL_SIZE: usize = 16; +struct SinkWriterStreamJniHandle { + request_tx: SinkWriterStreamJniSender, + response_rx: SinkWriterStreamJniReceiver, +} + +impl std::fmt::Debug for SinkWriterStreamJniHandle { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SinkWriterStreamJniHandle").finish() + } +} + +impl SinkWriterStreamJniHandle { + async fn start_epoch(&mut self, epoch: u64) -> Result<()> { + await_future_with_monitor_receiver_err( + &mut self.response_rx, + self.request_tx.start_epoch(epoch), + ) + .await + } + + async fn write_batch(&mut self, epoch: u64, batch_id: u64, payload: Payload) -> Result<()> { + await_future_with_monitor_receiver_err( + &mut self.response_rx, + self.request_tx.write_batch(epoch, batch_id, payload), + ) + .await + } + + async fn barrier(&mut self, epoch: u64) -> Result<()> { + await_future_with_monitor_receiver_err( + &mut self.response_rx, + self.request_tx.barrier(epoch, false), + ) + .await + } + + async fn commit(&mut self, epoch: u64) -> Result { + await_future_with_monitor_receiver_err( + &mut self.response_rx, + self.request_tx.barrier(epoch, true), + ) + .await?; + self.response_rx.next_commit_response().await + } +} + pub type RemoteSinkWriter = RemoteSinkWriterInner<(), R>; pub type CoordinatedRemoteSinkWriter = RemoteSinkWriterInner, R>; @@ -374,10 +550,7 @@ impl RemoteSinkWriterInner { let (request_tx, request_rx) = mpsc::channel(DEFAULT_CHANNEL_SIZE); let (response_tx, response_rx) = mpsc::channel(DEFAULT_CHANNEL_SIZE); - let mut stream_handle = SinkWriterStreamJniHandle { - request_tx, - response_rx, - }; + let mut response_stream = ReceiverStream::new(response_rx).peekable(); std::thread::spawn(move || { let mut env = JVM.get_or_init().unwrap().attach_current_thread().unwrap(); @@ -388,7 +561,10 @@ impl RemoteSinkWriterInner { "(JJ)V", &[ JValue::from(&request_rx as *const Receiver as i64), - JValue::from(&response_tx as *const Sender as i64), + JValue::from( + &response_tx as *const Sender> + as i64, + ), ], ); @@ -410,8 +586,7 @@ impl RemoteSinkWriterInner { }; // First request - stream_handle - .request_tx + request_tx .send(sink_writer_stream_request) .await .map_err(|err| { @@ -423,17 +598,18 @@ impl RemoteSinkWriterInner { })?; // First response - match stream_handle.response_rx.recv().await { - Some(SinkWriterStreamResponse { + match response_stream.try_next().await { + Ok(Some(SinkWriterStreamResponse { response: Some(sink_writer_stream_response::Response::Start(_)), - }) => {} - msg => { + })) => {} + Ok(msg) => { return Err(SinkError::Internal(anyhow!( "should get start response for connector `{}` but get {:?}", R::SINK_NAME, msg ))); } + Err(e) => return Err(SinkError::Internal(e)), }; tracing::trace!( @@ -444,6 +620,11 @@ 
impl RemoteSinkWriterInner { let schema = param.schema(); + let stream_handle = SinkWriterStreamJniHandle { + request_tx: SinkWriterStreamJniSender { request_tx }, + response_rx: SinkWriterStreamJniReceiver { response_stream }, + }; + Ok(Self { properties: param.properties, epoch: None, @@ -458,7 +639,7 @@ impl RemoteSinkWriterInner { #[cfg(test)] fn for_test( - response_receiver: Receiver, + response_receiver: Receiver>, request_sender: Sender, ) -> RemoteSinkWriter { use risingwave_common::catalog::{Field, Schema}; @@ -480,8 +661,12 @@ impl RemoteSinkWriterInner { ]); let stream_handle = SinkWriterStreamJniHandle { - request_tx: request_sender, - response_rx: response_receiver, + request_tx: SinkWriterStreamJniSender { + request_tx: request_sender, + }, + response_rx: SinkWriterStreamJniReceiver { + response_stream: ReceiverStream::new(response_receiver).peekable(), + }, }; RemoteSinkWriter { @@ -828,12 +1013,12 @@ mod test { // test commit response_sender - .send(SinkWriterStreamResponse { + .send(Ok(SinkWriterStreamResponse { response: Some(Response::Commit(CommitResponse { epoch: 2022, metadata: None, })), - }) + })) .await .expect("test failed: failed to sync epoch"); sink.barrier(true).await.unwrap(); diff --git a/src/connector/src/sink/writer.rs b/src/connector/src/sink/writer.rs index 37ad452831b2e..64261bb42ab48 100644 --- a/src/connector/src/sink/writer.rs +++ b/src/connector/src/sink/writer.rs @@ -12,17 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::future::{Future, Ready}; +use std::pin::pin; use std::sync::Arc; use std::time::Instant; use async_trait::async_trait; +use futures::future::{select, Either}; +use futures::TryFuture; use risingwave_common::array::StreamChunk; use risingwave_common::buffer::Bitmap; +use risingwave_common::util::drop_either_future; use crate::sink::encoder::SerTo; use crate::sink::formatter::SinkFormatter; -use crate::sink::log_store::{LogReader, LogStoreReadItem, TruncateOffset}; -use crate::sink::{LogSinker, Result, SinkMetrics}; +use crate::sink::log_store::{ + DeliveryFutureManager, DeliveryFutureManagerAddFuture, LogReader, LogStoreReadItem, + TruncateOffset, +}; +use crate::sink::{LogSinker, Result, SinkError, SinkMetrics}; #[async_trait] pub trait SinkWriter: Send + 'static { @@ -48,22 +56,17 @@ pub trait SinkWriter: Send + 'static { } } -// TODO: remove this trait after KafkaSinkWriter implements SinkWriter -#[async_trait] -// An old version of SinkWriter for backward compatibility -pub trait SinkWriterV1: Send + 'static { - async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()>; - - // the following interface is for transactions, if not supported, return Ok(()) - // start a transaction with epoch number. Note that epoch number should be increasing. - async fn begin_epoch(&mut self, epoch: u64) -> Result<()>; +pub type DummyDeliveryFuture = Ready>; - // commits the current transaction and marks all messages in the transaction success. - async fn commit(&mut self) -> Result<()>; +pub trait AsyncTruncateSinkWriter: Send + 'static { + type DeliveryFuture: TryFuture + Unpin + Send + 'static = + DummyDeliveryFuture; - // aborts the current transaction because some error happens. we should rollback to the last - // commit point. 
- async fn abort(&mut self) -> Result<()>; + fn write_chunk<'a>( + &'a mut self, + chunk: StreamChunk, + add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>, + ) -> impl Future> + Send + 'a; } /// A free-form sink that may output in multiple formats and encodings. Examples include kafka, @@ -104,12 +107,12 @@ pub trait FormattedSink { } } -pub struct LogSinkerOf> { +pub struct LogSinkerOf { writer: W, sink_metrics: SinkMetrics, } -impl> LogSinkerOf { +impl LogSinkerOf { pub fn new(writer: W, sink_metrics: SinkMetrics) -> Self { LogSinkerOf { writer, @@ -118,6 +121,7 @@ impl> LogSinkerOf { } } +#[async_trait] impl> LogSinker for LogSinkerOf { async fn consume_log_and_sink(self, mut log_reader: impl LogReader) -> Result<()> { let mut sink_writer = self.writer; @@ -222,3 +226,64 @@ where } } } + +pub struct AsyncTruncateLogSinkerOf { + writer: W, + future_manager: DeliveryFutureManager, +} + +impl AsyncTruncateLogSinkerOf { + pub fn new(writer: W, max_future_count: usize) -> Self { + AsyncTruncateLogSinkerOf { + writer, + future_manager: DeliveryFutureManager::new(max_future_count), + } + } +} + +#[async_trait] +impl LogSinker for AsyncTruncateLogSinkerOf { + async fn consume_log_and_sink(mut self, mut log_reader: impl LogReader) -> Result<()> { + log_reader.init().await?; + loop { + let select_result = drop_either_future( + select( + pin!(log_reader.next_item()), + pin!(self.future_manager.next_truncate_offset()), + ) + .await, + ); + match select_result { + Either::Left(item_result) => { + let (epoch, item) = item_result?; + match item { + LogStoreReadItem::StreamChunk { chunk_id, chunk } => { + let add_future = self.future_manager.start_write_chunk(epoch, chunk_id); + self.writer.write_chunk(chunk, add_future).await?; + } + LogStoreReadItem::Barrier { + is_checkpoint: _is_checkpoint, + } => { + self.future_manager.add_barrier(epoch); + } + LogStoreReadItem::UpdateVnodeBitmap(_) => {} + } + } + Either::Right(offset_result) => { + let offset = offset_result?; + log_reader.truncate(offset).await?; + } + } + } + } +} + +#[easy_ext::ext(AsyncTruncateSinkWriterExt)] +impl T +where + T: AsyncTruncateSinkWriter + Sized, +{ + pub fn into_log_sinker(self, max_future_count: usize) -> AsyncTruncateLogSinkerOf { + AsyncTruncateLogSinkerOf::new(self, max_future_count) + } +} diff --git a/src/expr/core/src/lib.rs b/src/expr/core/src/lib.rs index c2f46d5632274..b49c4ae161dfc 100644 --- a/src/expr/core/src/lib.rs +++ b/src/expr/core/src/lib.rs @@ -17,7 +17,7 @@ #![feature(lint_reasons)] #![feature(iterator_try_collect)] #![feature(lazy_cell)] -#![feature(generators)] +#![feature(coroutines)] #![feature(arc_unwrap_or_clone)] #![feature(never_type)] diff --git a/src/expr/impl/src/lib.rs b/src/expr/impl/src/lib.rs index a5906e4320282..6ea82d30ac5f1 100644 --- a/src/expr/impl/src/lib.rs +++ b/src/expr/impl/src/lib.rs @@ -28,7 +28,7 @@ #![feature(exclusive_range_pattern)] #![feature(lazy_cell)] #![feature(round_ties_even)] -#![feature(generators)] +#![feature(coroutines)] #![feature(test)] #![feature(arc_unwrap_or_clone)] diff --git a/src/frontend/planner_test/tests/testdata/output/append_only.yaml b/src/frontend/planner_test/tests/testdata/output/append_only.yaml index 184abd564c32b..d693d3fc942df 100644 --- a/src/frontend/planner_test/tests/testdata/output/append_only.yaml +++ b/src/frontend/planner_test/tests/testdata/output/append_only.yaml @@ -14,11 +14,12 @@ select t1.v1 as id, v2, v3 from t1 join t2 on t1.v1=t2.v1; stream_plan: |- StreamMaterialize { columns: [id, v2, v3, 
t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, id], pk_columns: [t1._row_id, t2._row_id, id], pk_conflict: NoCheck } - └─StreamHashJoin [append_only] { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v3, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v1) } - └─StreamTableScan { table: t2, columns: [t2.v1, t2.v3, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) } + └─StreamHashJoin [append_only] { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v3, t1._row_id, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v1) } + └─StreamTableScan { table: t2, columns: [t2.v1, t2.v3, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - sql: | create table t1 (v1 int, v2 int) append only; select v1 from t1 order by v1 limit 3 offset 3; diff --git a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml index fde09972bb66b..ce6724dc91c37 100644 --- a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml +++ b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml @@ -234,9 +234,10 @@ └─BatchValues { rows: [] } stream_plan: |- StreamMaterialize { columns: [v, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v], pk_columns: [t._row_id, t._row_id#1, v], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v, t._row_id, t._row_id] } - ├─StreamExchange { dist: HashShard(t.v) } - │ └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(t.v) } - └─StreamFilter { predicate: false:Boolean } - └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v, t._row_id, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v, t._row_id, t._row_id] } + ├─StreamExchange { dist: HashShard(t.v) } + │ └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v) } + └─StreamFilter { predicate: false:Boolean } + └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml index 6f4f8a673c996..e7196f7cf4fea 100644 --- a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml @@ -134,141 +134,145 @@ └─BatchScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], distribution: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } stream_plan: |- StreamMaterialize { columns: [s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id(hidden), stock.s_w_id(hidden), min(stock.s_quantity)(hidden), $expr2(hidden), 
region.r_regionkey(hidden), supplier.s_nationkey(hidden)], stream_key: [stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_columns: [n_name, s_name, i_id, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] } - │ └─StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] } - │ ├─StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] } - │ │ └─StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] } - │ │ └─StreamExchange { dist: HashShard(stock.s_i_id) } - │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } - │ │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } - │ │ │ └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } - │ │ │ ├─StreamExchange { dist: HashShard(region.r_regionkey) } - │ │ │ │ └─StreamProject { exprs: [region.r_regionkey] } - │ │ │ │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) } - │ │ │ │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } - │ │ │ └─StreamExchange { dist: HashShard(nation.n_regionkey) } - │ │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } - │ │ │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } - │ │ │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } - │ │ │ └─StreamExchange { dist: HashShard(nation.n_nationkey) } - │ │ │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } - │ │ └─StreamExchange { dist: HashShard($expr1) } - │ │ └─StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] } - │ │ └─StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) } - │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } - │ └─StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all } - │ ├─StreamExchange { dist: HashShard(item.i_id) } - │ │ └─StreamProject { exprs: [item.i_id, item.i_name] } - │ │ └─StreamFilter { predicate: Like(item.i_data, 
'%b':Varchar) } - │ │ └─StreamTableScan { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } - │ └─StreamExchange { dist: HashShard(stock.s_i_id) } - │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } - └─StreamExchange { dist: HashShard(supplier.s_suppkey) } - └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] } - ├─StreamExchange { dist: HashShard(region.r_regionkey) } - │ └─StreamProject { exprs: [region.r_regionkey] } - │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) } - │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } - └─StreamExchange { dist: HashShard(nation.n_regionkey) } - └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } - ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } - │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } - └─StreamExchange { dist: HashShard(nation.n_nationkey) } - └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } + └─StreamExchange { dist: HashShard(stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey) } + └─StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] } + │ └─StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] } + │ ├─StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] } + │ │ └─StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] } + │ │ └─StreamExchange { dist: HashShard(stock.s_i_id) } + │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } + │ │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } + │ │ │ └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } + │ │ │ ├─StreamExchange { dist: HashShard(region.r_regionkey) } + │ │ │ │ 
└─StreamProject { exprs: [region.r_regionkey] } + │ │ │ │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) } + │ │ │ │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } + │ │ │ └─StreamExchange { dist: HashShard(nation.n_regionkey) } + │ │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } + │ │ │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } + │ │ │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } + │ │ │ └─StreamExchange { dist: HashShard(nation.n_nationkey) } + │ │ │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } + │ │ └─StreamExchange { dist: HashShard($expr1) } + │ │ └─StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] } + │ │ └─StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) } + │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } + │ └─StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all } + │ ├─StreamExchange { dist: HashShard(item.i_id) } + │ │ └─StreamProject { exprs: [item.i_id, item.i_name] } + │ │ └─StreamFilter { predicate: Like(item.i_data, '%b':Varchar) } + │ │ └─StreamTableScan { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } + │ └─StreamExchange { dist: HashShard(stock.s_i_id) } + │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } + └─StreamExchange { dist: HashShard(supplier.s_suppkey) } + └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] } + ├─StreamExchange { dist: HashShard(region.r_regionkey) } + │ └─StreamProject { exprs: [region.r_regionkey] } + │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) } + │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } + └─StreamExchange { dist: HashShard(nation.n_regionkey) } + └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } + ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } + │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } + └─StreamExchange { dist: HashShard(nation.n_nationkey) } + └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: 
[nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id(hidden), stock.s_w_id(hidden), min(stock.s_quantity)(hidden), $expr2(hidden), region.r_regionkey(hidden), supplier.s_nationkey(hidden)], stream_key: [stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_columns: [n_name, s_name, i_id, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([2]) from 1 - └── StreamExchange Hash([0]) from 11 + └── StreamExchange Hash([8, 9, 10, 11, 12, 13]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([2]) from 2 + └── StreamExchange Hash([0]) from 12 + + Fragment 2 StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] } └── StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } ├── StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] } │ └── StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] } { intermediate state table: 9, state tables: [ 8 ], distinct tables: [] } - │ └── StreamExchange Hash([0]) from 2 + │ └── StreamExchange Hash([0]) from 3 └── StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all } { left table: 26, right table: 28, left degree table: 27, right degree table: 29 } - ├── StreamExchange Hash([0]) from 9 - └── StreamExchange Hash([0]) from 10 - - Fragment 2 - StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 } - ├── StreamExchange Hash([0]) from 3 - └── StreamExchange Hash([2]) from 8 + ├── StreamExchange Hash([0]) from 10 + └── StreamExchange Hash([0]) from 11 Fragment 3 - StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 } + StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, 
region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 } ├── StreamExchange Hash([0]) from 4 - └── StreamExchange Hash([1]) from 5 + └── StreamExchange Hash([2]) from 9 Fragment 4 + StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 } + ├── StreamExchange Hash([0]) from 5 + └── StreamExchange Hash([1]) from 6 + + Fragment 5 StreamProject { exprs: [region.r_regionkey] } └── StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) } └── Chain { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } { state table: 18 } ├── Upstream └── BatchPlanNode - Fragment 5 + Fragment 6 StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } { left table: 19, right table: 21, left degree table: 20, right degree table: 22 } - ├── StreamExchange Hash([1]) from 6 - └── StreamExchange Hash([0]) from 7 + ├── StreamExchange Hash([1]) from 7 + └── StreamExchange Hash([0]) from 8 - Fragment 6 + Fragment 7 Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 23 } ├── Upstream └── BatchPlanNode - Fragment 7 + Fragment 8 Chain { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 24 } ├── Upstream └── BatchPlanNode - Fragment 8 + Fragment 9 StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] } └── StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) } └── Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 25 } ├── Upstream └── BatchPlanNode - Fragment 9 + Fragment 10 StreamProject { exprs: [item.i_id, item.i_name] } └── StreamFilter { predicate: Like(item.i_data, '%b':Varchar) } └── Chain { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } { state table: 30 } ├── Upstream └── BatchPlanNode - Fragment 10 + Fragment 11 Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 31 } ├── Upstream └── BatchPlanNode - Fragment 11 + Fragment 12 StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] } { left table: 32, right table: 34, left degree table: 33, right degree table: 35 } - ├── StreamExchange Hash([0]) from 12 - └── StreamExchange Hash([6]) from 13 + ├── StreamExchange Hash([0]) from 13 + └── StreamExchange Hash([6]) from 14 - Fragment 12 + Fragment 13 StreamProject { exprs: [region.r_regionkey] } └── StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) } └── Chain { table: region, columns: [region.r_regionkey, region.r_name], 
pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } { state table: 36 } ├── Upstream └── BatchPlanNode - Fragment 13 + Fragment 14 StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } { left table: 37, right table: 39, left degree table: 38, right degree table: 40 } - ├── StreamExchange Hash([3]) from 14 - └── StreamExchange Hash([0]) from 15 + ├── StreamExchange Hash([3]) from 15 + └── StreamExchange Hash([0]) from 16 - Fragment 14 + Fragment 15 Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 41 } ├── Upstream └── BatchPlanNode - Fragment 15 + Fragment 16 Chain { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 42 } ├── Upstream └── BatchPlanNode @@ -359,7 +363,7 @@ Table 42 { columns: [ vnode, n_nationkey, nation_backfill_finished, nation_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 } - Table 4294967294 { columns: [ s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey ], primary key: [ $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $12 ASC, $13 ASC, $11 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ], distribution key: [ 11 ], read pk prefix len hint: 9 } + Table 4294967294 { columns: [ s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey ], primary key: [ $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $12 ASC, $13 ASC, $11 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ], distribution key: [ 8, 9, 10, 11, 12, 13 ], read pk prefix len hint: 9 } - id: ch_q3 before: @@ -2496,59 +2500,63 @@ └─LogicalScan { table: revenue1, columns: [revenue1.total_revenue] } stream_plan: |- StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no(hidden)], stream_key: [s_suppkey, revenue1.supplier_no, total_revenue], pk_columns: [s_suppkey, revenue1.supplier_no, total_revenue], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] } - ├─StreamExchange { dist: HashShard(revenue1.total_revenue) } - │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] } - │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } - │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } - │ └─StreamExchange { dist: HashShard($expr1) } - │ └─StreamProject { exprs: 
[revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] } - │ └─StreamTableScan { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } - └─StreamExchange { dist: HashShard(max(max(revenue1.total_revenue))) } - └─StreamProject { exprs: [max(max(revenue1.total_revenue))] } - └─StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] } - └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] } - └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] } - └─StreamTableScan { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } + └─StreamExchange { dist: HashShard(supplier.s_suppkey, revenue1.total_revenue, revenue1.supplier_no) } + └─StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] } + ├─StreamExchange { dist: HashShard(revenue1.total_revenue) } + │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] } + │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } + │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } + │ └─StreamExchange { dist: HashShard($expr1) } + │ └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] } + │ └─StreamTableScan { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } + └─StreamExchange { dist: HashShard(max(max(revenue1.total_revenue))) } + └─StreamProject { exprs: [max(max(revenue1.total_revenue))] } + └─StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] } + └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] } + └─StreamTableScan { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no(hidden)], stream_key: [s_suppkey, revenue1.supplier_no, total_revenue], pk_columns: [s_suppkey, revenue1.supplier_no, total_revenue], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([4]) from 1 - └── StreamExchange Hash([0]) from 4 + └── StreamExchange Hash([0, 4, 5]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: 
revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([4]) from 2 + └── StreamExchange Hash([0]) from 5 + + Fragment 2 StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] } ├── left table: 4 ├── right table: 6 ├── left degree table: 5 ├── right degree table: 7 - ├── StreamExchange Hash([0]) from 2 - └── StreamExchange Hash([1]) from 3 + ├── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([1]) from 4 - Fragment 2 + Fragment 3 Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 8 } ├── Upstream └── BatchPlanNode - Fragment 3 + Fragment 4 StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] } └── Chain { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } { state table: 9 } ├── Upstream └── BatchPlanNode - Fragment 4 + Fragment 5 StreamProject { exprs: [max(max(revenue1.total_revenue))] } └── StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] } { intermediate state table: 11, state tables: [ 10 ], distinct tables: [] } - └── StreamExchange Single from 5 + └── StreamExchange Single from 6 - Fragment 5 + Fragment 6 StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] } { intermediate state table: 13, state tables: [ 12 ], distinct tables: [] } └── StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] } └── Chain { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } { state table: 14 } @@ -2590,7 +2598,7 @@ Table 14 { columns: [ vnode, supplier_no, revenue1_backfill_finished, revenue1_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 } - Table 4294967294 { columns: [ s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no ], primary key: [ $0 ASC, $5 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 4 ], read pk prefix len hint: 3 } + Table 4294967294 { columns: [ s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no ], primary key: [ $0 ASC, $5 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 0, 4, 5 ], read pk prefix len hint: 3 } - id: ch_q16 before: @@ -3174,58 +3182,62 @@ └─BatchScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_delivery_d], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [s_name, s_address, supplier.s_suppkey(hidden), supplier.s_nationkey(hidden)], stream_key: [supplier.s_suppkey, supplier.s_nationkey], pk_columns: [s_name, supplier.s_suppkey, supplier.s_nationkey], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, 
supplier.s_nationkey] } - ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } - │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } - │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } - │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } - │ └─StreamExchange { dist: HashShard(nation.n_nationkey) } - │ └─StreamProject { exprs: [nation.n_nationkey] } - │ └─StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) } - │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } - └─StreamExchange { dist: HashShard($expr1) } - └─StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] } - └─StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) } - └─StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] } - └─StreamHashAgg { group_key: [stock.s_i_id, stock.s_w_id, stock.s_quantity], aggs: [sum(order_line.ol_quantity), count] } - └─StreamHashJoin { type: LeftSemi, predicate: stock.s_i_id = item.i_id, output: all } - ├─StreamHashJoin { type: Inner, predicate: stock.s_i_id = order_line.ol_i_id, output: [stock.s_i_id, stock.s_w_id, stock.s_quantity, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] } - │ ├─StreamExchange { dist: HashShard(stock.s_i_id) } - │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } - │ └─StreamExchange { dist: HashShard(order_line.ol_i_id) } - │ └─StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] } - │ └─StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) } - │ └─StreamTableScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) } - └─StreamExchange { dist: HashShard(item.i_id) } - └─StreamProject { exprs: [item.i_id] } - └─StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) } - └─StreamTableScan { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } + └─StreamExchange { dist: HashShard(supplier.s_suppkey, supplier.s_nationkey) } + └─StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] } + ├─StreamExchange { dist: HashShard(supplier.s_suppkey) } + │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } + │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) } + │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } + │ 
└─StreamExchange { dist: HashShard(nation.n_nationkey) } + │ └─StreamProject { exprs: [nation.n_nationkey] } + │ └─StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) } + │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } + └─StreamExchange { dist: HashShard($expr1) } + └─StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] } + └─StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) } + └─StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] } + └─StreamHashAgg { group_key: [stock.s_i_id, stock.s_w_id, stock.s_quantity], aggs: [sum(order_line.ol_quantity), count] } + └─StreamHashJoin { type: LeftSemi, predicate: stock.s_i_id = item.i_id, output: all } + ├─StreamHashJoin { type: Inner, predicate: stock.s_i_id = order_line.ol_i_id, output: [stock.s_i_id, stock.s_w_id, stock.s_quantity, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] } + │ ├─StreamExchange { dist: HashShard(stock.s_i_id) } + │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } + │ └─StreamExchange { dist: HashShard(order_line.ol_i_id) } + │ └─StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] } + │ └─StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) } + │ └─StreamTableScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) } + └─StreamExchange { dist: HashShard(item.i_id) } + └─StreamProject { exprs: [item.i_id] } + └─StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) } + └─StreamTableScan { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [s_name, s_address, supplier.s_suppkey(hidden), supplier.s_nationkey(hidden)], stream_key: [supplier.s_suppkey, supplier.s_nationkey], pk_columns: [s_name, supplier.s_suppkey, supplier.s_nationkey], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 4 + └── StreamExchange Hash([2, 3]) from 1 Fragment 1 - StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } - ├── StreamExchange Hash([3]) from 2 - └── StreamExchange Hash([0]) from 3 + StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] } { left table: 0, right table: 
2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 5 Fragment 2 + StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } + ├── StreamExchange Hash([3]) from 3 + └── StreamExchange Hash([0]) from 4 + + Fragment 3 Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 8 } ├── Upstream └── BatchPlanNode - Fragment 3 + Fragment 4 StreamProject { exprs: [nation.n_nationkey] } └── StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) } └── Chain { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 9 } ├── Upstream └── BatchPlanNode - Fragment 4 + Fragment 5 StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] } └── StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) } └── StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] } @@ -3236,16 +3248,16 @@ │ ├── right table: 17 │ ├── left degree table: 16 │ ├── right degree table: 18 - │ ├── StreamExchange Hash([0]) from 5 - │ └── StreamExchange Hash([0]) from 6 - └── StreamExchange Hash([0]) from 7 + │ ├── StreamExchange Hash([0]) from 6 + │ └── StreamExchange Hash([0]) from 7 + └── StreamExchange Hash([0]) from 8 - Fragment 5 + Fragment 6 Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 19 } ├── Upstream └── BatchPlanNode - Fragment 6 + Fragment 7 StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] } └── StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) } └── Chain { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) } @@ -3253,7 +3265,7 @@ ├── Upstream └── BatchPlanNode - Fragment 7 + Fragment 8 StreamProject { exprs: [item.i_id] } └── StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) } └── Chain { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } { state table: 21 } @@ -3304,7 +3316,7 @@ Table 21 { columns: [ vnode, i_id, item_backfill_finished, item_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 } - Table 4294967294 { columns: [ s_name, s_address, supplier.s_suppkey, supplier.s_nationkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 2 ], read pk prefix len hint: 3 } + Table 4294967294 { columns: [ s_name, s_address, supplier.s_suppkey, supplier.s_nationkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 2, 3 ], read pk prefix 
len hint: 3 } - id: ch_q21 before: diff --git a/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml b/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml index 9c961429276a3..dece27002b19b 100644 --- a/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml +++ b/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml @@ -23,11 +23,12 @@ └─LogicalScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id] } stream_plan: |- StreamMaterialize { columns: [v3, v4, v1, t2._row_id(hidden), t1._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, v3], pk_columns: [t2._row_id, t1._row_id, v3], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t2._row_id, t1._row_id] } - ├─StreamExchange { dist: HashShard(t2.v3) } - │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - └─StreamExchange { dist: HashShard(t1.v1) } - └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v3, t2._row_id, t1._row_id) } + └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t2._row_id, t1._row_id] } + ├─StreamExchange { dist: HashShard(t2.v3) } + │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1) } + └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - sql: | create table t1 (v1 int, v2 int); create table t2 (v3 int, v4 int); @@ -79,8 +80,9 @@ └─LogicalValues { rows: [['cn':Varchar, 'China':Varchar], ['us':Varchar, 'United States':Varchar]], schema: Schema { fields: [*VALUES*_0.column_0:Varchar, *VALUES*_0.column_1:Varchar] } } stream_plan: |- StreamMaterialize { columns: [v, c, abbr, real, t._row_id(hidden), _row_id(hidden)], stream_key: [t._row_id, _row_id, c], pk_columns: [t._row_id, _row_id, c], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.c = *VALUES*_0.column_0, output: [t.v, t.c, *VALUES*_0.column_0, *VALUES*_0.column_1, t._row_id, _row_id] } - ├─StreamExchange { dist: HashShard(t.c) } - │ └─StreamTableScan { table: t, columns: [t.v, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(*VALUES*_0.column_0) } - └─StreamValues { rows: [['cn':Varchar, 'China':Varchar, 0:Int64], ['us':Varchar, 'United States':Varchar, 1:Int64]] } + └─StreamExchange { dist: HashShard(t.c, t._row_id, _row_id) } + └─StreamHashJoin { type: Inner, predicate: t.c = *VALUES*_0.column_0, output: [t.v, t.c, *VALUES*_0.column_0, *VALUES*_0.column_1, t._row_id, _row_id] } + ├─StreamExchange { dist: HashShard(t.c) } + │ └─StreamTableScan { table: t, columns: [t.v, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(*VALUES*_0.column_0) } + └─StreamValues { rows: [['cn':Varchar, 'China':Varchar, 0:Int64], ['us':Varchar, 'United States':Varchar, 1:Int64]] } diff --git a/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml b/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml index 85d76188f3e76..818fd88b30a20 100644 --- a/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml +++ 
b/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml @@ -963,31 +963,35 @@ └─BatchScan { table: a, columns: [a.k1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), a.k1(hidden)], stream_key: [ak1.a._row_id, ak1.k1], pk_columns: [ak1.a._row_id, ak1.k1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] } - ├─StreamExchange { dist: HashShard(ak1.k1) } - │ └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } - └─StreamHashAgg { group_key: [a.k1], aggs: [count] } - └─StreamExchange { dist: HashShard(a.k1) } - └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } + └─StreamExchange { dist: HashShard(ak1.a._row_id, ak1.k1) } + └─StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] } + ├─StreamExchange { dist: HashShard(ak1.k1) } + │ └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } + └─StreamHashAgg { group_key: [a.k1], aggs: [count] } + └─StreamExchange { dist: HashShard(a.k1) } + └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), a.k1(hidden)], stream_key: [ak1.a._row_id, ak1.k1], pk_columns: [ak1.a._row_id, ak1.k1], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([2, 3]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0]) from 2 + └── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 3 + + Fragment 2 Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } { state table: 4 } ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 6 } ├── Upstream └── BatchPlanNode @@ -1022,7 +1026,7 @@ ├── columns: [ v, bv, ak1.a._row_id, ak1.k1, a.k1 ] ├── primary key: [ $2 ASC, $3 ASC ] ├── value indices: [ 0, 1, 2, 3, 4 ] - ├── distribution key: [ 3 ] + ├── distribution key: [ 2, 3 ] └── read pk prefix len hint: 2 - id: aggk1_join_Ak1_onk1 @@ -1054,31 +1058,35 @@ └─BatchScan { table: a, columns: [a.k1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v, bv, a.k1(hidden), ak1.a._row_id(hidden)], stream_key: [a.k1, ak1.a._row_id], pk_columns: [a.k1, ak1.a._row_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, 
count, a.k1, ak1.a._row_id] } - ├─StreamHashAgg { group_key: [a.k1], aggs: [count] } - │ └─StreamExchange { dist: HashShard(a.k1) } - │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } - └─StreamExchange { dist: HashShard(ak1.k1) } - └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } + └─StreamExchange { dist: HashShard(a.k1, ak1.a._row_id) } + └─StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] } + ├─StreamHashAgg { group_key: [a.k1], aggs: [count] } + │ └─StreamExchange { dist: HashShard(a.k1) } + │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } + └─StreamExchange { dist: HashShard(ak1.k1) } + └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [v, bv, a.k1(hidden), ak1.a._row_id(hidden)], stream_key: [a.k1, ak1.a._row_id], pk_columns: [a.k1, ak1.a._row_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] } - │ └── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([2, 3]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] } + │ └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 3 + + Fragment 2 Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 5 } ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } { state table: 6 } ├── Upstream └── BatchPlanNode @@ -1113,7 +1121,7 @@ ├── columns: [ v, bv, a.k1, ak1.a._row_id ] ├── primary key: [ $2 ASC, $3 ASC ] ├── value indices: [ 0, 1, 2, 3 ] - ├── distribution key: [ 2 ] + ├── distribution key: [ 2, 3 ] └── read pk prefix len hint: 2 - id: aggk1_join_aggk1_onk1 @@ -1156,33 +1164,37 @@ └─BatchScan { table: b, columns: [b.k1], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [num, bv, a.k1(hidden), b.k1(hidden)], stream_key: [a.k1], pk_columns: [a.k1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] } - ├─StreamHashAgg { group_key: [a.k1], aggs: [count] } - │ └─StreamExchange { dist: HashShard(a.k1) } - │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } - └─StreamHashAgg { group_key: [b.k1], aggs: [count] } - └─StreamExchange { dist: HashShard(b.k1) } - └─StreamTableScan { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } + └─StreamExchange { dist: HashShard(a.k1) } + └─StreamHashJoin { type: Inner, 
predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] } + ├─StreamHashAgg { group_key: [a.k1], aggs: [count] } + │ └─StreamExchange { dist: HashShard(a.k1) } + │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } + └─StreamHashAgg { group_key: [b.k1], aggs: [count] } + └─StreamExchange { dist: HashShard(b.k1) } + └─StreamTableScan { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [num, bv, a.k1(hidden), b.k1(hidden)], stream_key: [a.k1], pk_columns: [a.k1], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] } - │ └── StreamExchange Hash([0]) from 1 - └── StreamHashAgg { group_key: [b.k1], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([2]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] } + │ └── StreamExchange Hash([0]) from 2 + └── StreamHashAgg { group_key: [b.k1], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 3 + + Fragment 2 Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 5 } ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } { state table: 7 } ├── Upstream └── BatchPlanNode diff --git a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml index a340014298c47..ab282ebe3858a 100644 --- a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml @@ -124,16 +124,17 @@ └─LogicalScan { table: t2, columns: [t2.v2] } stream_plan: |- StreamMaterialize { columns: [v1, max, t1._row_id(hidden)], stream_key: [t1._row_id, v1], pk_columns: [t1._row_id, v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v1 = max(max(t2.v2)), output: [t1.v1, max(max(t2.v2)), t1._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(max(max(t2.v2))) } - └─StreamProject { exprs: [max(max(t2.v2))] } - └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } - └─StreamExchange { dist: Single } - └─StreamHashAgg { group_key: [$expr1], aggs: [max(t2.v2), count] } - └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] } - └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v1 = 
max(max(t2.v2)), output: [t1.v1, max(max(t2.v2)), t1._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(max(max(t2.v2))) } + └─StreamProject { exprs: [max(max(t2.v2))] } + └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr1], aggs: [max(t2.v2), count] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: Dynamic filter join on unequal types sql: | create table t1 (v1 int); diff --git a/src/frontend/planner_test/tests/testdata/output/join.yaml b/src/frontend/planner_test/tests/testdata/output/join.yaml index 4ef01cc84ab47..a61d2a0d73327 100644 --- a/src/frontend/planner_test/tests/testdata/output/join.yaml +++ b/src/frontend/planner_test/tests/testdata/output/join.yaml @@ -14,14 +14,15 @@ └─LogicalScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id] } stream_plan: |- StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v1 = t3.v5, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] } - ├─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] } - │ ├─StreamExchange { dist: HashShard(t1.v1) } - │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ └─StreamExchange { dist: HashShard(t2.v3) } - │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - └─StreamExchange { dist: HashShard(t3.v5) } - └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id, t3._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v1 = t3.v5, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] } + ├─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] } + │ ├─StreamExchange { dist: HashShard(t1.v1) } + │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ └─StreamExchange { dist: HashShard(t2.v3) } + │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t3.v5) } + └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } - name: self join sql: | create table t (v1 int, v2 int); @@ -33,11 +34,12 @@ └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] } stream_plan: |- StreamMaterialize { columns: [t1v1, t2v1, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, t1v1], pk_columns: [t._row_id, t._row_id#1, t1v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v1, t._row_id, t._row_id] } - 
├─StreamExchange { dist: HashShard(t.v1) } - │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(t.v1) } - └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v1, t._row_id, t._row_id] } + ├─StreamExchange { dist: HashShard(t.v1) } + │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v1) } + └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t1 (v1 int, v2 int); create table t2 (v1 int, v2 int); @@ -65,15 +67,16 @@ └─BatchScan { table: t3, columns: [t3.v1, t3.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [t1_v1, t1_v2, t2_v1, t2_v2, t3_v1, t3_v2, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1, t3._row_id, t2_v2], pk_columns: [t1._row_id, t2._row_id, t1_v1, t3._row_id, t2_v2], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t2.v2 = t3.v2, output: [t1.v1, t1.v2, t2.v1, t2.v2, t3.v1, t3.v2, t1._row_id, t2._row_id, t3._row_id] } - ├─StreamExchange { dist: HashShard(t2.v2) } - │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] } - │ ├─StreamExchange { dist: HashShard(t1.v1) } - │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ └─StreamExchange { dist: HashShard(t2.v1) } - │ └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - └─StreamExchange { dist: HashShard(t3.v2) } - └─StreamTableScan { table: t3, columns: [t3.v1, t3.v2, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t2.v2, t1._row_id, t2._row_id, t3._row_id) } + └─StreamHashJoin { type: Inner, predicate: t2.v2 = t3.v2, output: [t1.v1, t1.v2, t2.v1, t2.v2, t3.v1, t3.v2, t1._row_id, t2._row_id, t3._row_id] } + ├─StreamExchange { dist: HashShard(t2.v2) } + │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] } + │ ├─StreamExchange { dist: HashShard(t1.v1) } + │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ └─StreamExchange { dist: HashShard(t2.v1) } + │ └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t3.v2) } + └─StreamTableScan { table: t3, columns: [t3.v1, t3.v2, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } - sql: | create table t1 (v1 int, v2 int); create table t2 (v1 int, v2 int); @@ -93,11 +96,12 @@ └─BatchScan { table: t2, columns: [t2.v1, t2.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [t1_v2, t2_v2, t1._row_id(hidden), t1.v1(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1.v1], pk_columns: [t1._row_id, t2._row_id, t1.v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v2, t2.v2, 
t1._row_id, t1.v1, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v1) } - └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1._row_id, t1.v1, t2._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v2, t2.v2, t1._row_id, t1.v1, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v1) } + └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - sql: | create table t1 (v1 int, v2 int); create table t2 (v1 int, v2 int); @@ -154,11 +158,12 @@ └─BatchScan { table: i, columns: [i.x], distribution: UpstreamHashShard(i.x) } stream_plan: |- StreamMaterialize { columns: [ix, iix, i.t._row_id(hidden), i.t._row_id#1(hidden)], stream_key: [i.t._row_id, i.t._row_id#1, ix], pk_columns: [i.t._row_id, i.t._row_id#1, ix], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id] } - ├─StreamExchange { dist: HashShard(i.x) } - │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } - └─StreamExchange { dist: HashShard(i.x) } - └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } + └─StreamExchange { dist: HashShard(i.x, i.t._row_id, i.t._row_id) } + └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id] } + ├─StreamExchange { dist: HashShard(i.x) } + │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } + └─StreamExchange { dist: HashShard(i.x) } + └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } - name: Left & right has same SomeShard distribution. 
There should still be exchanges below hash join sql: | create table t(x int); @@ -170,11 +175,12 @@ └─BatchScan { table: i, columns: [i.x], distribution: UpstreamHashShard(i.x) } stream_plan: |- StreamMaterialize { columns: [ix, tx, i.t._row_id(hidden), t._row_id(hidden)], stream_key: [i.t._row_id, t._row_id, ix], pk_columns: [i.t._row_id, t._row_id, ix], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: i.x = t.x, output: [i.x, t.x, i.t._row_id, t._row_id] } - ├─StreamExchange { dist: HashShard(i.x) } - │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } - └─StreamExchange { dist: HashShard(t.x) } - └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(i.x, i.t._row_id, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: i.x = t.x, output: [i.x, t.x, i.t._row_id, t._row_id] } + ├─StreamExchange { dist: HashShard(i.x) } + │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) } + └─StreamExchange { dist: HashShard(t.x) } + └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: Left & right has same HashShard distribution. There should be no exchange below hash join sql: | create table t(x int); @@ -628,12 +634,13 @@ └─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), $expr1(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, $expr1], pk_columns: [t1._row_id, t2._row_id, $expr1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: $expr1 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr1, t2._row_id] } - ├─StreamExchange { dist: HashShard($expr1) } - │ └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v2) } - └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1._row_id, $expr1, t2._row_id) } + └─StreamHashJoin { type: Inner, predicate: $expr1 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr1, t2._row_id] } + ├─StreamExchange { dist: HashShard($expr1) } + │ └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v2) } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: Repeated columns in project should not interfere with join result (https://github.com/risingwavelabs/risingwave/issues/8216) sql: | create table t(x int); @@ -652,39 +659,43 @@ select t1.src p1, t1.dst p2, t2.dst p3 from t t1, t t2, t t3 where t1.dst = t2.src and t2.src = t3.dst and t3.dst = t1.src; stream_plan: |- StreamMaterialize { columns: [p1, p2, p3, t._row_id(hidden), t._row_id#1(hidden), t.src(hidden), t._row_id#2(hidden)], stream_key: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_columns: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = 
t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } - ├─StreamExchange { dist: HashShard(t.src) } - │ └─StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } - │ ├─StreamExchange { dist: HashShard(t.dst) } - │ │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - │ └─StreamExchange { dist: HashShard(t.src) } - │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(t.dst) } - └─StreamTableScan { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.src, t.dst, t._row_id, t._row_id, t.src, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } + ├─StreamExchange { dist: HashShard(t.src) } + │ └─StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } + │ ├─StreamExchange { dist: HashShard(t.dst) } + │ │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + │ └─StreamExchange { dist: HashShard(t.src) } + │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.dst) } + └─StreamTableScan { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [p1, p2, p3, t._row_id(hidden), t._row_id#1(hidden), t.src(hidden), t._row_id#2(hidden)], stream_key: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_columns: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 4 + └── StreamExchange Hash([0, 1, 3, 4, 5, 6]) from 1 Fragment 1 - StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } - ├── StreamExchange Hash([1]) from 2 - └── StreamExchange Hash([0]) from 3 + StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 5 Fragment 2 + StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } + ├── StreamExchange Hash([1]) from 3 + └── StreamExchange Hash([0]) from 4 + + Fragment 3 Chain { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 8 } ├── Upstream └── BatchPlanNode - Fragment 3 + Fragment 4 Chain { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } 
{ state table: 9 } ├── Upstream └── BatchPlanNode - Fragment 4 + Fragment 5 Chain { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 10 } ├── Upstream └── BatchPlanNode @@ -711,5 +722,5 @@ Table 10 { columns: [ vnode, _row_id, t_backfill_finished, t_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 } - Table 4294967294 { columns: [ p1, p2, p3, t._row_id, t._row_id#1, t.src, t._row_id#2 ], primary key: [ $3 ASC, $4 ASC, $1 ASC, $6 ASC, $5 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 6 } + Table 4294967294 { columns: [ p1, p2, p3, t._row_id, t._row_id#1, t.src, t._row_id#2 ], primary key: [ $3 ASC, $4 ASC, $1 ASC, $6 ASC, $5 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 1, 3, 4, 5, 6 ], read pk prefix len hint: 6 } diff --git a/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml b/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml index b24fc18c6b513..31c53d02a9a18 100644 --- a/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml +++ b/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml @@ -34,19 +34,20 @@ └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id, v2, t4._row_id, v5], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id, v2, t4._row_id, v5], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] } - ├─StreamExchange { dist: HashShard(t3.v5) } - │ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] } - │ ├─StreamExchange { dist: HashShard(t1.v2) } - │ │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] } - │ │ ├─StreamExchange { dist: HashShard(t1.v1) } - │ │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ │ └─StreamExchange { dist: HashShard(t2.v3) } - │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - │ └─StreamExchange { dist: HashShard(t3.v6) } - │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } - └─StreamExchange { dist: HashShard(t4.v7) } - └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1.v2, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) } + └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] } + ├─StreamExchange { dist: HashShard(t3.v5) } + │ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] } + │ ├─StreamExchange { dist: HashShard(t1.v2) } + │ │ └─StreamHashJoin { type: Inner, 
predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] } + │ │ ├─StreamExchange { dist: HashShard(t1.v1) } + │ │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ │ └─StreamExchange { dist: HashShard(t2.v3) } + │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + │ └─StreamExchange { dist: HashShard(t3.v6) } + │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } + └─StreamExchange { dist: HashShard(t4.v7) } + └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } - name: bushy tree join ordering sql: | create table t1 (v1 int, v2 int); @@ -81,19 +82,20 @@ └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t2._row_id(hidden), t1._row_id(hidden), t4._row_id(hidden), t3._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, v3, t4._row_id, t3._row_id, v7, v2], pk_columns: [t2._row_id, t1._row_id, v3, t4._row_id, t3._row_id, v7, v2], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t1._row_id, t4._row_id, t3._row_id] } - ├─StreamExchange { dist: HashShard(t1.v2) } - │ └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t1.v2, t2._row_id, t1._row_id] } - │ ├─StreamExchange { dist: HashShard(t2.v3) } - │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - │ └─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t3.v6) } - └─StreamHashJoin { type: Inner, predicate: t4.v7 = t3.v5, output: [t4.v7, t4.v8, t3.v5, t3.v6, t4._row_id, t3._row_id] } - ├─StreamExchange { dist: HashShard(t4.v7) } - │ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } - └─StreamExchange { dist: HashShard(t3.v5) } - └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } + └─StreamExchange { dist: HashShard(t1.v2, t2.v3, t4.v7, t2._row_id, t1._row_id, t4._row_id, t3._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t1._row_id, t4._row_id, t3._row_id] } + ├─StreamExchange { dist: HashShard(t1.v2) } + │ └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t1.v2, t2._row_id, t1._row_id] } + │ ├─StreamExchange { dist: HashShard(t2.v3) } + │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + │ └─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t3.v6) } + └─StreamHashJoin { type: Inner, predicate: t4.v7 = t3.v5, output: [t4.v7, t4.v8, t3.v5, t3.v6, t4._row_id, t3._row_id] } + ├─StreamExchange { dist: HashShard(t4.v7) } + │ 
└─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } + └─StreamExchange { dist: HashShard(t3.v5) } + └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } - name: bushy tree join ordering manually sql: | set rw_enable_join_ordering = false; @@ -128,19 +130,20 @@ └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id, t4._row_id, v5, v2], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id, t4._row_id, v5, v2], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] } - ├─StreamExchange { dist: HashShard(t1.v2) } - │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] } - │ ├─StreamExchange { dist: HashShard(t1.v1) } - │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ └─StreamExchange { dist: HashShard(t2.v3) } - │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - └─StreamExchange { dist: HashShard(t3.v6) } - └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] } - ├─StreamExchange { dist: HashShard(t3.v5) } - │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } - └─StreamExchange { dist: HashShard(t4.v7) } - └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1.v2, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] } + ├─StreamExchange { dist: HashShard(t1.v2) } + │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] } + │ ├─StreamExchange { dist: HashShard(t1.v1) } + │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ └─StreamExchange { dist: HashShard(t2.v3) } + │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t3.v6) } + └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] } + ├─StreamExchange { dist: HashShard(t3.v5) } + │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } + └─StreamExchange { dist: HashShard(t4.v7) } + └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } - name: right deep tree join ordering manually sql: | set rw_enable_join_ordering = false; @@ -175,16 +178,17 @@ └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard } stream_plan: |- 
StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t3._row_id, t4._row_id, v5, v4, v1], pk_columns: [t1._row_id, t2._row_id, t3._row_id, t4._row_id, v5, v4, v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v3) } - └─StreamHashJoin { type: Inner, predicate: t2.v4 = t3.v6, output: [t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t3._row_id, t4._row_id] } - ├─StreamExchange { dist: HashShard(t2.v4) } - │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - └─StreamExchange { dist: HashShard(t3.v6) } - └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] } - ├─StreamExchange { dist: HashShard(t3.v5) } - │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } - └─StreamExchange { dist: HashShard(t4.v7) } - └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t2.v4, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v3) } + └─StreamHashJoin { type: Inner, predicate: t2.v4 = t3.v6, output: [t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t3._row_id, t4._row_id] } + ├─StreamExchange { dist: HashShard(t2.v4) } + │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t3.v6) } + └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] } + ├─StreamExchange { dist: HashShard(t3.v5) } + │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) } + └─StreamExchange { dist: HashShard(t4.v7) } + └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml index 8e63beb9798c1..85bfb1a6cda36 100644 --- a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml +++ b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml @@ -42,22 +42,23 @@ └─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), all_sales._row_id(hidden), salesperson.id(hidden), all_sales.amount(hidden), 
salesperson.id#1(hidden)], stream_key: [salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount], pk_columns: [salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM salesperson.id AND all_sales.amount = max(all_sales.amount), output: [salesperson.name, max(all_sales.amount), all_sales.customer_name, salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount, salesperson.id] } - ├─StreamHashJoin { type: Inner, predicate: salesperson.id = all_sales.salesperson_id, output: [salesperson.id, salesperson.name, all_sales.customer_name, all_sales.amount, salesperson._row_id, all_sales._row_id] } - │ ├─StreamExchange { dist: HashShard(salesperson.id) } - │ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } - │ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } - │ └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } - └─StreamProject { exprs: [salesperson.id, max(all_sales.amount)] } - └─StreamHashAgg { group_key: [salesperson.id], aggs: [max(all_sales.amount), count] } - └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.id, all_sales.amount, all_sales._row_id] } - ├─StreamProject { exprs: [salesperson.id] } - │ └─StreamHashAgg { group_key: [salesperson.id], aggs: [count] } - │ └─StreamExchange { dist: HashShard(salesperson.id) } - │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } - └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } - └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) } - └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } + └─StreamExchange { dist: HashShard(salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount) } + └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM salesperson.id AND all_sales.amount = max(all_sales.amount), output: [salesperson.name, max(all_sales.amount), all_sales.customer_name, salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount, salesperson.id] } + ├─StreamHashJoin { type: Inner, predicate: salesperson.id = all_sales.salesperson_id, output: [salesperson.id, salesperson.name, all_sales.customer_name, all_sales.amount, salesperson._row_id, all_sales._row_id] } + │ ├─StreamExchange { dist: HashShard(salesperson.id) } + │ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } + │ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } + │ └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } + └─StreamProject { exprs: [salesperson.id, max(all_sales.amount)] } + └─StreamHashAgg { group_key: [salesperson.id], aggs: 
[max(all_sales.amount), count] } + └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.id, all_sales.amount, all_sales._row_id] } + ├─StreamProject { exprs: [salesperson.id] } + │ └─StreamHashAgg { group_key: [salesperson.id], aggs: [count] } + │ └─StreamExchange { dist: HashShard(salesperson.id) } + │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } + └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } + └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) } + └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } - name: lateral join 2 sql: | create table all_sales (salesperson_id int, customer_name varchar, amount int ); @@ -87,14 +88,15 @@ └─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), salesperson.id(hidden), all_sales.salesperson_id(hidden)], stream_key: [salesperson._row_id, salesperson.id], pk_columns: [salesperson._row_id, salesperson.id], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] } - ├─StreamExchange { dist: HashShard(salesperson.id) } - │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } - └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] } - └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } - └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] } - └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) } - └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } + └─StreamExchange { dist: HashShard(salesperson._row_id, salesperson.id) } + └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] } + ├─StreamExchange { dist: HashShard(salesperson.id) } + │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } + └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] } + └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } + └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] } + └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) } + └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: 
[all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } - name: lateral join 2 (left join) sql: | create table all_sales (salesperson_id int, customer_name varchar, amount int ); @@ -124,14 +126,15 @@ └─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), salesperson.id(hidden), all_sales.salesperson_id(hidden)], stream_key: [salesperson._row_id, salesperson.id], pk_columns: [salesperson._row_id, salesperson.id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] } - ├─StreamExchange { dist: HashShard(salesperson.id) } - │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } - └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] } - └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } - └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] } - └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) } - └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } + └─StreamExchange { dist: HashShard(salesperson._row_id, salesperson.id) } + └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] } + ├─StreamExchange { dist: HashShard(salesperson.id) } + │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) } + └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] } + └─StreamExchange { dist: HashShard(all_sales.salesperson_id) } + └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] } + └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) } + └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) } - name: lateral join 2 (right join) should throw an error sql: | create table all_sales (salesperson_id int, customer_name varchar, amount int ); @@ -165,14 +168,15 @@ └─BatchScan { table: t, columns: [t.arr], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [x, arr, unnest, t._row_id(hidden), t.arr(hidden), projected_row_id(hidden)], stream_key: [t._row_id, projected_row_id, arr], pk_columns: [t._row_id, projected_row_id, arr], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.arr IS NOT DISTINCT FROM t.arr, output: [t.x, t.arr, Unnest($0), t._row_id, t.arr, projected_row_id] } - ├─StreamExchange { dist: HashShard(t.arr) } - │ └─StreamTableScan { table: t, columns: 
[t.x, t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamProjectSet { select_list: [$0, Unnest($0)] } - └─StreamProject { exprs: [t.arr] } - └─StreamHashAgg { group_key: [t.arr], aggs: [count] } - └─StreamExchange { dist: HashShard(t.arr) } - └─StreamTableScan { table: t, columns: [t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.arr, t._row_id, projected_row_id) } + └─StreamHashJoin { type: Inner, predicate: t.arr IS NOT DISTINCT FROM t.arr, output: [t.x, t.arr, Unnest($0), t._row_id, t.arr, projected_row_id] } + ├─StreamExchange { dist: HashShard(t.arr) } + │ └─StreamTableScan { table: t, columns: [t.x, t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamProjectSet { select_list: [$0, Unnest($0)] } + └─StreamProject { exprs: [t.arr] } + └─StreamHashAgg { group_key: [t.arr], aggs: [count] } + └─StreamExchange { dist: HashShard(t.arr) } + └─StreamTableScan { table: t, columns: [t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: https://github.com/risingwavelabs/risingwave/issues/12298 sql: | create table t1(c varchar, n varchar, id varchar, d varchar); diff --git a/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml b/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml index 2f7d9e5e75b3b..6838ddb331939 100644 --- a/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml +++ b/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml @@ -12,8 +12,9 @@ select m1.v1 as m1v1, m1.v2 as m1v2, m2.v1 as m2v1, m2.v2 as m2v2 from m1 join m2 on m1.v1 = m2.v1; stream_plan: |- StreamMaterialize { columns: [m1v1, m1v2, m2v1, m2v2, m1.t1._row_id(hidden), m2.t1._row_id(hidden)], stream_key: [m1.t1._row_id, m2.t1._row_id, m1v1], pk_columns: [m1.t1._row_id, m2.t1._row_id, m1v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: m1.v1 = m2.v1, output: [m1.v1, m1.v2, m2.v1, m2.v2, m1.t1._row_id, m2.t1._row_id] } - ├─StreamExchange { dist: HashShard(m1.v1) } - │ └─StreamTableScan { table: m1, columns: [m1.v1, m1.v2, m1.t1._row_id], pk: [m1.t1._row_id], dist: UpstreamHashShard(m1.t1._row_id) } - └─StreamExchange { dist: HashShard(m2.v1) } - └─StreamTableScan { table: m2, columns: [m2.v1, m2.v2, m2.t1._row_id], pk: [m2.t1._row_id], dist: UpstreamHashShard(m2.t1._row_id) } + └─StreamExchange { dist: HashShard(m1.v1, m1.t1._row_id, m2.t1._row_id) } + └─StreamHashJoin { type: Inner, predicate: m1.v1 = m2.v1, output: [m1.v1, m1.v2, m2.v1, m2.v2, m1.t1._row_id, m2.t1._row_id] } + ├─StreamExchange { dist: HashShard(m1.v1) } + │ └─StreamTableScan { table: m1, columns: [m1.v1, m1.v2, m1.t1._row_id], pk: [m1.t1._row_id], dist: UpstreamHashShard(m1.t1._row_id) } + └─StreamExchange { dist: HashShard(m2.v1) } + └─StreamTableScan { table: m2, columns: [m2.v1, m2.v2, m2.t1._row_id], pk: [m2.t1._row_id], dist: UpstreamHashShard(m2.t1._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml index 2f23b0674fa84..8d452bf45bc36 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml @@ -181,34 +181,38 @@ └─BatchScan { table: auction, columns: [auction.id, auction.seller, auction.category], distribution: UpstreamHashShard(auction.id) } stream_plan: |- StreamMaterialize { columns: [name, city, state, id, auction.seller(hidden), person.id(hidden)], stream_key: 
[id, auction.seller], pk_columns: [id, auction.seller], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] } - ├─StreamExchange { dist: HashShard(auction.seller) } - │ └─StreamProject { exprs: [auction.id, auction.seller] } - │ └─StreamFilter { predicate: (auction.category = 10:Int32) } - │ └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } - └─StreamExchange { dist: HashShard(person.id) } - └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) } - └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) } + └─StreamExchange { dist: HashShard(auction.id, auction.seller) } + └─StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] } + ├─StreamExchange { dist: HashShard(auction.seller) } + │ └─StreamProject { exprs: [auction.id, auction.seller] } + │ └─StreamFilter { predicate: (auction.category = 10:Int32) } + │ └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } + └─StreamExchange { dist: HashShard(person.id) } + └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) } + └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [name, city, state, id, auction.seller(hidden), person.id(hidden)], stream_key: [id, auction.seller], pk_columns: [id, auction.seller], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([1]) from 1 - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([3, 4]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([1]) from 2 + └── StreamExchange Hash([0]) from 3 + + Fragment 2 StreamProject { exprs: [auction.id, auction.seller] } └── StreamFilter { predicate: (auction.category = 10:Int32) } └── Chain { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 4 } ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) } └── Chain { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) } { state table: 5 } ├── Upstream @@ -242,7 +246,7 @@ ├── columns: [ name, city, state, id, auction.seller, person.id ] ├── primary key: [ $3 ASC, $4 ASC 
] ├── value indices: [ 0, 1, 2, 3, 4, 5 ] - ├── distribution key: [ 4 ] + ├── distribution key: [ 3, 4 ] └── read pk prefix len hint: 2 - id: nexmark_q4 @@ -847,34 +851,38 @@ └─BatchScan { table: auction, columns: [auction.date_time, auction.seller], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), auction.seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, starttime, $expr2], pk_columns: [id, starttime, $expr2], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] } - ├─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) } - │ └─StreamProject { exprs: [person.id, $expr1, $expr2, internal_last_seen_value(person.name)] } - │ └─StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name), count] } - │ └─StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] } - │ └─StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] } - │ └─StreamTableScan { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } - └─StreamProject { exprs: [auction.seller, $expr3, $expr4] } - └─StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } - └─StreamExchange { dist: HashShard(auction.seller, $expr3, $expr4) } - └─StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] } - └─StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] } - └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } + └─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) } + └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] } + ├─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) } + │ └─StreamProject { exprs: [person.id, $expr1, $expr2, internal_last_seen_value(person.name)] } + │ └─StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name), count] } + │ └─StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] } + │ └─StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] } + │ └─StreamTableScan { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } + └─StreamProject { exprs: [auction.seller, $expr3, $expr4] } + └─StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } + └─StreamExchange { dist: HashShard(auction.seller, $expr3, $expr4) } + └─StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] } + └─StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] } + └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], 
dist: UpstreamHashShard(auction.id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), auction.seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, starttime, $expr2], pk_columns: [id, starttime, $expr2], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0, 1, 2]) from 1 - └── StreamProject { exprs: [auction.seller, $expr3, $expr4] } - └── StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0, 1, 2]) from 2 + └── StreamExchange Hash([0, 2, 3]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0, 1, 2]) from 2 + └── StreamProject { exprs: [auction.seller, $expr3, $expr4] } + └── StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0, 1, 2]) from 3 + + Fragment 2 StreamProject { exprs: [person.id, $expr1, $expr2, internal_last_seen_value(person.name)] } └── StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name), count] } { intermediate state table: 4, state tables: [], distinct tables: [] } └── StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] } @@ -883,7 +891,7 @@ ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] } └── StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] } └── Chain { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 7 } @@ -1133,27 +1141,31 @@ └─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) } stream_plan: |- StreamMaterialize { columns: [auction, bidder, price, date_time, value, bid._row_id(hidden), $expr1(hidden), side_input.key(hidden)], stream_key: [bid._row_id, $expr1], pk_columns: [bid._row_id, $expr1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] } - ├─StreamExchange { dist: HashShard($expr1) } - │ └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] } - │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(side_input.key) } - 
└─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) } + └─StreamExchange { dist: HashShard(bid._row_id, $expr1) } + └─StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] } + ├─StreamExchange { dist: HashShard($expr1) } + │ └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] } + │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(side_input.key) } + └─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, date_time, value, bid._row_id(hidden), $expr1(hidden), side_input.key(hidden)], stream_key: [bid._row_id, $expr1], pk_columns: [bid._row_id, $expr1], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] } - ├── StreamExchange Hash([4]) from 1 - └── StreamExchange NoShuffle from 2 + └── StreamExchange Hash([5, 6]) from 1 Fragment 1 + StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] } + ├── StreamExchange Hash([4]) from 2 + └── StreamExchange NoShuffle from 3 + + Fragment 2 StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] } └── Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 0 } ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) } { state table: 1 } ├── Upstream └── BatchPlanNode @@ -1166,7 +1178,7 @@ ├── columns: [ auction, bidder, price, date_time, value, bid._row_id, $expr1, side_input.key ] ├── primary key: [ $5 ASC, $6 ASC ] ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7 ] - ├── distribution key: [ 6 ] + ├── distribution key: [ 5, 6 ] └── read pk prefix len hint: 2 - id: nexmark_q14 @@ -1795,30 +1807,34 @@ └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id(hidden), auction.id(hidden)], stream_key: [bid._row_id, auction], pk_columns: [bid._row_id, auction], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] } - ├─StreamExchange 
{ dist: HashShard(bid.auction) } - │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } - └─StreamExchange { dist: HashShard(auction.id) } - └─StreamFilter { predicate: (auction.category = 10:Int32) } - └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } + └─StreamExchange { dist: HashShard(bid.auction, bid._row_id) } + └─StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] } + ├─StreamExchange { dist: HashShard(bid.auction) } + │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + └─StreamExchange { dist: HashShard(auction.id) } + └─StreamFilter { predicate: (auction.category = 10:Int32) } + └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id(hidden), auction.id(hidden)], stream_key: [bid._row_id, auction], pk_columns: [bid._row_id, auction], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0, 14]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 3 + + Fragment 2 Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 4 } ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 StreamFilter { predicate: (auction.category = 10:Int32) } └── Chain { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, 
auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 5 } ├── Upstream @@ -1840,7 +1856,7 @@ ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id, auction.id ] ├── primary key: [ $14 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 14 ] └── read pk prefix len hint: 2 - id: nexmark_q21 @@ -1946,33 +1962,37 @@ └─BatchScan { table: bid, columns: [bid.auction, bid.price], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, bid.auction(hidden)], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] } - ├─StreamExchange { dist: HashShard(auction.id) } - │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } - └─StreamProject { exprs: [bid.auction, max(bid.price)] } - └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } - └─StreamExchange { dist: HashShard(bid.auction) } - └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + └─StreamExchange { dist: HashShard(auction.id) } + └─StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] } + ├─StreamExchange { dist: HashShard(auction.id) } + │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } + └─StreamProject { exprs: [bid.auction, max(bid.price)] } + └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } + └─StreamExchange { dist: HashShard(bid.auction) } + └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, bid.auction(hidden)], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [bid.auction, max(bid.price)] } - └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 1 Fragment 1 + StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [bid.auction, max(bid.price)] } + └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } { intermediate state table: 5, state 
tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 3 + + Fragment 2 Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 4 } ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 } ├── Upstream └── BatchPlanNode @@ -2215,39 +2235,43 @@ └─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all } - ├─StreamExchange { dist: HashShard(auction.id) } - │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } - └─StreamProject { exprs: [bid.auction] } - └─StreamFilter { predicate: (count >= 20:Int32) } - └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } - └─StreamExchange { dist: HashShard(bid.auction) } - └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + └─StreamExchange { dist: HashShard(auction.id) } + └─StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all } + ├─StreamExchange { dist: HashShard(auction.id) } + │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } + └─StreamProject { exprs: [bid.auction] } + └─StreamFilter { predicate: (count >= 20:Int32) } + └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } + └─StreamExchange { dist: HashShard(bid.auction) } + └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [bid.auction] } - └── StreamFilter { predicate: (count >= 20:Int32) } - └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } - ├── intermediate state table: 5 - ├── state tables: [] - ├── distinct tables: [] - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 1 Fragment 1 + StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [bid.auction] } + └── StreamFilter { predicate: (count >= 20:Int32) } + └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } + ├── intermediate state table: 5 + ├── state tables: [] + ├── distinct tables: [] + └── StreamExchange Hash([0]) from 3 + + Fragment 2 Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } ├── state table: 4 ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: bid, 
columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 } ├── Upstream └── BatchPlanNode @@ -2333,39 +2357,43 @@ └─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all } - ├─StreamExchange { dist: HashShard(auction.id) } - │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } - └─StreamProject { exprs: [bid.auction] } - └─StreamFilter { predicate: (count < 20:Int32) } - └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } - └─StreamExchange { dist: HashShard(bid.auction) } - └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } + └─StreamExchange { dist: HashShard(auction.id) } + └─StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all } + ├─StreamExchange { dist: HashShard(auction.id) } + │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } + └─StreamProject { exprs: [bid.auction] } + └─StreamFilter { predicate: (count < 20:Int32) } + └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } + └─StreamExchange { dist: HashShard(bid.auction) } + └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [bid.auction] } - └── StreamFilter { predicate: (count < 20:Int32) } - └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } - ├── intermediate state table: 5 - ├── state tables: [] - ├── distinct tables: [] - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 1 Fragment 1 + StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [bid.auction] } + └── StreamFilter { predicate: (count < 20:Int32) } + └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } + ├── intermediate state table: 5 + ├── state tables: [] + ├── distinct tables: [] + └── StreamExchange Hash([0]) from 3 + + Fragment 2 Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } ├── state table: 4 ├── Upstream └── BatchPlanNode - Fragment 2 + Fragment 3 Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 } ├── Upstream └── BatchPlanNode diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml index 
7c694fad1fa67..31be64b2c480a 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml @@ -158,29 +158,33 @@ └─BatchSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, seller], pk_columns: [_row_id, _row_id#1, seller], pk_conflict: NoCheck } - └─StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } - ├─StreamExchange { dist: HashShard(seller) } - │ └─StreamFilter { predicate: (category = 10:Int32) } - │ └─StreamRowIdGen { row_id_index: 10 } - │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } - └─StreamExchange { dist: HashShard(id) } - └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } - └─StreamRowIdGen { row_id_index: 8 } - └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } + └─StreamExchange { dist: HashShard(_row_id, seller, _row_id) } + └─StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } + ├─StreamExchange { dist: HashShard(seller) } + │ └─StreamFilter { predicate: (category = 10:Int32) } + │ └─StreamRowIdGen { row_id_index: 10 } + │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + └─StreamExchange { dist: HashShard(id) } + └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } + └─StreamRowIdGen { row_id_index: 8 } + └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, seller], pk_columns: [_row_id, _row_id#1, seller], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([7]) from 1 - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([4, 5, 6]) from 1 Fragment 1 + StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([7]) from 2 + └── StreamExchange Hash([0]) from 3 + + Fragment 2 StreamFilter { predicate: (category = 10:Int32) } └── StreamRowIdGen { row_id_index: 10 } └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 4 } - Fragment 2 + Fragment 3 StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) } └── StreamRowIdGen { row_id_index: 8 } └── StreamSource { source: person, columns: [id, 
name, email_address, credit_card, city, state, date_time, extra, _row_id] } { source state table: 5 } @@ -211,7 +215,7 @@ ├── columns: [ name, city, state, id, _row_id, seller, _row_id#1 ] ├── primary key: [ $4 ASC, $6 ASC, $5 ASC ] ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ] - ├── distribution key: [ 5 ] + ├── distribution key: [ 4, 5, 6 ] └── read pk prefix len hint: 3 - id: nexmark_q4 @@ -737,40 +741,44 @@ └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, name, starttime, $expr2], pk_columns: [id, name, starttime, $expr2], pk_conflict: NoCheck } - └─StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } - ├─StreamExchange { dist: HashShard(id, $expr1, $expr2) } - │ └─StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } - │ └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) } - │ └─StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] } - │ └─StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] } - │ └─StreamRowIdGen { row_id_index: 8 } - │ └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } - └─StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } - └─StreamExchange { dist: HashShard(seller, $expr3, $expr4) } - └─StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] } - └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] } - └─StreamRowIdGen { row_id_index: 10 } - └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) } + └─StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } + ├─StreamExchange { dist: HashShard(id, $expr1, $expr2) } + │ └─StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } + │ └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) } + │ └─StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] } + │ └─StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] } + │ └─StreamRowIdGen { row_id_index: 8 } + │ └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } + └─StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } + └─StreamExchange { dist: HashShard(seller, $expr3, $expr4) } + └─StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] } + └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] } + └─StreamRowIdGen { row_id_index: 10 } + └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, 
category, extra, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, name, starttime, $expr2], pk_columns: [id, name, starttime, $expr2], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0, 2, 3]) from 1 - └── StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } { state table: 6 } - └── StreamExchange Hash([0, 1, 2]) from 3 + └── StreamExchange Hash([0, 1, 2, 3]) from 1 Fragment 1 - StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } { state table: 4 } - └── StreamExchange Hash([0, 1, 2, 3]) from 2 + StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0, 2, 3]) from 2 + └── StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } { state table: 6 } + └── StreamExchange Hash([0, 1, 2]) from 4 Fragment 2 + StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } { state table: 4 } + └── StreamExchange Hash([0, 1, 2, 3]) from 3 + + Fragment 3 StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] } └── StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] } └── StreamRowIdGen { row_id_index: 8 } └── StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } { source state table: 5 } - Fragment 3 + Fragment 4 StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] } └── StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] } └── StreamRowIdGen { row_id_index: 10 } @@ -796,7 +804,7 @@ ├── columns: [ id, name, starttime, $expr2, seller, $expr3, $expr4 ] ├── primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ] ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ] - ├── distribution key: [ 0, 2, 3 ] + ├── distribution key: [ 0, 1, 2, 3 ] └── read pk prefix len hint: 4 - id: nexmark_q9 @@ -1629,31 +1637,31 @@ └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck } - └─StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] } - ├─StreamExchange { dist: HashShard(auction) } - │ └─StreamRowIdGen { row_id_index: 7 } - │ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } - └─StreamExchange { dist: HashShard(id) } - └─StreamFilter { predicate: 
(category = 10:Int32) } - └─StreamRowIdGen { row_id_index: 10 } - └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + └─StreamExchange { dist: HashShard(auction, _row_id, _row_id) } + └─StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] } + ├─StreamExchange { dist: HashShard(auction) } + │ └─StreamRowIdGen { row_id_index: 7 } + │ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } + └─StreamExchange { dist: HashShard(id) } + └─StreamFilter { predicate: (category = 10:Int32) } + └─StreamRowIdGen { row_id_index: 10 } + └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0, 14, 15]) from 1 Fragment 1 + StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 3 + + Fragment 2 StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 4 } - Fragment 2 + Fragment 3 StreamFilter { predicate: (category = 10:Int32) } └── StreamRowIdGen { row_id_index: 10 } └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 5 } @@ -1674,7 +1682,7 @@ ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ] ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 14, 15 ] └── read pk prefix len hint: 3 - id: nexmark_q21 @@ -1775,30 +1783,34 @@ └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: 
LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } - ├─StreamExchange { dist: HashShard(id) } - │ └─StreamRowIdGen { row_id_index: 10 } - │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } - └─StreamProject { exprs: [auction, max(price)] } - └─StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } - └─StreamExchange { dist: HashShard(auction) } - └─StreamRowIdGen { row_id_index: 7 } - └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } + └─StreamExchange { dist: HashShard(id, _row_id) } + └─StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } + ├─StreamExchange { dist: HashShard(id) } + │ └─StreamRowIdGen { row_id_index: 10 } + │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + └─StreamProject { exprs: [auction, max(price)] } + └─StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [auction, max(price)] } - └── StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0, 3]) from 1 Fragment 1 + StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [auction, max(price)] } + └── StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 3 + + Fragment 2 StreamRowIdGen { row_id_index: 10 } └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 4 } - Fragment 2 + Fragment 3 StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 } @@ -1825,7 +1837,7 @@ ├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, auction ] ├── primary key: [ $3 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2, 3, 4 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 3 ] └── read pk prefix len hint: 2 - id: nexmark_q102 @@ -1992,37 +2004,41 @@ └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], 
filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] } - ├─StreamExchange { dist: HashShard(id) } - │ └─StreamRowIdGen { row_id_index: 10 } - │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } - └─StreamProject { exprs: [auction] } - └─StreamFilter { predicate: (count >= 20:Int32) } - └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } - └─StreamExchange { dist: HashShard(auction) } - └─StreamRowIdGen { row_id_index: 7 } - └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } + └─StreamExchange { dist: HashShard(id, _row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] } + ├─StreamExchange { dist: HashShard(id) } + │ └─StreamRowIdGen { row_id_index: 10 } + │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + └─StreamProject { exprs: [auction] } + └─StreamFilter { predicate: (count >= 20:Int32) } + └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [auction] } - └── StreamFilter { predicate: (count >= 20:Int32) } - └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0, 2]) from 1 Fragment 1 + StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [auction] } + └── StreamFilter { predicate: (count >= 20:Int32) } + └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 3 + + Fragment 2 StreamRowIdGen { row_id_index: 10 } └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } └── source state table: 4 - Fragment 2 + Fragment 3 StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 } @@ -2049,7 +2065,7 @@ ├── columns: [ auction_id, auction_item_name, _row_id ] ├── primary key: [ $2 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2 ] - ├── distribution 
key: [ 0 ] + ├── distribution key: [ 0, 2 ] └── read pk prefix len hint: 2 - id: nexmark_q104 @@ -2080,37 +2096,41 @@ └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] } - ├─StreamExchange { dist: HashShard(id) } - │ └─StreamRowIdGen { row_id_index: 10 } - │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } - └─StreamProject { exprs: [auction] } - └─StreamFilter { predicate: (count < 20:Int32) } - └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } - └─StreamExchange { dist: HashShard(auction) } - └─StreamRowIdGen { row_id_index: 7 } - └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } + └─StreamExchange { dist: HashShard(id, _row_id) } + └─StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] } + ├─StreamExchange { dist: HashShard(id) } + │ └─StreamRowIdGen { row_id_index: 10 } + │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + └─StreamProject { exprs: [auction] } + └─StreamFilter { predicate: (count < 20:Int32) } + └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } + └─StreamExchange { dist: HashShard(auction) } + └─StreamRowIdGen { row_id_index: 7 } + └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [auction] } - └── StreamFilter { predicate: (count < 20:Int32) } - └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0, 2]) from 1 Fragment 1 + StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [auction] } + └── StreamFilter { predicate: (count < 20:Int32) } + └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 3 + + Fragment 2 StreamRowIdGen { row_id_index: 10 } └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } └── source state table: 4 - Fragment 2 + Fragment 3 StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, 
date_time, extra, _row_id] } { source state table: 6 } @@ -2137,7 +2157,7 @@ ├── columns: [ auction_id, auction_item_name, _row_id ] ├── primary key: [ $2 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 2 ] └── read pk prefix len hint: 2 - id: nexmark_q105 diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml index ccdde39e76764..c6c3ffd4f5ad6 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml @@ -717,65 +717,69 @@ AND P.endtime = A.endtime; stream_plan: |- StreamMaterialize { columns: [id, name, starttime, $expr6(hidden), $expr8(hidden), $expr9(hidden), $expr10(hidden)], stream_key: [id, name, starttime, $expr6], pk_columns: [id, name, starttime, $expr6], pk_conflict: NoCheck } - └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } - ├─StreamExchange { dist: HashShard($expr2, $expr5, $expr6) } - │ └─StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } - │ └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) } - │ └─StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] } - │ └─StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] } - │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] } - │ └─StreamFilter { predicate: (event_type = 0:Int32) } - │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, _row_id] } - │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } - └─StreamExchange { dist: HashShard($expr8, $expr9, $expr10) } - └─StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] } - └─StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] } - └─StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] } - └─StreamFilter { predicate: (event_type = 1:Int32) } - └─StreamShare { id: 5 } - └─StreamProject { exprs: [event_type, person, auction, _row_id] } - └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) } + └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } + ├─StreamExchange { dist: HashShard($expr2, $expr5, $expr6) } + │ └─StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } + │ └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) } + │ └─StreamProject { exprs: [$expr2, 
$expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] } + │ └─StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] } + │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] } + │ └─StreamFilter { predicate: (event_type = 0:Int32) } + │ └─StreamShare { id: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, _row_id] } + │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } + └─StreamExchange { dist: HashShard($expr8, $expr9, $expr10) } + └─StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] } + └─StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] } + └─StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] } + └─StreamFilter { predicate: (event_type = 1:Int32) } + └─StreamShare { id: 5 } + └─StreamProject { exprs: [event_type, person, auction, _row_id] } + └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [id, name, starttime, $expr6(hidden), $expr8(hidden), $expr9(hidden), $expr10(hidden)], stream_key: [id, name, starttime, $expr6], pk_columns: [id, name, starttime, $expr6], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0, 2, 3]) from 1 - └── StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } { state table: 6 } - └── StreamExchange Hash([0, 1, 2]) from 4 + └── StreamExchange Hash([0, 1, 2, 3]) from 1 Fragment 1 - StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } { state table: 4 } - └── StreamExchange Hash([0, 1, 2, 3]) from 2 + StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0, 2, 3]) from 2 + └── StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } { state table: 6 } + └── StreamExchange Hash([0, 1, 2]) from 5 Fragment 2 + StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } { state table: 4 } + └── StreamExchange Hash([0, 1, 2, 3]) from 3 + + Fragment 3 StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] } └── StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] } └── StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] } └── StreamFilter { predicate: 
(event_type = 0:Int32) } - └── StreamExchange NoShuffle from 3 + └── StreamExchange NoShuffle from 4 - Fragment 3 + Fragment 4 StreamProject { exprs: [event_type, person, auction, _row_id] } └── StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } └── StreamRowIdGen { row_id_index: 5 } └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 } - Fragment 4 + Fragment 5 StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] } └── StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] } └── StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 3 + └── StreamExchange NoShuffle from 4 Table 0 { columns: [ $expr2, $expr3, $expr5, $expr6 ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 } @@ -795,7 +799,7 @@ ├── columns: [ id, name, starttime, $expr6, $expr8, $expr9, $expr10 ] ├── primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ] ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ] - ├── distribution key: [ 0, 2, 3 ] + ├── distribution key: [ 0, 1, 2, 3 ] └── read pk prefix len hint: 4 - id: nexmark_q9 @@ -1180,59 +1184,63 @@ WHERE A.category = 10; stream_plan: |- StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } - ├─StreamExchange { dist: HashShard($expr3) } - │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] } - │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } - │ ├─StreamFilter { predicate: (event_type = 2:Int32) } - │ │ └─StreamShare { id: 5 } - │ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } - │ │ └─StreamRowIdGen { row_id_index: 5 } - │ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - │ └─StreamExchange { dist: Broadcast } - │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } - │ └─StreamNow { output: [now] } - └─StreamExchange { dist: HashShard($expr9) } - └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, 
Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] } - └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } - └─StreamShare { id: 5 } - └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr3, _row_id, _row_id) } + └─StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } + ├─StreamExchange { dist: HashShard($expr3) } + │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] } + │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } + │ ├─StreamFilter { predicate: (event_type = 2:Int32) } + │ │ └─StreamShare { id: 5 } + │ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } + │ │ └─StreamRowIdGen { row_id_index: 5 } + │ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + │ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + │ └─StreamExchange { dist: Broadcast } + │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } + │ └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard($expr9) } + └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] } + └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } + └─StreamShare { id: 5 } + └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], 
stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 4 + └── StreamExchange Hash([0, 14, 15]) from 1 Fragment 1 + StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 5 + + Fragment 2 StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] } └── StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { left table: 4, right table: 5 } ├── StreamFilter { predicate: (event_type = 2:Int32) } - │ └── StreamExchange NoShuffle from 2 - └── StreamExchange Broadcast from 3 + │ └── StreamExchange NoShuffle from 3 + └── StreamExchange Broadcast from 4 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } └── StreamRowIdGen { row_id_index: 5 } └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 6 } - Fragment 3 + Fragment 4 StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } └── StreamNow { output: [now] } { state table: 7 } - Fragment 4 + Fragment 5 StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] } └── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr3, $expr4, $expr5, $expr6, $expr7, $expr8, _row_id ], primary key: [ $0 ASC, $6 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 1 } @@ -1250,7 +1258,12 @@ Table 7 { columns: [ now ], primary key: [], value indices: [ 0 ], distribution key: [], read pk prefix len hint: 0 } - Table 4294967294 { columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ], primary key: [ $14 ASC, $15 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], distribution key: [ 0 ], read 
pk prefix len hint: 3 } + Table 4294967294 + ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ] + ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ] + ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] + ├── distribution key: [ 0, 14, 15 ] + └── read pk prefix len hint: 3 - id: nexmark_q21 before: @@ -1375,61 +1388,65 @@ ) b ON a.id = b.auction; stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } - │ └─StreamFilter { predicate: (event_type = 1:Int32) } - │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamProject { exprs: [$expr5, max($expr6)] } - └─StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } - └─StreamExchange { dist: HashShard($expr5) } - └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] } - └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } - ├─StreamFilter { predicate: (event_type = 2:Int32) } - │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } - └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard($expr2, _row_id) } + └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } + │ └─StreamFilter { predicate: (event_type = 1:Int32) } + │ └─StreamShare { id: 5 } + │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, 
_row_id] } + └─StreamProject { exprs: [$expr5, max($expr6)] } + └─StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } + └─StreamExchange { dist: HashShard($expr5) } + └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] } + └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } + ├─StreamFilter { predicate: (event_type = 2:Int32) } + │ └─StreamShare { id: 5 } + │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } + └─StreamNow { output: [now] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [$expr5, max($expr6)] } - └── StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } { intermediate state table: 6, state tables: [ 5 ], distinct tables: [] } - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([0, 3]) from 1 Fragment 1 + StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [$expr5, max($expr6)] } + └── StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } { intermediate state table: 6, state tables: [ 5 ], distinct tables: [] } + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } └── StreamRowIdGen { row_id_index: 5 } └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] } └── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { left table: 7, right table: 8 } ├── StreamFilter { predicate: 
(event_type = 2:Int32) } - │ └── StreamExchange NoShuffle from 2 - └── StreamExchange Broadcast from 4 + │ └── StreamExchange NoShuffle from 3 + └── StreamExchange Broadcast from 5 - Fragment 4 + Fragment 5 StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } └── StreamNow { output: [now] } { state table: 9 } @@ -1457,7 +1474,7 @@ ├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, $expr5 ] ├── primary key: [ $3 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2, 3, 4 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 3 ] └── read pk prefix len hint: 2 - id: nexmark_q102 @@ -1642,65 +1659,69 @@ ); stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } - │ └─StreamFilter { predicate: (event_type = 1:Int32) } - │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamProject { exprs: [$expr5] } - └─StreamFilter { predicate: (count >= 20:Int32) } - └─StreamHashAgg { group_key: [$expr5], aggs: [count] } - └─StreamExchange { dist: HashShard($expr5) } - └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] } - └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } - ├─StreamFilter { predicate: (event_type = 2:Int32) } - │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } - └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard($expr2, _row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } + │ └─StreamFilter { predicate: (event_type = 1:Int32) } + │ └─StreamShare { id: 5 } + │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: 
[$expr1] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamProject { exprs: [$expr5] } + └─StreamFilter { predicate: (count >= 20:Int32) } + └─StreamHashAgg { group_key: [$expr5], aggs: [count] } + └─StreamExchange { dist: HashShard($expr5) } + └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] } + └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } + ├─StreamFilter { predicate: (event_type = 2:Int32) } + │ └─StreamShare { id: 5 } + │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } + └─StreamNow { output: [now] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [$expr5] } - └── StreamFilter { predicate: (count >= 20:Int32) } - └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([0, 2]) from 1 Fragment 1 + StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [$expr5] } + └── StreamFilter { predicate: (count >= 20:Int32) } + └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } └── StreamRowIdGen { row_id_index: 5 } └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] } └── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } ├── left table: 6 ├── right table: 7 ├── StreamFilter { predicate: (event_type = 
2:Int32) } - │ └── StreamExchange NoShuffle from 2 - └── StreamExchange Broadcast from 4 + │ └── StreamExchange NoShuffle from 3 + └── StreamExchange Broadcast from 5 - Fragment 4 + Fragment 5 StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } └── StreamNow { output: [now] } { state table: 8 } @@ -1731,7 +1752,7 @@ ├── columns: [ auction_id, auction_item_name, _row_id ] ├── primary key: [ $2 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 2 ] └── read pk prefix len hint: 2 - id: nexmark_q104 @@ -1752,65 +1773,69 @@ ); stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } - │ └─StreamFilter { predicate: (event_type = 1:Int32) } - │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamProject { exprs: [$expr5] } - └─StreamFilter { predicate: (count < 20:Int32) } - └─StreamHashAgg { group_key: [$expr5], aggs: [count] } - └─StreamExchange { dist: HashShard($expr5) } - └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] } - └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } - ├─StreamFilter { predicate: (event_type = 2:Int32) } - │ └─StreamShare { id: 5 } - │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } - └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard($expr2, _row_id) } + └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } + │ └─StreamFilter { predicate: (event_type = 1:Int32) } + │ └─StreamShare { id: 5 } + │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamSource { source: nexmark, 
columns: [event_type, person, auction, bid, _row_id] } + └─StreamProject { exprs: [$expr5] } + └─StreamFilter { predicate: (count < 20:Int32) } + └─StreamHashAgg { group_key: [$expr5], aggs: [count] } + └─StreamExchange { dist: HashShard($expr5) } + └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] } + └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } + ├─StreamFilter { predicate: (event_type = 2:Int32) } + │ └─StreamShare { id: 5 } + │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } + └─StreamNow { output: [now] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [$expr5] } - └── StreamFilter { predicate: (count < 20:Int32) } - └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([0, 2]) from 1 Fragment 1 + StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [$expr5] } + └── StreamFilter { predicate: (count < 20:Int32) } + └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } └── StreamRowIdGen { row_id_index: 5 } └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] } └── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } ├── left table: 6 ├── right table: 7 ├── StreamFilter { predicate: (event_type = 2:Int32) } - │ └── StreamExchange NoShuffle from 2 - 
└── StreamExchange Broadcast from 4 + │ └── StreamExchange NoShuffle from 3 + └── StreamExchange Broadcast from 5 - Fragment 4 + Fragment 5 StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] } └── StreamNow { output: [now] } { state table: 8 } @@ -1841,7 +1866,7 @@ ├── columns: [ auction_id, auction_item_name, _row_id ] ├── primary key: [ $2 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 2 ] └── read pk prefix len hint: 2 - id: nexmark_q105 diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml index 3554e31d281ec..39adc39a16653 100644 --- a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml @@ -131,45 +131,49 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), $expr3(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, $expr3], pk_columns: [_row_id, _row_id#1, $expr3], pk_conflict: NoCheck } - └─StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] } - ├─StreamExchange { dist: HashShard($expr3) } - │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] } - │ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } - │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [event_type, person, auction, _row_id] } - │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamExchange { dist: HashShard($expr4) } - └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] } - └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } - └─StreamShare { id: 6 } - └─StreamProject { exprs: [event_type, person, auction, _row_id] } - └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - └─StreamProject { exprs: [event_type, person, auction, bid, 
Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard(_row_id, $expr3, _row_id) } + └─StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] } + ├─StreamExchange { dist: HashShard($expr3) } + │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] } + │ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } + │ └─StreamShare { id: 6 } + │ └─StreamProject { exprs: [event_type, person, auction, _row_id] } + │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr4) } + └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] } + └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } + └─StreamShare { id: 6 } + └─StreamProject { exprs: [event_type, person, auction, _row_id] } + └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), $expr3(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, $expr3], pk_columns: [_row_id, _row_id#1, $expr3], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([1]) from 1 - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([4, 5, 6]) from 1 Fragment 1 + StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, 
$expr2, _row_id, $expr3, _row_id] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([1]) from 2 + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] } └── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, person, auction, _row_id] } └── StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) } └── StreamRowIdGen { row_id_index: 5 } @@ -177,10 +181,10 @@ └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] } └── StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 1 ], read pk prefix len hint: 1 } @@ -194,7 +198,7 @@ Table 5 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 } - Table 4294967294 { columns: [ name, city, state, id, _row_id, $expr3, _row_id#1 ], primary key: [ $4 ASC, $6 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 5 ], read pk prefix len hint: 3 } + Table 4294967294 { columns: [ name, city, state, id, _row_id, $expr3, _row_id#1 ], primary key: [ $4 ASC, $6 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 4, 5, 6 ], read pk prefix len hint: 3 } eowc_stream_error: |- Not supported: The query cannot be executed in Emit-On-Window-Close mode. 
@@ -696,43 +700,48 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] } - └─StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] } - ├─StreamExchange { dist: HashShard($expr4) } - │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: (event_type = 2:Int32) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamExchange { dist: HashShard(max($expr4)) } - └─StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] } - └─StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] } - └─StreamExchange { dist: HashShard($expr5) } - └─StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] } - └─StreamShare { id: 6 } - └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] } - └─StreamFilter { predicate: (event_type = 2:Int32) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr4, _row_id, $expr5) } + └─StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] } + ├─StreamExchange { dist: HashShard($expr4) } + │ └─StreamShare { id: 6 } + │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: (event_type = 2:Int32) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: 
$expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard(max($expr4)) } + └─StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] } + └─StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] } + └─StreamExchange { dist: HashShard($expr5) } + └─StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] } + └─StreamShare { id: 6 } + └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] } + └─StreamFilter { predicate: (event_type = 2:Int32) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 - StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] } { materialized table: 4294967294 } - └── StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] } - ├── left table: 0 - ├── right table: 2 - ├── left degree table: 1 - ├── right degree table: 3 - ├── StreamExchange Hash([2]) from 1 - └── StreamExchange Hash([1]) from 3 + StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] } + ├── materialized table: 4294967294 + └── StreamExchange Hash([1, 4, 5]) from 1 Fragment 1 - StreamNoOp - └── StreamExchange NoShuffle from 2 + StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] } + ├── left table: 0 + ├── right table: 2 + ├── left degree table: 1 + ├── right degree table: 3 + ├── StreamExchange Hash([2]) from 2 + └── StreamExchange Hash([1]) from 4 Fragment 2 + StreamNoOp + └── StreamExchange NoShuffle from 3 + + Fragment 3 StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], 
output_watermarks: [$expr1] } └── StreamFilter { predicate: (event_type = 2:Int32) } └── StreamRowIdGen { row_id_index: 5 } @@ -740,14 +749,14 @@ └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 } - Fragment 3 + Fragment 4 StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] } └── StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] } { intermediate state table: 6, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 4 + └── StreamExchange Hash([0]) from 5 - Fragment 4 + Fragment 5 StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, $expr4, $expr1, _row_id ], primary key: [ $2 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4 ], distribution key: [ 2 ], read pk prefix len hint: 1 } @@ -763,7 +772,7 @@ Table 6 { columns: [ $expr5, max($expr4), count ], primary key: [ $0 ASC ], value indices: [ 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 } - Table 4294967294 { columns: [ auction, price, bidder, date_time, _row_id, $expr5 ], primary key: [ $4 ASC, $5 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 1 ], read pk prefix len hint: 3 } + Table 4294967294 { columns: [ auction, price, bidder, date_time, _row_id, $expr5 ], primary key: [ $4 ASC, $5 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 1, 4, 5 ], read pk prefix len hint: 3 } eowc_stream_plan: |- StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time] } @@ -845,52 +854,56 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime, $expr5(hidden), $expr6(hidden), $expr8(hidden)] } - └─StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } - ├─StreamExchange { dist: HashShard($expr3, $expr2, $expr5) } - │ └─StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } - │ └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) } - │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] } - │ └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] } - │ └─StreamFilter { predicate: (event_type = 0:Int32) } - │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [event_type, person, auction, $expr1, 
_row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } - └─StreamExchange { dist: HashShard($expr7, $expr6, $expr8) } - └─StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] } - └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] } - └─StreamFilter { predicate: (event_type = 1:Int32) } - └─StreamShare { id: 6 } - └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] } - └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) } + └─StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } + ├─StreamExchange { dist: HashShard($expr3, $expr2, $expr5) } + │ └─StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } + │ └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) } + │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] } + │ └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] } + │ └─StreamFilter { predicate: (event_type = 0:Int32) } + │ └─StreamShare { id: 6 } + │ └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } + └─StreamExchange { dist: HashShard($expr7, $expr6, $expr8) } + └─StreamProject { exprs: 
[Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] } + └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] } + └─StreamFilter { predicate: (event_type = 1:Int32) } + └─StreamShare { id: 6 } + └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] } + └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime, $expr5(hidden), $expr6(hidden), $expr8(hidden)] } ├── materialized table: 4294967294 - └── StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0, 2, 3]) from 1 - └── StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } { state table: 7 } - └── StreamExchange Hash([0, 1, 2]) from 4 + └── StreamExchange Hash([0, 1, 2, 3]) from 1 Fragment 1 - StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } { state table: 4 } - └── StreamExchange Hash([0, 1, 2, 3]) from 2 + StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0, 2, 3]) from 2 + └── StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } { state table: 7 } + └── StreamExchange Hash([0, 1, 2]) from 5 Fragment 2 + StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } { state table: 4 } + └── StreamExchange Hash([0, 1, 2, 3]) from 3 + + Fragment 3 StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] } └── StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] } └── StreamFilter { predicate: (event_type = 0:Int32) } - └── StreamExchange NoShuffle from 3 + └── StreamExchange NoShuffle from 4 - Fragment 3 + Fragment 4 StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) } └── StreamRowIdGen { row_id_index: 5 } @@ -898,11 +911,11 @@ └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), 
Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 6 } - Fragment 4 + Fragment 5 StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] } └── StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 3 + └── StreamExchange NoShuffle from 4 Table 0 { columns: [ $expr3, $expr4, $expr2, $expr5 ], primary key: [ $2 ASC, $3 ASC, $0 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 } @@ -920,7 +933,7 @@ Table 7 { columns: [ $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 } - Table 4294967294 { columns: [ id, name, starttime, $expr5, $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 4 } + Table 4294967294 { columns: [ id, name, starttime, $expr5, $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 1, 2, 3 ], read pk prefix len hint: 4 } eowc_stream_plan: |- StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime] } @@ -1715,41 +1728,45 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck } - └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: (event_type = 2:Int32) } - │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, 
_row_id] } - └─StreamExchange { dist: HashShard($expr7) } - └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] } - └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } - └─StreamShare { id: 6 } - └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } - └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr2, _row_id, _row_id) } + └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: (event_type = 2:Int32) } + │ └─StreamShare { id: 6 } + │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr7) } + └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] } + └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } + └─StreamShare { id: 6 } + └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } + └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + 
└─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([0, 14, 15]) from 1 Fragment 1 + StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: (event_type = 2:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) } └── StreamRowIdGen { row_id_index: 5 } @@ -1757,10 +1774,10 @@ └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] } └── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, $expr4, $expr5, $expr6, $expr1, _row_id ], primary key: [ $0 ASC, $6 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 1 } @@ -1774,7 +1791,12 @@ Table 5 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 } - Table 4294967294 { columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, 
reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ], primary key: [ $14 ASC, $15 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], distribution key: [ 0 ], read pk prefix len hint: 3 } + Table 4294967294 + ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ] + ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ] + ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] + ├── distribution key: [ 0, 14, 15 ] + └── read pk prefix len hint: 3 eowc_stream_error: |- Not supported: The query cannot be executed in Emit-On-Window-Close mode. @@ -1909,45 +1931,49 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr4(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } - │ └─StreamFilter { predicate: (event_type = 1:Int32) } - │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamProject { exprs: [$expr4, max($expr5)] } - └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } - └─StreamExchange { dist: HashShard($expr4) } - └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] } - └─StreamFilter { predicate: (event_type = 2:Int32) } - └─StreamShare { id: 6 } - └─StreamProject { exprs: [event_type, auction, bid, _row_id] } - └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr2, _row_id) } + └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } + │ └─StreamFilter { predicate: (event_type = 1:Int32) } + │ └─StreamShare { id: 6 } + │ └─StreamProject { exprs: [event_type, auction, bid, 
_row_id] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamProject { exprs: [$expr4, max($expr5)] } + └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } + └─StreamExchange { dist: HashShard($expr4) } + └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] } + └─StreamFilter { predicate: (event_type = 2:Int32) } + └─StreamShare { id: 6 } + └─StreamProject { exprs: [event_type, auction, bid, _row_id] } + └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr4(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [$expr4, max($expr5)] } - └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } { intermediate state table: 6, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([0, 3]) from 1 Fragment 1 + StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [$expr4, max($expr5)] } + └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } { intermediate state table: 6, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, _row_id] } └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } └── StreamRowIdGen { row_id_index: 5 } @@ -1955,10 +1981,10 @@ └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, 
_row_id] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] } └── StreamFilter { predicate: (event_type = 2:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 } @@ -1978,7 +2004,7 @@ ├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, $expr4 ] ├── primary key: [ $3 ASC, $0 ASC ] ├── value indices: [ 0, 1, 2, 3, 4 ] - ├── distribution key: [ 0 ] + ├── distribution key: [ 0, 3 ] └── read pk prefix len hint: 2 eowc_stream_error: |- @@ -2184,47 +2210,51 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } - │ └─StreamFilter { predicate: (event_type = 1:Int32) } - │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamProject { exprs: [$expr4] } - └─StreamFilter { predicate: (count >= 20:Int32) } - └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } - └─StreamExchange { dist: HashShard($expr4) } - └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] } - └─StreamFilter { predicate: (event_type = 2:Int32) } - └─StreamShare { id: 6 } - └─StreamProject { exprs: [event_type, auction, bid, _row_id] } - └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr2, _row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } + │ └─StreamFilter { predicate: (event_type = 1:Int32) } + │ └─StreamShare { id: 6 } + │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] } + │ 
└─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamProject { exprs: [$expr4] } + └─StreamFilter { predicate: (count >= 20:Int32) } + └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } + └─StreamExchange { dist: HashShard($expr4) } + └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] } + └─StreamFilter { predicate: (event_type = 2:Int32) } + └─StreamShare { id: 6 } + └─StreamProject { exprs: [event_type, auction, bid, _row_id] } + └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [$expr4] } - └── StreamFilter { predicate: (count >= 20:Int32) } - └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([0, 2]) from 1 Fragment 1 + StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [$expr4] } + └── StreamFilter { predicate: (count >= 20:Int32) } + └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, _row_id] } └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } └── StreamRowIdGen { row_id_index: 5 } @@ -2232,10 +2262,10 @@ └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, 
bid, _row_id] } { source state table: 5 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] } └── StreamFilter { predicate: (event_type = 2:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 } @@ -2251,7 +2281,7 @@ Table 6 { columns: [ $expr4, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 } - Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 2 } + Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 2 ], read pk prefix len hint: 2 } eowc_stream_error: |- Not supported: The query cannot be executed in Emit-On-Window-Close mode. @@ -2290,47 +2320,51 @@ └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } - ├─StreamExchange { dist: HashShard($expr2) } - │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } - │ └─StreamFilter { predicate: (event_type = 1:Int32) } - │ └─StreamShare { id: 6 } - │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] } - │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - │ └─StreamRowIdGen { row_id_index: 5 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } - └─StreamProject { exprs: [$expr4] } - └─StreamFilter { predicate: (count < 20:Int32) } - └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } - └─StreamExchange { dist: HashShard($expr4) } - └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] } - └─StreamFilter { predicate: (event_type = 2:Int32) } - └─StreamShare { id: 6 } - └─StreamProject { exprs: [event_type, auction, bid, _row_id] } - └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } - └─StreamRowIdGen { row_id_index: 5 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } - └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } - └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamExchange { dist: HashShard($expr2, _row_id) } + └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } + ├─StreamExchange { dist: HashShard($expr2) } + │ └─StreamProject { exprs: 
[Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } + │ └─StreamFilter { predicate: (event_type = 1:Int32) } + │ └─StreamShare { id: 6 } + │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] } + │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + │ └─StreamRowIdGen { row_id_index: 5 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } + └─StreamProject { exprs: [$expr4] } + └─StreamFilter { predicate: (count < 20:Int32) } + └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } + └─StreamExchange { dist: HashShard($expr4) } + └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] } + └─StreamFilter { predicate: (event_type = 2:Int32) } + └─StreamShare { id: 6 } + └─StreamProject { exprs: [event_type, auction, bid, _row_id] } + └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } + └─StreamRowIdGen { row_id_index: 5 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] } + └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } + └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [$expr4] } - └── StreamFilter { predicate: (count < 20:Int32) } - └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } - └── StreamExchange Hash([0]) from 3 + └── StreamExchange Hash([0, 2]) from 1 Fragment 1 + StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [$expr4] } + └── StreamFilter { predicate: (count < 20:Int32) } + └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] } + └── StreamExchange Hash([0]) from 4 + + Fragment 2 StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] } └── StreamFilter { predicate: (event_type = 1:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 2 + Fragment 3 StreamProject { exprs: [event_type, auction, bid, _row_id] } └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) } └── StreamRowIdGen { row_id_index: 5 } @@ -2338,10 +2372,10 @@ └── StreamProject { exprs: [event_type, person, 
auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] } └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 } - Fragment 3 + Fragment 4 StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] } └── StreamFilter { predicate: (event_type = 2:Int32) } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 } @@ -2357,7 +2391,7 @@ Table 6 { columns: [ $expr4, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 } - Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 2 } + Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 2 ], read pk prefix len hint: 2 } eowc_stream_error: |- Not supported: The query cannot be executed in Emit-On-Window-Close mode. diff --git a/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml b/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml index 5cc81578f829c..733a19f4ba05c 100644 --- a/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml +++ b/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml @@ -185,15 +185,16 @@ └─BatchScan { table: t, columns: [t.x, t.y, t.w], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [x, y, sum, max, min, t._row_id(hidden), t.y(hidden)], stream_key: [t._row_id, y], pk_columns: [t._row_id, y], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.y = t.y, output: [t.x, t.y, sum(t.x), max(t.x), min(t.w), t._row_id, t.y] } - ├─StreamExchange { dist: HashShard(t.y) } - │ └─StreamShare { id: 1 } - │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamProject { exprs: [t.y, sum(t.x), max(t.x), min(t.w)] } - └─StreamHashAgg { group_key: [t.y], aggs: [sum(t.x), max(t.x), min(t.w), count] } - └─StreamExchange { dist: HashShard(t.y) } - └─StreamShare { id: 1 } - └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.y, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: t.y = t.y, output: [t.x, t.y, sum(t.x), max(t.x), min(t.w), t._row_id, t.y] } + ├─StreamExchange { dist: HashShard(t.y) } + │ └─StreamShare { id: 1 } + │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamProject { exprs: [t.y, sum(t.x), max(t.x), min(t.w)] } + └─StreamHashAgg { group_key: [t.y], aggs: [sum(t.x), max(t.x), min(t.w), count] } + └─StreamExchange { dist: HashShard(t.y) } + └─StreamShare { id: 1 } + └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - id: aggregate with over clause, rows frame definition with implicit current row, without ORDER BY sql: | create table t(x int, y int); @@ -913,12 +914,13 @@ └─BatchScan { table: t, columns: [t.x, t.y, t.z], distribution: SomeShard 
} stream_plan: |- StreamMaterialize { columns: [t1x, t2x, t1z, t2y, t2z, t._row_id(hidden)], stream_key: [t1x, t._row_id], pk_columns: [t1x, t._row_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.x = t.x, output: [t.x, t.x, t.z, t.y, t.z, t._row_id] } - ├─StreamGroupTopN { order: [t.y ASC], limit: 1, offset: 0, group_key: [t.x] } - │ └─StreamExchange { dist: HashShard(t.x) } - │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(t.x) } - └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.x, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: t.x = t.x, output: [t.x, t.x, t.z, t.y, t.z, t._row_id] } + ├─StreamGroupTopN { order: [t.y ASC], limit: 1, offset: 0, group_key: [t.x] } + │ └─StreamExchange { dist: HashShard(t.x) } + │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.x) } + └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - id: split calls with different ORDER BY or PARTITION BY sql: | create table t(x int, y int, z int); diff --git a/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml b/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml index 55131ed1614cd..65469e7754e6b 100644 --- a/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml +++ b/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml @@ -21,15 +21,16 @@ Tone.id = Ttwo.id; stream_plan: |- StreamMaterialize { columns: [max_v1, max_v2, t1.id(hidden), t2.id(hidden)], stream_key: [t1.id], pk_columns: [t1.id], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.id = t2.id, output: [max(t1.v1), max(t2.v2), t1.id, t2.id] } - ├─StreamProject { exprs: [t1.id, max(t1.v1)] } - │ └─StreamHashAgg { group_key: [t1.id], aggs: [max(t1.v1), count] } - │ └─StreamExchange { dist: HashShard(t1.id) } - │ └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamProject { exprs: [t2.id, max(t2.v2)] } - └─StreamHashAgg { group_key: [t2.id], aggs: [max(t2.v2), count] } - └─StreamExchange { dist: HashShard(t2.id) } - └─StreamTableScan { table: t2, columns: [t2.id, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.id) } + └─StreamHashJoin { type: Inner, predicate: t1.id = t2.id, output: [max(t1.v1), max(t2.v2), t1.id, t2.id] } + ├─StreamProject { exprs: [t1.id, max(t1.v1)] } + │ └─StreamHashAgg { group_key: [t1.id], aggs: [max(t1.v1), count] } + │ └─StreamExchange { dist: HashShard(t1.id) } + │ └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamProject { exprs: [t2.id, max(t2.v2)] } + └─StreamHashAgg { group_key: [t2.id], aggs: [max(t2.v2), count] } + └─StreamExchange { dist: HashShard(t2.id) } + └─StreamTableScan { table: t2, columns: [t2.id, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - sql: | create table t (id int, v int); SELECT Tone.max_v, Ttwo.min_v @@ -51,15 +52,16 @@ Tone.id = Ttwo.id; stream_plan: |- StreamMaterialize { columns: [max_v, min_v, t.id(hidden), t.id#1(hidden)], stream_key: [t.id], 
pk_columns: [t.id], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.id = t.id, output: [max(t.v), min(t.v), t.id, t.id] } - ├─StreamProject { exprs: [t.id, max(t.v)] } - │ └─StreamHashAgg { group_key: [t.id], aggs: [max(t.v), count] } - │ └─StreamExchange { dist: HashShard(t.id) } - │ └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamProject { exprs: [t.id, min(t.v)] } - └─StreamHashAgg { group_key: [t.id], aggs: [min(t.v), count] } - └─StreamExchange { dist: HashShard(t.id) } - └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.id) } + └─StreamHashJoin { type: Inner, predicate: t.id = t.id, output: [max(t.v), min(t.v), t.id, t.id] } + ├─StreamProject { exprs: [t.id, max(t.v)] } + │ └─StreamHashAgg { group_key: [t.id], aggs: [max(t.v), count] } + │ └─StreamExchange { dist: HashShard(t.id) } + │ └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamProject { exprs: [t.id, min(t.v)] } + └─StreamHashAgg { group_key: [t.id], aggs: [min(t.v), count] } + └─StreamExchange { dist: HashShard(t.id) } + └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - sql: | create table t (v1 varchar, v2 varchar, v3 varchar); select diff --git a/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml b/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml index ae37459ef7bed..91dff73df0e6a 100644 --- a/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml +++ b/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml @@ -260,15 +260,16 @@ └─LogicalScan { table: t2, columns: [t2.v2], predicate: (t2.v2 > ('2021-04-01 00:00:00+00:00':Timestamptz + '01:00:00':Interval)) } stream_plan: |- StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1], pk_columns: [t1._row_id, t2._row_id, v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamDynamicFilter { predicate: (t1.v1 > $expr1), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true } - │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ └─StreamExchange { dist: Broadcast } - │ └─StreamProject { exprs: [AddWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } - │ └─StreamNow { output: [now] } - └─StreamExchange { dist: HashShard(t2.v2) } - └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamDynamicFilter { predicate: (t1.v1 > $expr1), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true } + │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ └─StreamExchange { dist: Broadcast } + │ └─StreamProject { exprs: [AddWithTimeZone(now, 
'01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } + │ └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard(t2.v2) } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: now() in a complex cmp expr does not get pushed down sql: | create table t1(v1 timestamp with time zone); @@ -343,14 +344,15 @@ └─LogicalScan { table: t2, columns: [t2.v2], predicate: (t2.v2 > '2021-04-01 00:00:00+00:00':Timestamptz) } stream_plan: |- StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1], pk_columns: [t1._row_id, t2._row_id, v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamDynamicFilter { predicate: (t1.v1 > now), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true } - │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ └─StreamExchange { dist: Broadcast } - │ └─StreamNow { output: [now] } - └─StreamExchange { dist: HashShard(t2.v2) } - └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamDynamicFilter { predicate: (t1.v1 > now), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true } + │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ └─StreamExchange { dist: Broadcast } + │ └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard(t2.v2) } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: eq-predicate derived condition is banned for mismatching types sql: | create table t1(v1 int, v2 int); diff --git a/src/frontend/planner_test/tests/testdata/output/project_set.yaml b/src/frontend/planner_test/tests/testdata/output/project_set.yaml index 23db668a070df..676772d99d72e 100644 --- a/src/frontend/planner_test/tests/testdata/output/project_set.yaml +++ b/src/frontend/planner_test/tests/testdata/output/project_set.yaml @@ -155,17 +155,18 @@ └─BatchScan { table: t, columns: [t.x], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [unnest, t._row_id(hidden), projected_row_id(hidden), t._row_id#1(hidden), projected_row_id#1(hidden)], stream_key: [t._row_id, projected_row_id, t._row_id#1, projected_row_id#1, unnest], pk_columns: [t._row_id, projected_row_id, t._row_id#1, projected_row_id#1, unnest], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: Unnest($0) = Unnest($0), output: [Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id] } - ├─StreamExchange { dist: HashShard(Unnest($0)) } - │ └─StreamShare { id: 3 } - │ └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] } - │ └─StreamProjectSet { select_list: [Unnest($0), $1] } - │ └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(Unnest($0)) } - └─StreamShare { id: 3 } - └─StreamProject { exprs: 
[Unnest($0), t._row_id, projected_row_id] } - └─StreamProjectSet { select_list: [Unnest($0), $1] } - └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id) } + └─StreamHashJoin { type: Inner, predicate: Unnest($0) = Unnest($0), output: [Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id] } + ├─StreamExchange { dist: HashShard(Unnest($0)) } + │ └─StreamShare { id: 3 } + │ └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] } + │ └─StreamProjectSet { select_list: [Unnest($0), $1] } + │ └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(Unnest($0)) } + └─StreamShare { id: 3 } + └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] } + └─StreamProjectSet { select_list: [Unnest($0), $1] } + └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: issue-10080 sql: | with cte as (SELECT 1 as v1, unnest(array[1,2,3,4,5]) AS v2) select v1 from cte; diff --git a/src/frontend/planner_test/tests/testdata/output/select_except.yaml b/src/frontend/planner_test/tests/testdata/output/select_except.yaml index 2193524b7076f..ffd6da30b90bc 100644 --- a/src/frontend/planner_test/tests/testdata/output/select_except.yaml +++ b/src/frontend/planner_test/tests/testdata/output/select_except.yaml @@ -34,11 +34,12 @@ └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, v3, v2, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v1], pk_columns: [t._row_id, t._row_id#1, v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v3, t.v2, t._row_id, t._row_id] } - ├─StreamExchange { dist: HashShard(t.v1) } - │ └─StreamTableScan { table: t, columns: [t.v1, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(t.v1) } - └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v3, t.v2, t._row_id, t._row_id] } + ├─StreamExchange { dist: HashShard(t.v1) } + │ └─StreamTableScan { table: t, columns: [t.v1, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v1) } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: qualified wildcard sql: | create table t (v1 int, v2 int, v3 int); @@ -52,11 +53,12 @@ └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, v2, v3, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v1], pk_columns: [t._row_id, t._row_id#1, v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v2, t.v3, t._row_id, t._row_id] } - ├─StreamExchange { dist: HashShard(t.v1) } - │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - └─StreamExchange { dist: HashShard(t.v1) } - └─StreamTableScan { table: t, columns: [t.v1, t.v2, 
t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) } + └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v2, t.v3, t._row_id, t._row_id] } + ├─StreamExchange { dist: HashShard(t.v1) } + │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamExchange { dist: HashShard(t.v1) } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } - name: except with unknown column sql: | create table t (v1 int, v2 int, v3 int); diff --git a/src/frontend/planner_test/tests/testdata/output/share.yaml b/src/frontend/planner_test/tests/testdata/output/share.yaml index 2815b00784b1d..15404d6d863ab 100644 --- a/src/frontend/planner_test/tests/testdata/output/share.yaml +++ b/src/frontend/planner_test/tests/testdata/output/share.yaml @@ -343,24 +343,25 @@ └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [a_id, b_id, a_ts, b_ts, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, a_id], pk_columns: [_row_id, _row_id#1, a_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: id = id, output: [id, id, date_time, date_time, _row_id, _row_id] } - ├─StreamExchange { dist: HashShard(id) } - │ └─StreamProject { exprs: [id, date_time, _row_id] } - │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [id, date_time, $expr1, _row_id], cleaned_by_watermark: true } - │ ├─StreamProject { exprs: [id, date_time, AtTimeZone(date_time, 'UTC':Varchar) as $expr1, _row_id] } - │ │ └─StreamFilter { predicate: (initial_bid = 1:Int32) } - │ │ └─StreamShare { id: 4 } - │ │ └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] } - │ │ └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } - │ │ └─StreamRowIdGen { row_id_index: 10 } - │ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } - │ └─StreamExchange { dist: Broadcast } - │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:00:01':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } - │ └─StreamNow { output: [now] } - └─StreamExchange { dist: HashShard(id) } - └─StreamFilter { predicate: (initial_bid = 2:Int32) } - └─StreamShare { id: 4 } - └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] } - └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } - └─StreamRowIdGen { row_id_index: 10 } - └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + └─StreamExchange { dist: HashShard(id, _row_id, _row_id) } + └─StreamHashJoin { type: Inner, predicate: id = id, output: [id, id, date_time, date_time, _row_id, _row_id] } + ├─StreamExchange { dist: HashShard(id) } + │ └─StreamProject { exprs: [id, date_time, _row_id] } + │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [id, date_time, $expr1, _row_id], cleaned_by_watermark: true } + │ ├─StreamProject { exprs: [id, date_time, AtTimeZone(date_time, 'UTC':Varchar) as $expr1, _row_id] } + │ │ └─StreamFilter { predicate: (initial_bid 
= 1:Int32) } + │ │ └─StreamShare { id: 4 } + │ │ └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] } + │ │ └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } + │ │ └─StreamRowIdGen { row_id_index: 10 } + │ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } + │ └─StreamExchange { dist: Broadcast } + │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:00:01':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } + │ └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard(id) } + └─StreamFilter { predicate: (initial_bid = 2:Int32) } + └─StreamShare { id: 4 } + └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] } + └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) } + └─StreamRowIdGen { row_id_index: 10 } + └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } diff --git a/src/frontend/planner_test/tests/testdata/output/shared_views.yaml b/src/frontend/planner_test/tests/testdata/output/shared_views.yaml index 775812f77b59c..3777705c97ced 100644 --- a/src/frontend/planner_test/tests/testdata/output/shared_views.yaml +++ b/src/frontend/planner_test/tests/testdata/output/shared_views.yaml @@ -23,19 +23,20 @@ └─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] } stream_plan: |- StreamMaterialize { columns: [z, a, b, t1._row_id(hidden), t1._row_id#1(hidden), t1._row_id#2(hidden), t1.x(hidden)], stream_key: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, z], pk_columns: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, z], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: $expr1 = $expr2, output: [$expr1, $expr2, $expr3, t1._row_id, t1._row_id, t1._row_id, t1.x] } - ├─StreamExchange { dist: HashShard($expr1) } - │ └─StreamShare { id: 3 } - │ └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] } - │ └─StreamFilter { predicate: (t1.y > 0:Int32) } - │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard($expr2) } - └─StreamProject { exprs: [(t1.x * $expr1) as $expr2, (t1.y * $expr1) as $expr3, t1._row_id, t1._row_id, t1.x] } - └─StreamHashJoin { type: Inner, predicate: t1.x = $expr1, output: [t1.x, t1.y, $expr1, t1._row_id, t1._row_id] } - ├─StreamExchange { dist: HashShard(t1.x) } - │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard($expr1) } - └─StreamShare { id: 3 } - └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] } - └─StreamFilter { predicate: (t1.y > 0:Int32) } - └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard($expr1, t1._row_id, t1._row_id, t1._row_id, t1.x) } + └─StreamHashJoin { type: Inner, predicate: $expr1 = $expr2, output: [$expr1, $expr2, $expr3, t1._row_id, t1._row_id, t1._row_id, t1.x] } + ├─StreamExchange { dist: HashShard($expr1) } + │ └─StreamShare { id: 3 } + │ └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] } + │ └─StreamFilter { predicate: (t1.y > 0:Int32) } + │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + 
└─StreamExchange { dist: HashShard($expr2) } + └─StreamProject { exprs: [(t1.x * $expr1) as $expr2, (t1.y * $expr1) as $expr3, t1._row_id, t1._row_id, t1.x] } + └─StreamHashJoin { type: Inner, predicate: t1.x = $expr1, output: [t1.x, t1.y, $expr1, t1._row_id, t1._row_id] } + ├─StreamExchange { dist: HashShard(t1.x) } + │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard($expr1) } + └─StreamShare { id: 3 } + └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] } + └─StreamFilter { predicate: (t1.y > 0:Int32) } + └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/subquery.yaml b/src/frontend/planner_test/tests/testdata/output/subquery.yaml index 003e4185ada81..e07e84e040929 100644 --- a/src/frontend/planner_test/tests/testdata/output/subquery.yaml +++ b/src/frontend/planner_test/tests/testdata/output/subquery.yaml @@ -346,21 +346,22 @@ └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [date_time, window_start, window_end, auction._row_id(hidden)], stream_key: [auction._row_id, window_start, window_end, date_time], pk_columns: [auction._row_id, window_start, window_end, date_time], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: auction.date_time IS NOT DISTINCT FROM auction.date_time, output: all } - ├─StreamExchange { dist: HashShard(auction.date_time) } - │ └─StreamShare { id: 3 } - │ └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] } - │ └─StreamFilter { predicate: IsNotNull(auction.date_time) } - │ └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) } - └─StreamProject { exprs: [auction.date_time] } - └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] } - └─StreamProject { exprs: [auction.date_time] } - └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] } - └─StreamExchange { dist: HashShard(auction.date_time) } - └─StreamShare { id: 3 } - └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] } - └─StreamFilter { predicate: IsNotNull(auction.date_time) } - └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) } + └─StreamExchange { dist: HashShard(auction.date_time, window_start, window_end, auction._row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: auction.date_time IS NOT DISTINCT FROM auction.date_time, output: all } + ├─StreamExchange { dist: HashShard(auction.date_time) } + │ └─StreamShare { id: 3 } + │ └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] } + │ └─StreamFilter { predicate: IsNotNull(auction.date_time) } + │ └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) } + └─StreamProject { exprs: [auction.date_time] } + └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] } + └─StreamProject { exprs: 
[auction.date_time] } + └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] } + └─StreamExchange { dist: HashShard(auction.date_time) } + └─StreamShare { id: 3 } + └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] } + └─StreamFilter { predicate: IsNotNull(auction.date_time) } + └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) } - sql: | CREATE TABLE t (v int); SELECT 1 FROM t AS t_inner WHERE EXISTS ( SELECT 1 HAVING t_inner.v > 1); @@ -535,22 +536,23 @@ └─BatchScan { table: t, columns: [t.x], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [x, y, k, sum_x, t.x(hidden)], stream_key: [k, x], pk_columns: [k, x], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, t.y, t.k, sum(Unnest($0)), t.x] } - ├─StreamExchange { dist: HashShard(t.x) } - │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) } - └─StreamProject { exprs: [t.x, sum(Unnest($0))] } - └─StreamHashAgg { group_key: [t.x], aggs: [sum(Unnest($0)), count] } - └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, Unnest($0), t.x, projected_row_id] } - ├─StreamProject { exprs: [t.x] } - │ └─StreamHashAgg { group_key: [t.x], aggs: [count] } - │ └─StreamExchange { dist: HashShard(t.x) } - │ └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) } - └─StreamProject { exprs: [t.x, Unnest($0), projected_row_id] } - └─StreamProjectSet { select_list: [$0, Unnest($0)] } - └─StreamProject { exprs: [t.x] } - └─StreamHashAgg { group_key: [t.x], aggs: [count] } - └─StreamExchange { dist: HashShard(t.x) } - └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) } + └─StreamExchange { dist: HashShard(t.x, t.k) } + └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, t.y, t.k, sum(Unnest($0)), t.x] } + ├─StreamExchange { dist: HashShard(t.x) } + │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) } + └─StreamProject { exprs: [t.x, sum(Unnest($0))] } + └─StreamHashAgg { group_key: [t.x], aggs: [sum(Unnest($0)), count] } + └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, Unnest($0), t.x, projected_row_id] } + ├─StreamProject { exprs: [t.x] } + │ └─StreamHashAgg { group_key: [t.x], aggs: [count] } + │ └─StreamExchange { dist: HashShard(t.x) } + │ └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) } + └─StreamProject { exprs: [t.x, Unnest($0), projected_row_id] } + └─StreamProjectSet { select_list: [$0, Unnest($0)] } + └─StreamProject { exprs: [t.x] } + └─StreamHashAgg { group_key: [t.x], aggs: [count] } + └─StreamExchange { dist: HashShard(t.x) } + └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) } - name: CorrelatedInputRef in ProjectSet and apply on condition is true. 
sql: | create table t(x int[], y int[], k int primary key); @@ -632,16 +634,17 @@ └─BatchScan { table: integers, columns: [integers.i], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [i, col, integers._row_id(hidden), integers.i(hidden)], stream_key: [integers._row_id, i], pk_columns: [i, integers._row_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, row_number, integers._row_id, integers.i] } - ├─StreamExchange { dist: HashShard(integers.i) } - │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } - └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] } - └─StreamProject { exprs: [integers.i, row_number, integers._row_id] } - └─StreamOverWindow { window_functions: [row_number() OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] } - └─StreamExchange { dist: HashShard(integers.i) } - └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] } - └─StreamFilter { predicate: IsNotNull(integers.i) } - └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } + └─StreamExchange { dist: HashShard(integers.i, integers._row_id) } + └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, row_number, integers._row_id, integers.i] } + ├─StreamExchange { dist: HashShard(integers.i) } + │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } + └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] } + └─StreamProject { exprs: [integers.i, row_number, integers._row_id] } + └─StreamOverWindow { window_functions: [row_number() OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] } + └─StreamExchange { dist: HashShard(integers.i) } + └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] } + └─StreamFilter { predicate: IsNotNull(integers.i) } + └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } - name: test over window subquery 2 (with nested loop join so cannot be transformed into a stream plan) sql: | CREATE TABLE integers(i INTEGER); @@ -690,16 +693,17 @@ └─BatchScan { table: integers, columns: [integers.i], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [i, col, integers._row_id(hidden), integers.i(hidden)], stream_key: [integers._row_id, i], pk_columns: [i, integers._row_id], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, sum, integers._row_id, integers.i] } - ├─StreamExchange { dist: HashShard(integers.i) } - │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } - └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] } - └─StreamProject { exprs: [integers.i, sum, integers._row_id] } - └─StreamOverWindow { window_functions: [sum(integers.i) OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW)] } - └─StreamExchange { dist: HashShard(integers.i) } - └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] } - └─StreamFilter { predicate: IsNotNull(integers.i) } - └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } + └─StreamExchange { dist: HashShard(integers.i, integers._row_id) } + └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, sum, integers._row_id, integers.i] } + ├─StreamExchange { dist: HashShard(integers.i) } + │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } + └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] } + └─StreamProject { exprs: [integers.i, sum, integers._row_id] } + └─StreamOverWindow { window_functions: [sum(integers.i) OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] } + └─StreamExchange { dist: HashShard(integers.i) } + └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] } + └─StreamFilter { predicate: IsNotNull(integers.i) } + └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } - name: test over window subquery 4 (with nested loop join so cannot be transformed into a stream plan) sql: | CREATE TABLE integers(i INTEGER); @@ -747,17 +751,18 @@ └─BatchScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [i, integers._row_id(hidden), $expr1(hidden), integers.correlated_col(hidden)], stream_key: [integers._row_id, $expr1, integers.correlated_col], pk_columns: [integers._row_id, $expr1, integers.correlated_col], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: $expr1 = sum AND integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.i, integers._row_id, $expr1, integers.correlated_col] } - ├─StreamExchange { dist: HashShard(integers.correlated_col, $expr1) } - │ └─StreamProject { exprs: [integers.i, integers.correlated_col, integers.i::Int64 as $expr1, integers._row_id] } - │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } - └─StreamExchange { dist: HashShard(rows.correlated_col, sum) } - └─StreamProject { exprs: [rows.correlated_col, sum, rows._row_id, rows.k] } - └─StreamOverWindow { window_functions: [sum(rows.v) OVER(PARTITION BY rows.correlated_col, rows.k ORDER BY rows.v ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] } - └─StreamExchange { dist: HashShard(rows.correlated_col, rows.k) } - └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] } - └─StreamFilter { predicate: IsNotNull(rows.correlated_col) } - └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) } + └─StreamExchange { dist: HashShard(integers._row_id, $expr1, integers.correlated_col) } + └─StreamHashJoin { type: LeftSemi, predicate: $expr1 = sum AND integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.i, integers._row_id, $expr1, integers.correlated_col] } + ├─StreamExchange { dist: 
HashShard(integers.correlated_col, $expr1) } + │ └─StreamProject { exprs: [integers.i, integers.correlated_col, integers.i::Int64 as $expr1, integers._row_id] } + │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } + └─StreamExchange { dist: HashShard(rows.correlated_col, sum) } + └─StreamProject { exprs: [rows.correlated_col, sum, rows._row_id, rows.k] } + └─StreamOverWindow { window_functions: [sum(rows.v) OVER(PARTITION BY rows.correlated_col, rows.k ORDER BY rows.v ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] } + └─StreamExchange { dist: HashShard(rows.correlated_col, rows.k) } + └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] } + └─StreamFilter { predicate: IsNotNull(rows.correlated_col) } + └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) } - name: test cardinality visitor with correlated filter sql: | CREATE TABLE t1(i INT); @@ -818,21 +823,22 @@ └─BatchScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [i, correlated_col, integers._row_id(hidden), 2:Int64(hidden)], stream_key: [integers._row_id, correlated_col, 2:Int64], pk_columns: [integers._row_id, correlated_col, 2:Int64], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: integers.correlated_col IS NOT DISTINCT FROM integers.correlated_col AND 2:Int64 = $expr1, output: [integers.i, integers.correlated_col, integers._row_id, 2:Int64] } - ├─StreamExchange { dist: HashShard(integers.correlated_col) } - │ └─StreamProject { exprs: [integers.i, integers.correlated_col, 2:Int64, integers._row_id] } - │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } - └─StreamProject { exprs: [integers.correlated_col, (count(distinct rows.k) + count(distinct rows.v)) as $expr1] } - └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count(distinct rows.k), count(distinct rows.v), count] } - └─StreamHashJoin { type: LeftOuter, predicate: integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.correlated_col, rows.k, rows.v, rows._row_id] } - ├─StreamProject { exprs: [integers.correlated_col] } - │ └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count] } - │ └─StreamExchange { dist: HashShard(integers.correlated_col) } - │ └─StreamTableScan { table: integers, columns: [integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } - └─StreamExchange { dist: HashShard(rows.correlated_col) } - └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] } - └─StreamFilter { predicate: IsNotNull(rows.correlated_col) } - └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) } + └─StreamExchange { dist: HashShard(integers.correlated_col, integers._row_id, 2:Int64) } + └─StreamHashJoin { type: LeftSemi, predicate: integers.correlated_col IS NOT DISTINCT FROM integers.correlated_col AND 2:Int64 = $expr1, output: [integers.i, integers.correlated_col, integers._row_id, 2:Int64] } + ├─StreamExchange { dist: HashShard(integers.correlated_col) } + │ └─StreamProject 
{ exprs: [integers.i, integers.correlated_col, 2:Int64, integers._row_id] } + │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } + └─StreamProject { exprs: [integers.correlated_col, (count(distinct rows.k) + count(distinct rows.v)) as $expr1] } + └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count(distinct rows.k), count(distinct rows.v), count] } + └─StreamHashJoin { type: LeftOuter, predicate: integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.correlated_col, rows.k, rows.v, rows._row_id] } + ├─StreamProject { exprs: [integers.correlated_col] } + │ └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count] } + │ └─StreamExchange { dist: HashShard(integers.correlated_col) } + │ └─StreamTableScan { table: integers, columns: [integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) } + └─StreamExchange { dist: HashShard(rows.correlated_col) } + └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] } + └─StreamFilter { predicate: IsNotNull(rows.correlated_col) } + └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) } - name: test hop window subquery 1 sql: | create table t1 (k int primary key, ts timestamp); @@ -848,12 +854,13 @@ └─BatchValues { rows: [[1:Int32], [2:Int32]] } stream_plan: |- StreamMaterialize { columns: [col, k, ts, window_start, window_end], stream_key: [col, window_start, window_end], pk_columns: [col, window_start, window_end], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: 1:Int32 = t1.k, output: all } - ├─StreamAppendOnlyDedup { dedup_cols: [1:Int32] } - │ └─StreamExchange { dist: HashShard(1:Int32) } - │ └─StreamProject { exprs: [1:Int32] } - │ └─StreamValues { rows: [[1:Int32, 0:Int64], [2:Int32, 1:Int64]] } - └─StreamExchange { dist: HashShard(t1.k) } - └─StreamHopWindow { time_col: t1.ts, slide: 00:10:00, size: 00:30:00, output: all } - └─StreamFilter { predicate: IsNotNull(t1.ts) } - └─StreamTableScan { table: t1, columns: [t1.k, t1.ts], pk: [t1.k], dist: UpstreamHashShard(t1.k) } + └─StreamExchange { dist: HashShard(1:Int32, window_start, window_end) } + └─StreamHashJoin { type: Inner, predicate: 1:Int32 = t1.k, output: all } + ├─StreamAppendOnlyDedup { dedup_cols: [1:Int32] } + │ └─StreamExchange { dist: HashShard(1:Int32) } + │ └─StreamProject { exprs: [1:Int32] } + │ └─StreamValues { rows: [[1:Int32, 0:Int64], [2:Int32, 1:Int64]] } + └─StreamExchange { dist: HashShard(t1.k) } + └─StreamHopWindow { time_col: t1.ts, slide: 00:10:00, size: 00:30:00, output: all } + └─StreamFilter { predicate: IsNotNull(t1.ts) } + └─StreamTableScan { table: t1, columns: [t1.k, t1.ts], pk: [t1.k], dist: UpstreamHashShard(t1.k) } diff --git a/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml b/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml index 3b84def33abfa..0d393c378ff85 100644 --- a/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml +++ b/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml @@ -717,15 +717,16 @@ └─BatchScan { table: t2, columns: [t2.x], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [x, y, t1._row_id(hidden)], stream_key: [t1._row_id, x], pk_columns: 
[t1._row_id, x], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: t1.x IS NOT DISTINCT FROM t2.x, output: all } - ├─StreamExchange { dist: HashShard(t1.x) } - │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamProject { exprs: [t2.x] } - └─StreamGroupTopN { order: [t2.x ASC], limit: 1, offset: 0, group_key: [t2.x] } - └─StreamExchange { dist: HashShard(t2.x) } - └─StreamProject { exprs: [t2.x, t2.x, t2._row_id] } - └─StreamFilter { predicate: IsNotNull(t2.x) } - └─StreamTableScan { table: t2, columns: [t2.x, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.x, t1._row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: t1.x IS NOT DISTINCT FROM t2.x, output: all } + ├─StreamExchange { dist: HashShard(t1.x) } + │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamProject { exprs: [t2.x] } + └─StreamGroupTopN { order: [t2.x ASC], limit: 1, offset: 0, group_key: [t2.x] } + └─StreamExchange { dist: HashShard(t2.x) } + └─StreamProject { exprs: [t2.x, t2.x, t2._row_id] } + └─StreamFilter { predicate: IsNotNull(t2.x) } + └─StreamTableScan { table: t2, columns: [t2.x, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - sql: | create table t1(x int, y int); create table t2(x int, y int); @@ -883,14 +884,15 @@ └─BatchScan { table: t2, columns: [t2.v2, t2.k2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1, k1], pk_columns: [t1._row_id, v1, k1], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all } - ├─StreamExchange { dist: HashShard(t1.k1) } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [t2.k2] } - └─StreamExchange { dist: HashShard(t2.k2) } - └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] } - └─StreamFilter { predicate: IsNotNull(t2.k2) } - └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1.k1, t1._row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all } + ├─StreamExchange { dist: HashShard(t1.k1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [t2.k2] } + └─StreamExchange { dist: HashShard(t2.k2) } + └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] } + └─StreamFilter { predicate: IsNotNull(t2.k2) } + └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: test ApplyTopNTransposeRule case 2 sql: | create table t1 (v1 int, k1 int); @@ -908,16 +910,17 @@ └─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1], pk_columns: [t1._row_id, v1], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2, output: all } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ 
└─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v2) } - └─StreamProject { exprs: [t2.v2, t2._row_id] } - └─StreamTopN { order: [t2.v2 ASC], limit: 1, offset: 0 } - └─StreamExchange { dist: Single } - └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [$expr1] } - └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] } - └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2, output: all } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v2) } + └─StreamProject { exprs: [t2.v2, t2._row_id] } + └─StreamTopN { order: [t2.v2 ASC], limit: 1, offset: 0 } + └─StreamExchange { dist: Single } + └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [$expr1] } + └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] } + └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: test ApplyLimitTransposeRule case 1 sql: | create table t1 (v1 int, k1 int); @@ -935,11 +938,12 @@ └─BatchScan { table: t2, columns: [t2.v2, t2.k2], distribution: SomeShard } stream_plan: |- StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1, k1], pk_columns: [t1._row_id, v1, k1], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all } - ├─StreamExchange { dist: HashShard(t1.k1) } - │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamGroupTopN { order: [t2.k2 ASC], limit: 1, offset: 0, group_key: [t2.k2] } - └─StreamExchange { dist: HashShard(t2.k2) } - └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] } - └─StreamFilter { predicate: IsNotNull(t2.k2) } - └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1.k1, t1._row_id) } + └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all } + ├─StreamExchange { dist: HashShard(t1.k1) } + │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamGroupTopN { order: [t2.k2 ASC], limit: 1, offset: 0, group_key: [t2.k2] } + └─StreamExchange { dist: HashShard(t2.k2) } + └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] } + └─StreamFilter { predicate: IsNotNull(t2.k2) } + └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml index 6673d86fd9745..29e391853cf8a 100644 --- a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml +++ b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml @@ -122,19 +122,20 @@ select * from t1 join t2 on a = b AND ta < now() - interval '1 hour' and ta >= now() - interval '2 
hour'; stream_plan: |- StreamMaterialize { columns: [a, ta, b, tb, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, a], pk_columns: [t1._row_id, t2._row_id, a], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.a) } - │ └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] } - │ ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true } - │ │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ │ └─StreamExchange { dist: Broadcast } - │ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } - │ │ └─StreamNow { output: [now] } - │ └─StreamExchange { dist: Broadcast } - │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } - │ └─StreamNow { output: [now] } - └─StreamExchange { dist: HashShard(t2.b) } - └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.a, t1._row_id, t2._row_id) } + └─StreamHashJoin { type: Inner, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.a) } + │ └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] } + │ ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true } + │ │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ │ └─StreamExchange { dist: Broadcast } + │ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } + │ │ └─StreamNow { output: [now] } + │ └─StreamExchange { dist: Broadcast } + │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } + │ └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard(t2.b) } + └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: Temporal filter in on clause for left join's left side sql: | create table t1 (a int, ta timestamp with time zone); @@ -150,19 +151,20 @@ select * from t1 right join t2 on a = b AND ta < now() - interval '1 hour' and ta >= now() - interval '2 hour'; stream_plan: |- StreamMaterialize { columns: [a, ta, b, tb, t2._row_id(hidden), t1._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, b], pk_columns: [t2._row_id, t1._row_id, b], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: t2.b = t1.a, output: [t1.a, t1.ta, t2.b, t2.tb, t2._row_id, t1._row_id] } - ├─StreamExchange { dist: HashShard(t2.b) } - │ └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - └─StreamExchange { dist: HashShard(t1.a) } - └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] } - ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, 
t1._row_id], cleaned_by_watermark: true } - │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - │ └─StreamExchange { dist: Broadcast } - │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } - │ └─StreamNow { output: [now] } - └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } - └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard(t2.b, t2._row_id, t1._row_id) } + └─StreamHashJoin { type: LeftOuter, predicate: t2.b = t1.a, output: [t1.a, t1.ta, t2.b, t2.tb, t2._row_id, t1._row_id] } + ├─StreamExchange { dist: HashShard(t2.b) } + │ └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.a) } + └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] } + ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true } + │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + │ └─StreamExchange { dist: Broadcast } + │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } + │ └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } + └─StreamNow { output: [now] } - name: Temporal filter in on clause for full join's left side sql: | create table t1 (a int, ta timestamp with time zone); @@ -178,19 +180,20 @@ select * from t1 left join t2 on a = b AND tb < now() - interval '1 hour' and tb >= now() - interval '2 hour'; stream_plan: |- StreamMaterialize { columns: [a, ta, b, tb, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, a], pk_columns: [t1._row_id, t2._row_id, a], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftOuter, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.a) } - │ └─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.b) } - └─StreamDynamicFilter { predicate: (t2.tb < $expr2), output: [t2.b, t2.tb, t2._row_id] } - ├─StreamDynamicFilter { predicate: (t2.tb >= $expr1), output_watermarks: [t2.tb], output: [t2.b, t2.tb, t2._row_id], cleaned_by_watermark: true } - │ ├─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - │ └─StreamExchange { dist: Broadcast } - │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } - │ └─StreamNow { output: [now] } - └─StreamExchange { dist: Broadcast } - └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } - └─StreamNow { output: [now] } + └─StreamExchange { dist: HashShard(t1.a, t1._row_id, t2._row_id) } + └─StreamHashJoin { type: LeftOuter, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] } + 
├─StreamExchange { dist: HashShard(t1.a) } + │ └─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.b) } + └─StreamDynamicFilter { predicate: (t2.tb < $expr2), output: [t2.b, t2.tb, t2._row_id] } + ├─StreamDynamicFilter { predicate: (t2.tb >= $expr1), output_watermarks: [t2.tb], output: [t2.b, t2.tb, t2._row_id], cleaned_by_watermark: true } + │ ├─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + │ └─StreamExchange { dist: Broadcast } + │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] } + │ └─StreamNow { output: [now] } + └─StreamExchange { dist: Broadcast } + └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] } + └─StreamNow { output: [now] } - name: Temporal filter in on clause for right join's right side sql: | create table t1 (a int, ta timestamp with time zone); diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml index a1020b8d16ee5..f49a82be2dd78 100644 --- a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml +++ b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml @@ -6,11 +6,12 @@ select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on id1= id2 stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2, output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] } - ├─StreamExchange { dist: HashShard(stream.id1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } - └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } + └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) } + └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2, output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] } + ├─StreamExchange { dist: HashShard(stream.id1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } + └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } batch_error: |- Not supported: do not support temporal join for batch queries HINT: please use temporal join in streaming queries @@ -21,11 +22,12 @@ select id1, a1, id2, a2 from stream join version FOR SYSTEM_TIME AS OF PROCTIME() on id1 = id2 where a2 < 10; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, 
stream._row_id] } - ├─StreamExchange { dist: HashShard(stream.id1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } - └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } + └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) } + └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] } + ├─StreamExchange { dist: HashShard(stream.id1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } + └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } - name: implicit join with temporal tables sql: | create table stream(id1 int, a1 int, b1 int) APPEND ONLY; @@ -33,11 +35,12 @@ select id1, a1, id2, a2 from stream, version FOR SYSTEM_TIME AS OF PROCTIME() where id1 = id2 AND a2 < 10; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] } - ├─StreamExchange { dist: HashShard(stream.id1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } - └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } + └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) } + └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] } + ├─StreamExchange { dist: HashShard(stream.id1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } + └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } - name: Multi join key for temporal join sql: | create table stream(id1 int, a1 int, b1 int) APPEND ONLY; @@ -45,11 +48,12 @@ select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and id1 = id2 where b2 != a2; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1, a1], pk_columns: [stream._row_id, id1, a1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND stream.a1 = version.a2 AND (version.b2 <> version.a2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] } - ├─StreamExchange { dist: HashShard(stream.id1, stream.a1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: 
[stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2, version.a2) } - └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2], pk: [version.id2, version.a2], dist: UpstreamHashShard(version.id2, version.a2) } + └─StreamExchange { dist: HashShard(stream.id1, stream.a1, stream._row_id) } + └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND stream.a1 = version.a2 AND (version.b2 <> version.a2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] } + ├─StreamExchange { dist: HashShard(stream.id1, stream.a1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2, version.a2) } + └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2], pk: [version.id2, version.a2], dist: UpstreamHashShard(version.id2, version.a2) } - name: Temporal join with Aggregation sql: | create table stream(id1 int, a1 int, b1 int) APPEND ONLY; @@ -101,15 +105,16 @@ join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.k = version2.k where a1 < 10; stream_plan: |- StreamMaterialize { columns: [k, x1, x2, a1, b1, stream._row_id(hidden), version2.k(hidden)], stream_key: [stream._row_id, k], pk_columns: [stream._row_id, k], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: Inner, predicate: stream.k = version2.k, output: [stream.k, version1.x1, version2.x2, stream.a1, stream.b1, stream._row_id, version2.k] } - ├─StreamTemporalJoin { type: Inner, predicate: stream.k = version1.k, output: [stream.k, stream.a1, stream.b1, version1.x1, stream._row_id, version1.k] } - │ ├─StreamExchange { dist: HashShard(stream.k) } - │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) } - │ │ └─StreamTableScan { table: stream, columns: [stream.k, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.k) } - │ └─StreamTableScan { table: version1, columns: [version1.k, version1.x1], pk: [version1.k], dist: UpstreamHashShard(version1.k) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.k) } - └─StreamTableScan { table: version2, columns: [version2.k, version2.x2], pk: [version2.k], dist: UpstreamHashShard(version2.k) } + └─StreamExchange { dist: HashShard(stream.k, stream._row_id) } + └─StreamTemporalJoin { type: Inner, predicate: stream.k = version2.k, output: [stream.k, version1.x1, version2.x2, stream.a1, stream.b1, stream._row_id, version2.k] } + ├─StreamTemporalJoin { type: Inner, predicate: stream.k = version1.k, output: [stream.k, stream.a1, stream.b1, version1.x1, stream._row_id, version1.k] } + │ ├─StreamExchange { dist: HashShard(stream.k) } + │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) } + │ │ └─StreamTableScan { table: stream, columns: [stream.k, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.k) } + │ └─StreamTableScan { table: version1, columns: [version1.k, version1.x1], pk: [version1.k], dist: UpstreamHashShard(version1.k) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.k) } + └─StreamTableScan { table: version2, columns: [version2.k, version2.x2], pk: [version2.k], dist: 
UpstreamHashShard(version2.k) } - name: multi-way temporal join with different keys sql: | create table stream(id1 int, id2 int, a1 int, b1 int) APPEND ONLY; @@ -121,16 +126,17 @@ join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.id2 = version2.id2 where a1 < 10; stream_plan: |- StreamMaterialize { columns: [id1, x1, id2, x2, a1, b1, stream._row_id(hidden), version2.id2(hidden)], stream_key: [stream._row_id, id1, id2], pk_columns: [stream._row_id, id1, id2], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] } - ├─StreamExchange { dist: HashShard(stream.id2) } - │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] } - │ ├─StreamExchange { dist: HashShard(stream.id1) } - │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) } - │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) } - │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) } - └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) } + └─StreamExchange { dist: HashShard(stream.id1, stream.id2, stream._row_id) } + └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] } + ├─StreamExchange { dist: HashShard(stream.id2) } + │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] } + │ ├─StreamExchange { dist: HashShard(stream.id1) } + │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) } + │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) } + │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) } + └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) } - name: multi-way temporal join with different keys sql: | create table stream(id1 int, id2 int, a1 int, b1 int) APPEND ONLY; @@ -142,16 +148,17 @@ join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.id2 = version2.id2 where a1 < 10; stream_plan: |- StreamMaterialize { columns: [id1, x1, id2, x2, a1, b1, stream._row_id(hidden), version2.id2(hidden)], stream_key: [stream._row_id, id1, id2], pk_columns: [stream._row_id, id1, id2], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] } - ├─StreamExchange { dist: HashShard(stream.id2) } - │ 
└─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] } - │ ├─StreamExchange { dist: HashShard(stream.id1) } - │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) } - │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) } - │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) } - └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) } + └─StreamExchange { dist: HashShard(stream.id1, stream.id2, stream._row_id) } + └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] } + ├─StreamExchange { dist: HashShard(stream.id2) } + │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] } + │ ├─StreamExchange { dist: HashShard(stream.id1) } + │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) } + │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) } + │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) } + └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) } - name: temporal join with an index (distribution key size = 1) sql: | create table stream(id1 int, a1 int, b1 int) APPEND ONLY; @@ -160,11 +167,12 @@ select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } - ├─StreamExchange { dist: HashShard(stream.a1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) } - └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) } + └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) } + └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } + ├─StreamExchange { dist: HashShard(stream.a1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, 
stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) } + └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) } - name: temporal join with an index (distribution key size = 2) sql: | create table stream(id1 int, a1 int, b1 int) APPEND ONLY; @@ -173,11 +181,12 @@ select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } - ├─StreamExchange { dist: HashShard(stream.a1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) } - └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) } + └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) } + └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } + ├─StreamExchange { dist: HashShard(stream.a1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) } + └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) } - name: temporal join with an index (index column size = 1) sql: | create table stream(id1 int, a1 int, b1 int) APPEND ONLY; @@ -186,11 +195,12 @@ select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, stream.b1, a1], pk_columns: [stream._row_id, id2, stream.b1, a1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: LeftOuter, predicate: stream.b1 = idx2.b2 AND (stream.a1 = idx2.a2), output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } - ├─StreamExchange { dist: HashShard(stream.b1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.b2) } - └─StreamTableScan { table: idx2, columns: [idx2.b2, idx2.id2, idx2.a2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.b2) } + └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) } + └─StreamTemporalJoin { type: LeftOuter, predicate: stream.b1 = idx2.b2 AND (stream.a1 = idx2.a2), output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } + ├─StreamExchange { dist: HashShard(stream.b1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: 
[stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.b2) } + └─StreamTableScan { table: idx2, columns: [idx2.b2, idx2.id2, idx2.a2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.b2) } - name: temporal join with singleton table sql: | create table t (a int) append only; @@ -212,11 +222,12 @@ select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx.a2 AND stream.b1 = idx.b2, output: [stream.id1, stream.a1, idx.id2, idx.a2, stream._row_id, stream.b1] } - ├─StreamExchange { dist: HashShard(stream.a1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx.a2) } - └─StreamTableScan { table: idx, columns: [idx.id2, idx.a2, idx.b2], pk: [idx.id2], dist: UpstreamHashShard(idx.a2) } + └─StreamExchange { dist: HashShard(stream.a1, idx.id2, stream._row_id, stream.b1) } + └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx.a2 AND stream.b1 = idx.b2, output: [stream.id1, stream.a1, idx.id2, idx.a2, stream._row_id, stream.b1] } + ├─StreamExchange { dist: HashShard(stream.a1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx.a2) } + └─StreamTableScan { table: idx, columns: [idx.id2, idx.a2, idx.b2], pk: [idx.id2], dist: UpstreamHashShard(idx.a2) } - name: index selection for temporal join (with two indexes) and should choose the index with a longer prefix.. 
sql: | create table stream(id1 int, a1 int, b1 int) APPEND ONLY; @@ -226,11 +237,12 @@ select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } - ├─StreamExchange { dist: HashShard(stream.a1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) } - └─StreamTableScan { table: idx2, columns: [idx2.id2, idx2.a2, idx2.b2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) } + └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) } + └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] } + ├─StreamExchange { dist: HashShard(stream.a1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) } + └─StreamTableScan { table: idx2, columns: [idx2.id2, idx2.a2, idx2.b2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) } - name: index selection for temporal join (with three indexes) and should choose primary table. 
sql: | create table stream(id1 int, a1 int, b1 int, c1 int) APPEND ONLY; @@ -241,11 +253,12 @@ select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2 and c1 = c2 and id1 = id2; stream_plan: |- StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden), stream.c1(hidden)], stream_key: [stream._row_id, id1, a1, stream.b1, stream.c1], pk_columns: [stream._row_id, id1, a1, stream.b1, stream.c1], pk_conflict: NoCheck } - └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2 AND (stream.a1 = version.a2) AND (stream.b1 = version.b2) AND (stream.c1 = version.c2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id, stream.b1, stream.c1] } - ├─StreamExchange { dist: HashShard(stream.id1) } - │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream.c1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } - └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } - └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2, version.c2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } + └─StreamExchange { dist: HashShard(stream.id1, stream.a1, stream._row_id, stream.b1, stream.c1) } + └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2 AND (stream.a1 = version.a2) AND (stream.b1 = version.b2) AND (stream.c1 = version.c2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id, stream.b1, stream.c1] } + ├─StreamExchange { dist: HashShard(stream.id1) } + │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream.c1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) } + └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) } + └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2, version.c2], pk: [version.id2], dist: UpstreamHashShard(version.id2) } - name: index selection for temporal join (two index) and no one matches. 
sql: | create table stream(id1 int, a1 int, b1 int, c1 int) APPEND ONLY; diff --git a/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml b/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml index eaaa1f8e5c8d5..fdf928a0c9c84 100644 --- a/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml +++ b/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml @@ -243,214 +243,218 @@ └─BatchSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id(hidden), _row_id#1(hidden), r_regionkey(hidden), _row_id#2(hidden), _row_id#3(hidden), _row_id#4(hidden), ps_suppkey(hidden), n_nationkey(hidden), ps_supplycost(hidden), p_partkey#1(hidden)], stream_key: [_row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, p_partkey, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_columns: [s_acctbal, n_name, s_name, p_partkey, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_conflict: NoCheck } - └─StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } - ├─StreamExchange { dist: HashShard(p_partkey) } - │ └─StreamShare { id: 26 } - │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } - │ ├─StreamExchange { dist: HashShard(n_nationkey) } - │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } - │ │ ├─StreamExchange { dist: HashShard(r_regionkey) } - │ │ │ └─StreamShare { id: 3 } - │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] } - │ │ │ └─StreamRowIdGen { row_id_index: 3 } - │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } - │ │ └─StreamExchange { dist: HashShard(n_regionkey) } - │ │ └─StreamShare { id: 7 } - │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] } - │ │ └─StreamRowIdGen { row_id_index: 4 } - │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(s_nationkey) } - │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] } - │ ├─StreamExchange { dist: HashShard(ps_suppkey) } - │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] } - │ │ ├─StreamExchange { dist: HashShard(p_partkey) } - │ │ │ └─StreamRowIdGen { row_id_index: 9 } - │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } - │ │ └─StreamExchange { dist: HashShard(ps_partkey) } - │ │ └─StreamShare { id: 15 } - │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, 
ps_supplycost, _row_id] } - │ │ └─StreamRowIdGen { row_id_index: 5 } - │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(s_suppkey) } - │ └─StreamShare { id: 21 } - │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } - │ └─StreamRowIdGen { row_id_index: 7 } - │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } - └─StreamProject { exprs: [p_partkey, min(ps_supplycost)] } - └─StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } - └─StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } - ├─StreamAppendOnlyDedup { dedup_cols: [p_partkey] } - │ └─StreamExchange { dist: HashShard(p_partkey) } - │ └─StreamProject { exprs: [p_partkey] } - │ └─StreamShare { id: 26 } - │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } - │ ├─StreamExchange { dist: HashShard(n_nationkey) } - │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } - │ │ ├─StreamExchange { dist: HashShard(r_regionkey) } - │ │ │ └─StreamShare { id: 3 } - │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] } - │ │ │ └─StreamRowIdGen { row_id_index: 3 } - │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } - │ │ └─StreamExchange { dist: HashShard(n_regionkey) } - │ │ └─StreamShare { id: 7 } - │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] } - │ │ └─StreamRowIdGen { row_id_index: 4 } - │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(s_nationkey) } - │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] } - │ ├─StreamExchange { dist: HashShard(ps_suppkey) } - │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] } - │ │ ├─StreamExchange { dist: HashShard(p_partkey) } - │ │ │ └─StreamRowIdGen { row_id_index: 9 } - │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } - │ │ └─StreamExchange { dist: HashShard(ps_partkey) } - │ │ └─StreamShare { id: 15 } - │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] } - │ │ └─StreamRowIdGen { row_id_index: 5 } - │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(s_suppkey) } - │ └─StreamShare { id: 21 } - │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } - │ └─StreamRowIdGen { row_id_index: 7 } - │ └─StreamSource { source: supplier, columns: [s_suppkey, 
s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } - └─StreamExchange { dist: HashShard(ps_partkey) } - └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] } - ├─StreamExchange { dist: HashShard(s_nationkey) } - │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] } - │ ├─StreamExchange { dist: HashShard(ps_suppkey) } - │ │ └─StreamFilter { predicate: IsNotNull(ps_partkey) } - │ │ └─StreamShare { id: 15 } - │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] } - │ │ └─StreamRowIdGen { row_id_index: 5 } - │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(s_suppkey) } - │ └─StreamShare { id: 21 } - │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } - │ └─StreamRowIdGen { row_id_index: 7 } - │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } - └─StreamExchange { dist: HashShard(n_nationkey) } - └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] } - ├─StreamExchange { dist: HashShard(r_regionkey) } - │ └─StreamShare { id: 3 } - │ └─StreamProject { exprs: [r_regionkey, _row_id] } - │ └─StreamRowIdGen { row_id_index: 3 } - │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } - └─StreamExchange { dist: HashShard(n_regionkey) } - └─StreamShare { id: 7 } - └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] } - └─StreamRowIdGen { row_id_index: 4 } - └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } + └─StreamExchange { dist: HashShard(p_partkey, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost) } + └─StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } + ├─StreamExchange { dist: HashShard(p_partkey) } + │ └─StreamShare { id: 26 } + │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } + │ ├─StreamExchange { dist: HashShard(n_nationkey) } + │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } + │ │ ├─StreamExchange { dist: HashShard(r_regionkey) } + │ │ │ └─StreamShare { id: 3 } + │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] } + │ │ │ └─StreamRowIdGen { row_id_index: 3 } + │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } + │ │ └─StreamExchange { dist: HashShard(n_regionkey) } + │ │ └─StreamShare { id: 7 } + │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] } + │ │ 
└─StreamRowIdGen { row_id_index: 4 } + │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } + │ └─StreamExchange { dist: HashShard(s_nationkey) } + │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] } + │ ├─StreamExchange { dist: HashShard(ps_suppkey) } + │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] } + │ │ ├─StreamExchange { dist: HashShard(p_partkey) } + │ │ │ └─StreamRowIdGen { row_id_index: 9 } + │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } + │ │ └─StreamExchange { dist: HashShard(ps_partkey) } + │ │ └─StreamShare { id: 15 } + │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] } + │ │ └─StreamRowIdGen { row_id_index: 5 } + │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } + │ └─StreamExchange { dist: HashShard(s_suppkey) } + │ └─StreamShare { id: 21 } + │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } + │ └─StreamRowIdGen { row_id_index: 7 } + │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } + └─StreamProject { exprs: [p_partkey, min(ps_supplycost)] } + └─StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } + └─StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } + ├─StreamAppendOnlyDedup { dedup_cols: [p_partkey] } + │ └─StreamExchange { dist: HashShard(p_partkey) } + │ └─StreamProject { exprs: [p_partkey] } + │ └─StreamShare { id: 26 } + │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } + │ ├─StreamExchange { dist: HashShard(n_nationkey) } + │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } + │ │ ├─StreamExchange { dist: HashShard(r_regionkey) } + │ │ │ └─StreamShare { id: 3 } + │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] } + │ │ │ └─StreamRowIdGen { row_id_index: 3 } + │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } + │ │ └─StreamExchange { dist: HashShard(n_regionkey) } + │ │ └─StreamShare { id: 7 } + │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] } + │ │ └─StreamRowIdGen { row_id_index: 4 } + │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } + │ └─StreamExchange { dist: HashShard(s_nationkey) } + │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] } + │ ├─StreamExchange { dist: HashShard(ps_suppkey) } + │ │ 
└─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] } + │ │ ├─StreamExchange { dist: HashShard(p_partkey) } + │ │ │ └─StreamRowIdGen { row_id_index: 9 } + │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } + │ │ └─StreamExchange { dist: HashShard(ps_partkey) } + │ │ └─StreamShare { id: 15 } + │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] } + │ │ └─StreamRowIdGen { row_id_index: 5 } + │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } + │ └─StreamExchange { dist: HashShard(s_suppkey) } + │ └─StreamShare { id: 21 } + │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } + │ └─StreamRowIdGen { row_id_index: 7 } + │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } + └─StreamExchange { dist: HashShard(ps_partkey) } + └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] } + ├─StreamExchange { dist: HashShard(s_nationkey) } + │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] } + │ ├─StreamExchange { dist: HashShard(ps_suppkey) } + │ │ └─StreamFilter { predicate: IsNotNull(ps_partkey) } + │ │ └─StreamShare { id: 15 } + │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] } + │ │ └─StreamRowIdGen { row_id_index: 5 } + │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } + │ └─StreamExchange { dist: HashShard(s_suppkey) } + │ └─StreamShare { id: 21 } + │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } + │ └─StreamRowIdGen { row_id_index: 7 } + │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } + └─StreamExchange { dist: HashShard(n_nationkey) } + └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] } + ├─StreamExchange { dist: HashShard(r_regionkey) } + │ └─StreamShare { id: 3 } + │ └─StreamProject { exprs: [r_regionkey, _row_id] } + │ └─StreamRowIdGen { row_id_index: 3 } + │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } + └─StreamExchange { dist: HashShard(n_regionkey) } + └─StreamShare { id: 7 } + └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] } + └─StreamRowIdGen { row_id_index: 4 } + └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id(hidden), _row_id#1(hidden), r_regionkey(hidden), _row_id#2(hidden), _row_id#3(hidden), _row_id#4(hidden), ps_suppkey(hidden), n_nationkey(hidden), ps_supplycost(hidden), p_partkey#1(hidden)], stream_key: [_row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, p_partkey, _row_id#4, ps_suppkey, n_nationkey, 
ps_supplycost], pk_columns: [s_acctbal, n_name, s_name, p_partkey, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamProject { exprs: [p_partkey, min(ps_supplycost)] } - └── StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } { intermediate state table: 26, state tables: [ 25 ], distinct tables: [] } - └── StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } { left table: 27, right table: 29, left degree table: 28, right degree table: 30 } - ├── StreamAppendOnlyDedup { dedup_cols: [p_partkey] } { state table: 31 } - │ └── StreamExchange Hash([0]) from 15 - └── StreamExchange Hash([0]) from 16 + └── StreamExchange Hash([3, 8, 9, 10, 11, 12, 13, 14, 15, 16]) from 1 Fragment 1 - StreamNoOp - └── StreamExchange NoShuffle from 2 + StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamProject { exprs: [p_partkey, min(ps_supplycost)] } + └── StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } { intermediate state table: 26, state tables: [ 25 ], distinct tables: [] } + └── StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } { left table: 27, right table: 29, left degree table: 28, right degree table: 30 } + ├── StreamAppendOnlyDedup { dedup_cols: [p_partkey] } { state table: 31 } + │ └── StreamExchange Hash([0]) from 16 + └── StreamExchange Hash([0]) from 17 Fragment 2 - StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } - ├── StreamExchange Hash([0]) from 3 - └── StreamExchange Hash([5]) from 8 + StreamNoOp + └── StreamExchange NoShuffle from 3 Fragment 3 - StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } { left table: 8, right table: 10, left degree table: 9, right degree table: 11 } + StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, 
_row_id, ps_suppkey] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } ├── StreamExchange Hash([0]) from 4 - └── StreamExchange Hash([2]) from 6 + └── StreamExchange Hash([5]) from 9 Fragment 4 - StreamNoOp - └── StreamExchange NoShuffle from 5 + StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } { left table: 8, right table: 10, left degree table: 9, right degree table: 11 } + ├── StreamExchange Hash([0]) from 5 + └── StreamExchange Hash([2]) from 7 Fragment 5 + StreamNoOp + └── StreamExchange NoShuffle from 6 + + Fragment 6 StreamProject { exprs: [r_regionkey, _row_id] } └── StreamRowIdGen { row_id_index: 3 } └── StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } { source state table: 12 } - Fragment 6 + Fragment 7 StreamNoOp - └── StreamExchange NoShuffle from 7 + └── StreamExchange NoShuffle from 8 - Fragment 7 + Fragment 8 StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] } └── StreamRowIdGen { row_id_index: 4 } └── StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } { source state table: 13 } - Fragment 8 + Fragment 9 StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 } - ├── StreamExchange Hash([2]) from 9 - └── StreamExchange Hash([0]) from 13 + ├── StreamExchange Hash([2]) from 10 + └── StreamExchange Hash([0]) from 14 - Fragment 9 + Fragment 10 StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] } { left table: 18, right table: 20, left degree table: 19, right degree table: 21 } - ├── StreamExchange Hash([0]) from 10 - └── StreamExchange Hash([0]) from 11 + ├── StreamExchange Hash([0]) from 11 + └── StreamExchange Hash([0]) from 12 - Fragment 10 + Fragment 11 StreamRowIdGen { row_id_index: 9 } └── StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } { source state table: 22 } - Fragment 11 + Fragment 12 StreamNoOp - └── StreamExchange NoShuffle from 12 + └── StreamExchange NoShuffle from 13 - Fragment 12 + Fragment 13 StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] } └── StreamRowIdGen { row_id_index: 5 } └── StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } { source state table: 23 } - Fragment 13 + Fragment 14 StreamNoOp - └── StreamExchange NoShuffle from 14 + └── StreamExchange NoShuffle from 15 - Fragment 14 + Fragment 15 StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } └── StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } { source state table: 24 } - Fragment 15 + Fragment 16 StreamProject { exprs: [p_partkey] } - └── StreamExchange NoShuffle from 2 + └── StreamExchange NoShuffle from 3 - Fragment 16 + Fragment 17 StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, 
s_nationkey, _row_id, _row_id, r_regionkey] } { left table: 32, right table: 34, left degree table: 33, right degree table: 35 } - ├── StreamExchange Hash([2]) from 17 - └── StreamExchange Hash([0]) from 20 + ├── StreamExchange Hash([2]) from 18 + └── StreamExchange Hash([0]) from 21 - Fragment 17 + Fragment 18 StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] } { left table: 36, right table: 38, left degree table: 37, right degree table: 39 } - ├── StreamExchange Hash([1]) from 18 - └── StreamExchange Hash([0]) from 19 + ├── StreamExchange Hash([1]) from 19 + └── StreamExchange Hash([0]) from 20 - Fragment 18 + Fragment 19 StreamFilter { predicate: IsNotNull(ps_partkey) } - └── StreamExchange NoShuffle from 12 + └── StreamExchange NoShuffle from 13 - Fragment 19 + Fragment 20 StreamNoOp - └── StreamExchange NoShuffle from 14 + └── StreamExchange NoShuffle from 15 - Fragment 20 + Fragment 21 StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] } { left table: 40, right table: 42, left degree table: 41, right degree table: 43 } - ├── StreamExchange Hash([0]) from 21 - └── StreamExchange Hash([2]) from 22 + ├── StreamExchange Hash([0]) from 22 + └── StreamExchange Hash([2]) from 23 - Fragment 21 + Fragment 22 StreamNoOp - └── StreamExchange NoShuffle from 5 + └── StreamExchange NoShuffle from 6 - Fragment 22 + Fragment 23 StreamNoOp - └── StreamExchange NoShuffle from 7 + └── StreamExchange NoShuffle from 8 Table 0 { columns: [ p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id_0, r_regionkey, n_nationkey, _row_id_1, _row_id_2, _row_id_3, ps_suppkey ], primary key: [ $0 ASC, $7 ASC, $9 ASC, $10 ASC, $11 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC, $12 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], distribution key: [ 0 ], read pk prefix len hint: 2 } @@ -540,7 +544,7 @@ Table 43 { columns: [ n_regionkey, _row_id, _degree ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 } - Table 4294967294 { columns: [ s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost, p_partkey#1 ], primary key: [ $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ], distribution key: [ 3 ], read pk prefix len hint: 13 } + Table 4294967294 { columns: [ s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost, p_partkey#1 ], primary key: [ $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ], distribution key: [ 3, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], read pk prefix len hint: 13 } - id: tpch_q5 before: @@ -1797,76 +1801,80 @@ └─BatchSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, 
l_comment, _row_id], filter: (None, None) } stream_plan: |- StreamMaterialize { columns: [s_name, s_address, _row_id(hidden), _row_id#1(hidden), s_nationkey(hidden), s_suppkey(hidden)], stream_key: [_row_id, _row_id#1, s_nationkey, s_suppkey], pk_columns: [s_name, _row_id, _row_id#1, s_nationkey, s_suppkey], pk_conflict: NoCheck } - └─StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } - ├─StreamExchange { dist: HashShard(s_suppkey) } - │ └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } - │ ├─StreamExchange { dist: HashShard(s_nationkey) } - │ │ └─StreamRowIdGen { row_id_index: 7 } - │ │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(n_nationkey) } - │ └─StreamRowIdGen { row_id_index: 4 } - │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } - └─StreamExchange { dist: HashShard(ps_suppkey) } - └─StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] } - └─StreamFilter { predicate: ($expr1 > $expr2) } - └─StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all } - ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) } - │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] } - │ └─StreamShare { id: 13 } - │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] } - │ ├─StreamExchange { dist: HashShard(ps_partkey) } - │ │ └─StreamRowIdGen { row_id_index: 5 } - │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(p_partkey) } - │ └─StreamProject { exprs: [p_partkey, _row_id] } - │ └─StreamRowIdGen { row_id_index: 9 } - │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } - └─StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] } - └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] } - └─StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] } - ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) } - │ └─StreamProject { exprs: [ps_partkey, ps_suppkey] } - │ └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] } - │ └─StreamShare { id: 13 } - │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] } - │ ├─StreamExchange { dist: HashShard(ps_partkey) } - │ │ └─StreamRowIdGen { row_id_index: 5 } - │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } - │ └─StreamExchange { dist: HashShard(p_partkey) } - │ └─StreamProject { exprs: [p_partkey, _row_id] } - │ └─StreamRowIdGen { row_id_index: 9 } - │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } - 
└─StreamExchange { dist: HashShard(l_partkey, l_suppkey) } - └─StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] } - └─StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) } - └─StreamRowIdGen { row_id_index: 16 } - └─StreamSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id] } + └─StreamExchange { dist: HashShard(_row_id, _row_id, s_nationkey, s_suppkey) } + └─StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } + ├─StreamExchange { dist: HashShard(s_suppkey) } + │ └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } + │ ├─StreamExchange { dist: HashShard(s_nationkey) } + │ │ └─StreamRowIdGen { row_id_index: 7 } + │ │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } + │ └─StreamExchange { dist: HashShard(n_nationkey) } + │ └─StreamRowIdGen { row_id_index: 4 } + │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } + └─StreamExchange { dist: HashShard(ps_suppkey) } + └─StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] } + └─StreamFilter { predicate: ($expr1 > $expr2) } + └─StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all } + ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) } + │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] } + │ └─StreamShare { id: 13 } + │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] } + │ ├─StreamExchange { dist: HashShard(ps_partkey) } + │ │ └─StreamRowIdGen { row_id_index: 5 } + │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } + │ └─StreamExchange { dist: HashShard(p_partkey) } + │ └─StreamProject { exprs: [p_partkey, _row_id] } + │ └─StreamRowIdGen { row_id_index: 9 } + │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } + └─StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] } + └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] } + └─StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] } + ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) } + │ └─StreamProject { exprs: [ps_partkey, ps_suppkey] } + │ └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] } + │ └─StreamShare { id: 13 } + │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] } + │ ├─StreamExchange { dist: HashShard(ps_partkey) } + │ │ └─StreamRowIdGen { row_id_index: 5 } + │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } + │ └─StreamExchange { dist: 
HashShard(p_partkey) } + │ └─StreamProject { exprs: [p_partkey, _row_id] } + │ └─StreamRowIdGen { row_id_index: 9 } + │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } + └─StreamExchange { dist: HashShard(l_partkey, l_suppkey) } + └─StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] } + └─StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) } + └─StreamRowIdGen { row_id_index: 16 } + └─StreamSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id] } stream_dist_plan: |+ Fragment 0 StreamMaterialize { columns: [s_name, s_address, _row_id(hidden), _row_id#1(hidden), s_nationkey(hidden), s_suppkey(hidden)], stream_key: [_row_id, _row_id#1, s_nationkey, s_suppkey], pk_columns: [s_name, _row_id, _row_id#1, s_nationkey, s_suppkey], pk_conflict: NoCheck } ├── materialized table: 4294967294 - └── StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } - ├── StreamExchange Hash([0]) from 1 - └── StreamExchange Hash([0]) from 4 + └── StreamExchange Hash([2, 3, 4, 5]) from 1 Fragment 1 - StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } - ├── StreamExchange Hash([3]) from 2 - └── StreamExchange Hash([0]) from 3 + StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 } + ├── StreamExchange Hash([0]) from 2 + └── StreamExchange Hash([0]) from 5 Fragment 2 + StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 } + ├── StreamExchange Hash([3]) from 3 + └── StreamExchange Hash([0]) from 4 + + Fragment 3 StreamRowIdGen { row_id_index: 7 } └── StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } { source state table: 8 } - Fragment 3 + Fragment 4 StreamRowIdGen { row_id_index: 4 } └── StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } { source state table: 9 } - Fragment 4 + Fragment 5 StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] } └── StreamFilter { predicate: ($expr1 > $expr2) } └── StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 } - ├── StreamExchange Hash([0, 1]) from 5 + ├── StreamExchange Hash([0, 1]) from 6 └── StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] } └── StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] } { intermediate state table: 20, state tables: [], distinct tables: [] } └── StreamHashJoin 
{ type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] } @@ -1874,33 +1882,33 @@ ├── right table: 23 ├── left degree table: 22 ├── right degree table: 24 - ├── StreamExchange Hash([0, 1]) from 9 - └── StreamExchange Hash([0, 1]) from 10 + ├── StreamExchange Hash([0, 1]) from 10 + └── StreamExchange Hash([0, 1]) from 11 - Fragment 5 + Fragment 6 StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] } - └── StreamExchange NoShuffle from 6 + └── StreamExchange NoShuffle from 7 - Fragment 6 + Fragment 7 StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 } - ├── StreamExchange Hash([0]) from 7 - └── StreamExchange Hash([0]) from 8 + ├── StreamExchange Hash([0]) from 8 + └── StreamExchange Hash([0]) from 9 - Fragment 7 + Fragment 8 StreamRowIdGen { row_id_index: 5 } └── StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } { source state table: 18 } - Fragment 8 + Fragment 9 StreamProject { exprs: [p_partkey, _row_id] } └── StreamRowIdGen { row_id_index: 9 } └── StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } { source state table: 19 } - Fragment 9 + Fragment 10 StreamProject { exprs: [ps_partkey, ps_suppkey] } └── StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] } { intermediate state table: 25, state tables: [], distinct tables: [] } - └── StreamExchange NoShuffle from 6 + └── StreamExchange NoShuffle from 7 - Fragment 10 + Fragment 11 StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] } └── StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) } └── StreamRowIdGen { row_id_index: 16 } @@ -1961,7 +1969,7 @@ Table 26 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 } - Table 4294967294 { columns: [ s_name, s_address, _row_id, _row_id#1, s_nationkey, s_suppkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 5 ], read pk prefix len hint: 5 } + Table 4294967294 { columns: [ s_name, s_address, _row_id, _row_id#1, s_nationkey, s_suppkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 2, 3, 4, 5 ], read pk prefix len hint: 5 } - id: tpch_q21 before: diff --git a/src/frontend/planner_test/tests/testdata/output/watermark.yaml b/src/frontend/planner_test/tests/testdata/output/watermark.yaml index e4ef42b121528..d57d41fa76bc3 100644 --- a/src/frontend/planner_test/tests/testdata/output/watermark.yaml +++ b/src/frontend/planner_test/tests/testdata/output/watermark.yaml @@ -79,11 +79,12 @@ select t1.ts as t1_ts, t2.ts as ts2, t1.v1 as t1_v1, t1.v2 as t1_v2, t2.v1 as t2_v1, t2.v2 as t2_v2 from t1, t2 where t1.ts = t2.ts; stream_plan: |- StreamMaterialize { columns: [t1_ts, ts2, t1_v1, t1_v2, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_ts], pk_columns: [t1._row_id, t2._row_id, t1_ts], pk_conflict: NoCheck, watermark_columns: [t1_ts, ts2] } - └─StreamHashJoin [window, append_only] { type: Inner, predicate: t1.ts = t2.ts, 
output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t2.ts, t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.ts) } - │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.ts) } - └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.ts, t1._row_id, t2._row_id) } + └─StreamHashJoin [window, append_only] { type: Inner, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t2.ts, t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.ts) } + │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.ts) } + └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: left semi window join sql: | create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only; @@ -91,11 +92,12 @@ select t1.ts as t1_ts, t1.v1 as t1_v1, t1.v2 as t1_v2 from t1 where exists (select * from t2 where t1.ts = t2.ts); stream_plan: |- StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t1._row_id(hidden)], stream_key: [t1._row_id, t1_ts], pk_columns: [t1._row_id, t1_ts], pk_conflict: NoCheck, watermark_columns: [t1_ts] } - └─StreamHashJoin [window] { type: LeftSemi, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts], output: all } - ├─StreamExchange { dist: HashShard(t1.ts) } - │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.ts) } - └─StreamTableScan { table: t2, columns: [t2.ts, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.ts, t1._row_id) } + └─StreamHashJoin [window] { type: LeftSemi, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts], output: all } + ├─StreamExchange { dist: HashShard(t1.ts) } + │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.ts) } + └─StreamTableScan { table: t2, columns: [t2.ts, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: interval join(left outer join) sql: | create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only; @@ -108,13 +110,14 @@ └─LogicalScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id] } stream_plan: |- StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t2_ts, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1], pk_columns: [t1._row_id, t2._row_id, t1_v1], pk_conflict: NoCheck, watermark_columns: [t1_ts, t2_ts] } - └─StreamHashJoin [interval] { type: LeftOuter, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, 
AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] } - │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v1) } - └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] } - └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) } + └─StreamHashJoin [interval] { type: LeftOuter, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] } + │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v1) } + └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] } + └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: interval join (inner join) sql: | create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only; @@ -127,13 +130,14 @@ └─LogicalScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id] } stream_plan: |- StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t2_ts, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1], pk_columns: [t1._row_id, t2._row_id, t1_v1], pk_conflict: NoCheck, watermark_columns: [t1_ts, t2_ts] } - └─StreamHashJoin [interval, append_only] { type: Inner, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] } - ├─StreamExchange { dist: HashShard(t1.v1) } - │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] } - │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } - └─StreamExchange { dist: HashShard(t2.v1) } - └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] } - └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } + └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) } + └─StreamHashJoin [interval, append_only] { type: Inner, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= 
$expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] } + ├─StreamExchange { dist: HashShard(t1.v1) } + │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] } + │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) } + └─StreamExchange { dist: HashShard(t2.v1) } + └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] } + └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) } - name: union all sql: | create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only; diff --git a/src/frontend/planner_test/tests/testdata/output/window_join.yaml b/src/frontend/planner_test/tests/testdata/output/window_join.yaml index 4113a6021e866..17c5e76f6e806 100644 --- a/src/frontend/planner_test/tests/testdata/output/window_join.yaml +++ b/src/frontend/planner_test/tests/testdata/output/window_join.yaml @@ -12,15 +12,16 @@ select * from t1, t2 where ts1 = ts2 and a1 = a2; stream_plan: |- StreamMaterialize { columns: [ts1, a1, b1, ts2, a2, b2, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, ts1, a1], pk_columns: [_row_id, _row_id#1, ts1, a1], pk_conflict: NoCheck, watermark_columns: [ts1, ts2] } - └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] } - ├─StreamExchange { dist: HashShard(ts1, a1) } - │ └─StreamRowIdGen { row_id_index: 3 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] } - │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] } - └─StreamExchange { dist: HashShard(ts2, a2) } - └─StreamRowIdGen { row_id_index: 3 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] } - └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] } + └─StreamExchange { dist: HashShard(ts1, a1, _row_id, _row_id) } + └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] } + ├─StreamExchange { dist: HashShard(ts1, a1) } + │ └─StreamRowIdGen { row_id_index: 3 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] } + │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] } + └─StreamExchange { dist: HashShard(ts2, a2) } + └─StreamRowIdGen { row_id_index: 3 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] } + └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] } - name: Window join expression reorder sql: | create source t1 (ts1 timestamp with time zone, a1 int, b1 int, watermark for ts1 as ts1 - INTERVAL '1' SECOND) with ( @@ -34,12 +35,13 @@ select * from t1, t2 where a1 = a2 and ts1 = ts2; stream_plan: |- StreamMaterialize { columns: [ts1, a1, b1, ts2, a2, b2, 
_row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, a1, ts1], pk_columns: [_row_id, _row_id#1, a1, ts1], pk_conflict: NoCheck, watermark_columns: [ts1, ts2] } - └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] } - ├─StreamExchange { dist: HashShard(ts1, a1) } - │ └─StreamRowIdGen { row_id_index: 3 } - │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] } - │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] } - └─StreamExchange { dist: HashShard(ts2, a2) } - └─StreamRowIdGen { row_id_index: 3 } - └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] } - └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] } + └─StreamExchange { dist: HashShard(ts1, a1, _row_id, _row_id) } + └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] } + ├─StreamExchange { dist: HashShard(ts1, a1) } + │ └─StreamRowIdGen { row_id_index: 3 } + │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] } + │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] } + └─StreamExchange { dist: HashShard(ts2, a2) } + └─StreamRowIdGen { row_id_index: 3 } + └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] } + └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] } diff --git a/src/frontend/src/binder/mod.rs b/src/frontend/src/binder/mod.rs index 974730cd16237..f1038f9bf5943 100644 --- a/src/frontend/src/binder/mod.rs +++ b/src/frontend/src/binder/mod.rs @@ -363,6 +363,13 @@ impl Binder { } } +/// The column name stored in [`BindContext`] for a column without an alias. +pub const UNNAMED_COLUMN: &str = "?column?"; +/// The table name stored in [`BindContext`] for a subquery without an alias. +const UNNAMED_SUBQUERY: &str = "?subquery?"; +/// The table name stored in [`BindContext`] for a column group. +const COLUMN_GROUP_PREFIX: &str = "?column_group_id?"; + #[cfg(test)] pub mod test_utils { use risingwave_common::types::DataType; @@ -380,10 +387,3 @@ pub mod test_utils { Binder::new_with_param_types(&SessionImpl::mock(), param_types) } } - -/// The column name stored in [`BindContext`] for a column without an alias. -pub const UNNAMED_COLUMN: &str = "?column?"; -/// The table name stored in [`BindContext`] for a subquery without an alias. -const UNNAMED_SUBQUERY: &str = "?subquery?"; -/// The table name stored in [`BindContext`] for a column group. 
-const COLUMN_GROUP_PREFIX: &str = "?column_group_id?"; diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index 32279dd4e70eb..ddb1d697b856d 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -244,6 +244,7 @@ fn bind_sink_format_desc(value: SinkSchema) -> Result { E::Json => SinkEncode::Json, E::Protobuf => SinkEncode::Protobuf, E::Avro => SinkEncode::Avro, + E::Template => SinkEncode::Template, e @ (E::Native | E::Csv | E::Bytes) => { return Err(ErrorCode::BindError(format!("sink encode unsupported: {e}")).into()) } @@ -262,6 +263,7 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock vec![Encode::Json], Format::Debezium => vec![Encode::Json], ), + RedisSink::SINK_NAME => hashmap!( + Format::Plain => vec![Encode::Json,Encode::Template], + Format::Upsert => vec![Encode::Json,Encode::Template], + ), )) }); pub fn validate_compatibility(connector: &str, format_desc: &SinkSchema) -> Result<()> { diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index 149f39bead330..174ed23e03ec5 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -73,6 +73,7 @@ mod show; mod transaction; pub mod util; pub mod variable; +mod wait; /// The [`PgResponseBuilder`] used by RisingWave. pub type RwPgResponseBuilder = PgResponseBuilder; @@ -419,6 +420,7 @@ pub async fn handle( } } Statement::Flush => flush::handle_flush(handler_args).await, + Statement::Wait => wait::handle_wait(handler_args).await, Statement::SetVariable { local: _, variable, diff --git a/src/frontend/src/handler/wait.rs b/src/frontend/src/handler/wait.rs new file mode 100644 index 0000000000000..83f2784ec8c17 --- /dev/null +++ b/src/frontend/src/handler/wait.rs @@ -0,0 +1,31 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use pgwire::pg_response::{PgResponse, StatementType}; +use risingwave_common::error::Result; + +use super::RwPgResponse; +use crate::handler::HandlerArgs; +use crate::session::SessionImpl; + +pub(super) async fn handle_wait(handler_args: HandlerArgs) -> Result { + do_wait(&handler_args.session).await?; + Ok(PgResponse::empty_result(StatementType::WAIT)) +} + +pub(crate) async fn do_wait(session: &SessionImpl) -> Result<()> { + let client = session.env().meta_client(); + client.wait().await?; + Ok(()) +} diff --git a/src/frontend/src/lib.rs b/src/frontend/src/lib.rs index 0a036b8e96233..450f49b6394cf 100644 --- a/src/frontend/src/lib.rs +++ b/src/frontend/src/lib.rs @@ -15,7 +15,7 @@ #![allow(clippy::derive_partial_eq_without_eq)] #![feature(map_try_insert)] #![feature(negative_impls)] -#![feature(generators)] +#![feature(coroutines)] #![feature(proc_macro_hygiene, stmt_expr_attributes)] #![feature(trait_alias)] #![feature(extract_if)] @@ -32,7 +32,6 @@ #![feature(extend_one)] #![feature(type_alias_impl_trait)] #![feature(impl_trait_in_assoc_type)] -#![feature(async_fn_in_trait)] #![feature(result_flattening)] #![recursion_limit = "256"] diff --git a/src/frontend/src/meta_client.rs b/src/frontend/src/meta_client.rs index ae90c2e345f9f..d37c5dec127f1 100644 --- a/src/frontend/src/meta_client.rs +++ b/src/frontend/src/meta_client.rs @@ -43,6 +43,8 @@ pub trait FrontendMetaClient: Send + Sync { async fn flush(&self, checkpoint: bool) -> Result; + async fn wait(&self) -> Result<()>; + async fn cancel_creating_jobs(&self, jobs: PbJobs) -> Result>; async fn list_table_fragments( @@ -111,6 +113,10 @@ impl FrontendMetaClient for FrontendMetaClientImpl { self.0.flush(checkpoint).await } + async fn wait(&self) -> Result<()> { + self.0.wait().await + } + async fn cancel_creating_jobs(&self, infos: PbJobs) -> Result> { self.0.cancel_creating_jobs(infos).await } diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs index d8972436d5c78..9c87f1a34abbd 100644 --- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs +++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs @@ -151,7 +151,22 @@ impl StreamMaterialize { TableType::MaterializedView => { assert_matches!(user_distributed_by, RequiredDist::Any); // ensure the same pk will not shuffle to different node - RequiredDist::shard_by_key(input.schema().len(), input.expect_stream_key()) + let required_dist = + RequiredDist::shard_by_key(input.schema().len(), input.expect_stream_key()); + + // If the input is a stream join, enforce the stream key as the materialized + // view distribution key to avoid slow backfilling caused by + // data skew of the dimension table join key. + // See for more information. 
+ let is_stream_join = matches!(input.as_stream_hash_join(), Some(_join)) + || matches!(input.as_stream_temporal_join(), Some(_join)) + || matches!(input.as_stream_delta_join(), Some(_join)); + + if is_stream_join { + return Ok(required_dist.enforce(input, &Order::any())); + } + + required_dist } TableType::Index => { assert_matches!( diff --git a/src/frontend/src/optimizer/property/distribution.rs b/src/frontend/src/optimizer/property/distribution.rs index b6e7715dd155f..2df1d7ae00bc3 100644 --- a/src/frontend/src/optimizer/property/distribution.rs +++ b/src/frontend/src/optimizer/property/distribution.rs @@ -331,7 +331,7 @@ impl RequiredDist { } } - fn enforce(&self, plan: PlanRef, required_order: &Order) -> PlanRef { + pub fn enforce(&self, plan: PlanRef, required_order: &Order) -> PlanRef { let dist = self.to_dist(); match plan.convention() { Convention::Batch => BatchExchange::new(plan, required_order.clone(), dist).into(), diff --git a/src/frontend/src/test_utils.rs b/src/frontend/src/test_utils.rs index 035ec872a6c6d..0f3df3074f4a5 100644 --- a/src/frontend/src/test_utils.rs +++ b/src/frontend/src/test_utils.rs @@ -750,6 +750,10 @@ impl FrontendMetaClient for MockFrontendMetaClient { }) } + async fn wait(&self) -> RpcResult<()> { + Ok(()) + } + async fn cancel_creating_jobs(&self, _infos: PbJobs) -> RpcResult> { Ok(vec![]) } diff --git a/src/jni_core/Cargo.toml b/src/jni_core/Cargo.toml index 69c11a7f21e24..77cafd155000d 100644 --- a/src/jni_core/Cargo.toml +++ b/src/jni_core/Cargo.toml @@ -10,6 +10,7 @@ ignored = ["workspace-hack"] normal = ["workspace-hack"] [dependencies] +anyhow = "1" bytes = "1" cfg-or-panic = "0.2" futures = { version = "0.3", default-features = false, features = ["alloc"] } diff --git a/src/jni_core/src/lib.rs b/src/jni_core/src/lib.rs index 29bbf76929b45..4815cd7368370 100644 --- a/src/jni_core/src/lib.rs +++ b/src/jni_core/src/lib.rs @@ -902,14 +902,17 @@ pub extern "system" fn Java_com_risingwave_java_binding_Binding_sendSinkWriterRe 'a, >( env: EnvParam<'a>, - channel: Pointer<'a, Sender>, + channel: Pointer<'a, Sender>>, msg: JByteArray<'a>, ) -> jboolean { execute_and_catch(env, move |env| { let sink_writer_stream_response: SinkWriterStreamResponse = Message::decode(to_guarded_slice(&msg, env)?.deref())?; - match channel.as_ref().blocking_send(sink_writer_stream_response) { + match channel + .as_ref() + .blocking_send(Ok(sink_writer_stream_response)) + { Ok(_) => Ok(JNI_TRUE), Err(e) => { tracing::info!("send error. 
{:?}", e); diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 55c7b27b0c80a..bf1bddad2070f 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -14,7 +14,7 @@ #![feature(lint_reasons)] #![feature(let_chains)] -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] mod server; use std::time::Duration; diff --git a/src/meta/service/src/ddl_service.rs b/src/meta/service/src/ddl_service.rs index 935d398aeacb0..061ff93589163 100644 --- a/src/meta/service/src/ddl_service.rs +++ b/src/meta/service/src/ddl_service.rs @@ -717,7 +717,7 @@ impl DdlService for DdlServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn get_tables( &self, request: Request, @@ -732,6 +732,11 @@ impl DdlService for DdlServiceImpl { } Ok(Response::new(GetTablesResponse { tables })) } + + async fn wait(&self, _request: Request) -> Result, Status> { + self.ddl_controller.wait().await; + Ok(Response::new(WaitResponse {})) + } } impl DdlServiceImpl { diff --git a/src/meta/service/src/heartbeat_service.rs b/src/meta/service/src/heartbeat_service.rs index 7c51b39346894..e31058ff2bdc5 100644 --- a/src/meta/service/src/heartbeat_service.rs +++ b/src/meta/service/src/heartbeat_service.rs @@ -32,7 +32,7 @@ impl HeartbeatServiceImpl { #[async_trait::async_trait] impl HeartbeatService for HeartbeatServiceImpl { - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn heartbeat( &self, request: Request, diff --git a/src/meta/service/src/lib.rs b/src/meta/service/src/lib.rs index 0d473a6ed031f..6c8cc11f8971c 100644 --- a/src/meta/service/src/lib.rs +++ b/src/meta/service/src/lib.rs @@ -16,7 +16,7 @@ #![feature(let_chains)] #![feature(lazy_cell)] #![feature(impl_trait_in_assoc_type)] -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] use risingwave_meta::*; diff --git a/src/meta/service/src/meta_member_service.rs b/src/meta/service/src/meta_member_service.rs index 25c4c7ad4cc84..5753061176e8c 100644 --- a/src/meta/service/src/meta_member_service.rs +++ b/src/meta/service/src/meta_member_service.rs @@ -36,7 +36,7 @@ impl MetaMemberServiceImpl { #[async_trait::async_trait] impl MetaMemberService for MetaMemberServiceImpl { - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn members( &self, _request: Request, diff --git a/src/meta/service/src/notification_service.rs b/src/meta/service/src/notification_service.rs index bd247c1e18980..0fcc470a70e39 100644 --- a/src/meta/service/src/notification_service.rs +++ b/src/meta/service/src/notification_service.rs @@ -207,7 +207,7 @@ impl NotificationServiceImpl { impl NotificationService for NotificationServiceImpl { type SubscribeStream = UnboundedReceiverStream; - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn subscribe( &self, request: Request, diff --git a/src/meta/service/src/scale_service.rs b/src/meta/service/src/scale_service.rs index f231ea5f4955d..676180adc7581 100644 --- a/src/meta/service/src/scale_service.rs +++ b/src/meta/service/src/scale_service.rs @@ -59,7 +59,7 @@ impl ScaleServiceImpl { #[async_trait::async_trait] impl ScaleService for ScaleServiceImpl { - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn get_cluster_info( &self, _: Request, @@ -110,7 +110,7 @@ impl ScaleService for ScaleServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, 
coverage(off))] async fn reschedule( &self, request: Request, @@ -174,7 +174,7 @@ impl ScaleService for ScaleServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn get_reschedule_plan( &self, request: Request, diff --git a/src/meta/service/src/stream_service.rs b/src/meta/service/src/stream_service.rs index ef232d9b04ffd..92af1d4beb707 100644 --- a/src/meta/service/src/stream_service.rs +++ b/src/meta/service/src/stream_service.rs @@ -59,7 +59,7 @@ impl StreamServiceImpl { #[async_trait::async_trait] impl StreamManagerService for StreamServiceImpl { - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn flush(&self, request: Request) -> TonicResponse { self.env.idle_manager().record_activity(); let req = request.into_inner(); @@ -71,7 +71,7 @@ impl StreamManagerService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn pause(&self, _: Request) -> Result, Status> { let i = self .barrier_scheduler @@ -83,7 +83,7 @@ impl StreamManagerService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn resume(&self, _: Request) -> Result, Status> { let i = self .barrier_scheduler @@ -122,7 +122,7 @@ impl StreamManagerService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn list_table_fragments( &self, request: Request, @@ -165,7 +165,7 @@ impl StreamManagerService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn list_table_fragment_states( &self, _request: Request, @@ -186,7 +186,7 @@ impl StreamManagerService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn list_fragment_distribution( &self, _request: Request, @@ -215,7 +215,7 @@ impl StreamManagerService for StreamServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn list_actor_states( &self, _request: Request, diff --git a/src/meta/service/src/user_service.rs b/src/meta/service/src/user_service.rs index 8c982521b112a..cb290766e6fd1 100644 --- a/src/meta/service/src/user_service.rs +++ b/src/meta/service/src/user_service.rs @@ -107,7 +107,7 @@ impl UserServiceImpl { #[async_trait::async_trait] impl UserService for UserServiceImpl { - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn create_user( &self, request: Request, @@ -128,7 +128,7 @@ impl UserService for UserServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn drop_user( &self, request: Request, @@ -142,7 +142,7 @@ impl UserService for UserServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn update_user( &self, request: Request, @@ -165,7 +165,7 @@ impl UserService for UserServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn grant_privilege( &self, request: Request, @@ -185,7 +185,7 @@ impl UserService for UserServiceImpl { })) } - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] async fn revoke_privilege( &self, request: Request, diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 2b0c3e3db87dc..1b3a284e9ccc9 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ 
b/src/meta/src/hummock/manager/mod.rs @@ -1761,7 +1761,7 @@ impl HummockManager { } /// Get version deltas from meta store - #[cfg_attr(coverage, no_coverage)] + #[cfg_attr(coverage, coverage(off))] #[named] pub async fn list_version_deltas( &self, diff --git a/src/meta/src/lib.rs b/src/meta/src/lib.rs index afe66d27ad8e8..f549578f079c6 100644 --- a/src/meta/src/lib.rs +++ b/src/meta/src/lib.rs @@ -26,13 +26,12 @@ #![feature(error_generic_member_access)] #![feature(assert_matches)] #![feature(try_blocks)] -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] #![feature(custom_test_frameworks)] #![test_runner(risingwave_test_runner::test_runner::run_failpont_tests)] #![feature(is_sorted)] #![feature(impl_trait_in_assoc_type)] #![feature(type_name_of_val)] -#![feature(async_fn_in_trait)] pub mod backup_restore; pub mod barrier; diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs index 04b9729c5a5b8..36615bd93b757 100644 --- a/src/meta/src/rpc/ddl_controller.rs +++ b/src/meta/src/rpc/ddl_controller.rs @@ -15,6 +15,7 @@ use std::cmp::Ordering; use std::num::NonZeroUsize; use std::sync::Arc; +use std::time::Duration; use itertools::Itertools; use risingwave_common::config::DefaultParallelism; @@ -29,6 +30,7 @@ use risingwave_pb::ddl_service::alter_relation_name_request::Relation; use risingwave_pb::ddl_service::DdlProgress; use risingwave_pb::stream_plan::StreamFragmentGraph as StreamFragmentGraphProto; use tokio::sync::Semaphore; +use tokio::time::sleep; use tracing::log::warn; use tracing::Instrument; @@ -1094,4 +1096,18 @@ impl DdlController { } } } + + pub async fn wait(&self) { + for _ in 0..30 * 60 { + if self + .catalog_manager + .list_creating_background_mvs() + .await + .is_empty() + { + break; + } + sleep(Duration::from_secs(1)).await; + } + } } diff --git a/src/prost/helpers/src/lib.rs b/src/prost/helpers/src/lib.rs index f4d1d1a45baa1..5796e14273fe9 100644 --- a/src/prost/helpers/src/lib.rs +++ b/src/prost/helpers/src/lib.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] #![feature(iterator_try_collect)] use proc_macro::TokenStream; @@ -24,7 +24,7 @@ mod generate; /// This attribute will be placed before any pb types, including messages and enums. /// See `prost/helpers/README.md` for more details. -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] #[proc_macro_derive(AnyPB)] pub fn any_pb(input: TokenStream) -> TokenStream { // Parse the string representation @@ -37,7 +37,7 @@ pub fn any_pb(input: TokenStream) -> TokenStream { } // Procedure macros can not be tested from the same crate. 
-#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] fn produce(ast: &DeriveInput) -> Result { let name = &ast.ident; diff --git a/src/rpc_client/src/lib.rs b/src/rpc_client/src/lib.rs index 3e744bb61608d..6afa67ef88efe 100644 --- a/src/rpc_client/src/lib.rs +++ b/src/rpc_client/src/lib.rs @@ -19,7 +19,7 @@ #![feature(result_option_inspect)] #![feature(type_alias_impl_trait)] #![feature(associated_type_defaults)] -#![feature(generators)] +#![feature(coroutines)] #![feature(iterator_try_collect)] #![feature(hash_extract_if)] #![feature(try_blocks)] @@ -30,12 +30,11 @@ use std::any::type_name; use std::fmt::{Debug, Formatter}; use std::future::Future; use std::iter::repeat; -use std::pin::pin; use std::sync::Arc; use anyhow::anyhow; use async_trait::async_trait; -use futures::future::{select, try_join_all, Either}; +use futures::future::try_join_all; use futures::stream::{BoxStream, Peekable}; use futures::{Stream, StreamExt}; use moka::future::Cache; @@ -58,13 +57,12 @@ mod sink_coordinate_client; mod stream_client; mod tracing; -use std::pin::Pin; - pub use compactor_client::{CompactorClient, GrpcCompactorProxyClient}; pub use compute_client::{ComputeClient, ComputeClientPool, ComputeClientPoolRef}; pub use connector_client::{ConnectorClient, SinkCoordinatorStreamHandle, SinkWriterStreamHandle}; pub use hummock_meta_client::{CompactionEventItem, HummockMetaClient}; pub use meta_client::{MetaClient, SinkCoordinationRpcClient}; +use risingwave_common::util::await_future_with_monitor_error_stream; pub use sink_coordinate_client::CoordinatorStreamHandle; pub use stream_client::{StreamClient, StreamClientPool, StreamClientPoolRef}; @@ -240,25 +238,16 @@ impl BidiStreamHandle { } pub async fn send_request(&mut self, request: REQ) -> Result<()> { - // Poll the response stream to early see the error - let send_request_result = match select( - pin!(self.request_sender.send(request)), - pin!(Pin::new(&mut self.response_stream).peek()), + match await_future_with_monitor_error_stream( + &mut self.response_stream, + self.request_sender.send(request), ) .await { - Either::Left((result, _)) => result, - Either::Right((response_result, send_future)) => match response_result { - None => { - return Err(anyhow!("end of response stream").into()); - } - Some(Err(e)) => { - return Err(e.clone().into()); - } - Some(Ok(_)) => send_future.await, - }, - }; - send_request_result - .map_err(|_| anyhow!("unable to send request {}", type_name::()).into()) + Ok(send_result) => send_result + .map_err(|_| anyhow!("unable to send request {}", type_name::()).into()), + Err(None) => Err(anyhow!("end of response stream").into()), + Err(Some(e)) => Err(e.into()), + } } } diff --git a/src/rpc_client/src/meta_client.rs b/src/rpc_client/src/meta_client.rs index 827860d1af7b3..95b746ea33e6c 100644 --- a/src/rpc_client/src/meta_client.rs +++ b/src/rpc_client/src/meta_client.rs @@ -698,6 +698,12 @@ impl MetaClient { Ok(resp.snapshot.unwrap()) } + pub async fn wait(&self) -> Result<()> { + let request = WaitRequest {}; + self.inner.wait(request).await?; + Ok(()) + } + pub async fn cancel_creating_jobs(&self, jobs: PbJobs) -> Result> { let request = CancelCreatingJobsRequest { jobs: Some(jobs) }; let resp = self.inner.cancel_creating_jobs(request).await?; @@ -1719,6 +1725,7 @@ macro_rules! 
for_all_meta_rpc { ,{ ddl_client, list_connections, ListConnectionsRequest, ListConnectionsResponse } ,{ ddl_client, drop_connection, DropConnectionRequest, DropConnectionResponse } ,{ ddl_client, get_tables, GetTablesRequest, GetTablesResponse } + ,{ ddl_client, wait, WaitRequest, WaitResponse } ,{ hummock_client, unpin_version_before, UnpinVersionBeforeRequest, UnpinVersionBeforeResponse } ,{ hummock_client, get_current_version, GetCurrentVersionRequest, GetCurrentVersionResponse } ,{ hummock_client, replay_version_delta, ReplayVersionDeltaRequest, ReplayVersionDeltaResponse } diff --git a/src/source/src/lib.rs b/src/source/src/lib.rs index 1a32888cdf651..aaa045c607c95 100644 --- a/src/source/src/lib.rs +++ b/src/source/src/lib.rs @@ -16,7 +16,7 @@ #![feature(trait_alias)] #![feature(lint_reasons)] #![feature(result_option_inspect)] -#![feature(generators)] +#![feature(coroutines)] #![feature(hash_extract_if)] #![feature(type_alias_impl_trait)] #![feature(box_patterns)] diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index ecae5a9663a88..5d802bae99cdc 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -1294,6 +1294,9 @@ pub enum Statement { /// /// Note: RisingWave specific statement. Flush, + /// WAIT for ALL running stream jobs to finish. + /// It will block the current session the condition is met. + Wait, } impl fmt::Display for Statement { @@ -1787,6 +1790,9 @@ impl fmt::Display for Statement { Statement::Flush => { write!(f, "FLUSH") } + Statement::Wait => { + write!(f, "WAIT") + } Statement::Begin { modes } => { write!(f, "BEGIN")?; if !modes.is_empty() { diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs index 76de970a919a9..58fb2d50c6287 100644 --- a/src/sqlparser/src/ast/statement.rs +++ b/src/sqlparser/src/ast/statement.rs @@ -294,6 +294,7 @@ pub enum Encode { Json, // Keyword::JSON Bytes, // Keyword::BYTES Native, + Template, } // TODO: unify with `from_keyword` @@ -309,6 +310,7 @@ impl fmt::Display for Encode { Encode::Json => "JSON", Encode::Bytes => "BYTES", Encode::Native => "NATIVE", + Encode::Template => "TEMPLATE", } ) } @@ -322,13 +324,12 @@ impl Encode { "CSV" => Encode::Csv, "PROTOBUF" => Encode::Protobuf, "JSON" => Encode::Json, + "TEMPLATE" => Encode::Template, "NATIVE" => Encode::Native, // used internally for schema change - _ => { - return Err(ParserError::ParserError( - "expected AVRO | BYTES | CSV | PROTOBUF | JSON | NATIVE after Encode" - .to_string(), - )) - } + _ => return Err(ParserError::ParserError( + "expected AVRO | BYTES | CSV | PROTOBUF | JSON | NATIVE | TEMPLATE after Encode" + .to_string(), + )), }) } } diff --git a/src/sqlparser/src/keywords.rs b/src/sqlparser/src/keywords.rs index 5c2fedb0ea547..4188f06f76ae3 100644 --- a/src/sqlparser/src/keywords.rs +++ b/src/sqlparser/src/keywords.rs @@ -540,6 +540,7 @@ define_keywords!( VIEWS, VIRTUAL, VOLATILE, + WAIT, WATERMARK, WHEN, WHENEVER, diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index ee054f7d17031..5cc094a204268 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -259,6 +259,7 @@ impl Parser { Keyword::PREPARE => Ok(self.parse_prepare()?), Keyword::COMMENT => Ok(self.parse_comment()?), Keyword::FLUSH => Ok(Statement::Flush), + Keyword::WAIT => Ok(Statement::Wait), _ => self.expected( "an SQL statement", Token::Word(w).with_location(token.location), diff --git a/src/storage/Cargo.toml b/src/storage/Cargo.toml index c6fc5531acd33..fc01eba294564 100644 --- 
a/src/storage/Cargo.toml +++ b/src/storage/Cargo.toml @@ -25,7 +25,7 @@ dyn-clone = "1.0.14" either = "1" enum-as-inner = "0.6" fail = "0.5" -foyer = { git = "https://github.com/mrcroxx/foyer", rev = "5d0134b" } +foyer = { git = "https://github.com/MrCroxx/foyer", rev = "2261151" } futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = { workspace = true } hex = "0.4" diff --git a/src/storage/backup/src/lib.rs b/src/storage/backup/src/lib.rs index 3e0549db188a2..1daacbf691c0d 100644 --- a/src/storage/backup/src/lib.rs +++ b/src/storage/backup/src/lib.rs @@ -25,7 +25,7 @@ #![feature(lazy_cell)] #![feature(let_chains)] #![feature(error_generic_member_access)] -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] pub mod error; pub mod meta_snapshot; diff --git a/src/storage/hummock_test/Cargo.toml b/src/storage/hummock_test/Cargo.toml index 600a5249ddf1b..8abf2f45e6855 100644 --- a/src/storage/hummock_test/Cargo.toml +++ b/src/storage/hummock_test/Cargo.toml @@ -20,7 +20,7 @@ bytes = { version = "1" } clap = { version = "4", features = ["derive"] } fail = "0.5" futures = { version = "0.3", default-features = false, features = ["alloc"] } -futures-async-stream = "0.2" +futures-async-stream = "0.2.9" itertools = "0.11" parking_lot = "0.12" rand = "0.8" @@ -47,7 +47,7 @@ futures = { version = "0.3", default-features = false, features = [ "executor", ] } -futures-async-stream = "0.2" +futures-async-stream = "0.2.9" risingwave_test_runner = { workspace = true } serial_test = "2.0" sync-point = { path = "../../utils/sync-point" } diff --git a/src/storage/hummock_test/src/bin/replay/main.rs b/src/storage/hummock_test/src/bin/replay/main.rs index 1e9c9591bc864..ae6038d8b5d16 100644 --- a/src/storage/hummock_test/src/bin/replay/main.rs +++ b/src/storage/hummock_test/src/bin/replay/main.rs @@ -13,7 +13,7 @@ // limitations under the License. 
#![feature(bound_map)] -#![feature(generators)] +#![feature(coroutines)] #![feature(stmt_expr_attributes)] #![feature(proc_macro_hygiene)] diff --git a/src/storage/hummock_test/src/lib.rs b/src/storage/hummock_test/src/lib.rs index 73e1d8cd0eaad..593771435f1e0 100644 --- a/src/storage/hummock_test/src/lib.rs +++ b/src/storage/hummock_test/src/lib.rs @@ -17,7 +17,6 @@ #![feature(bound_map)] #![feature(type_alias_impl_trait)] #![feature(associated_type_bounds)] -#![feature(return_position_impl_trait_in_trait)] #[cfg(test)] mod compactor_tests; diff --git a/src/storage/hummock_trace/Cargo.toml b/src/storage/hummock_trace/Cargo.toml index 46eabf17835e4..150b35b79cda0 100644 --- a/src/storage/hummock_trace/Cargo.toml +++ b/src/storage/hummock_trace/Cargo.toml @@ -14,7 +14,7 @@ bincode = { version = "=2.0.0-rc.3", features = ["serde"] } byteorder = "1" bytes = { version = "1", features = ["serde"] } futures = { version = "0.3", default-features = false, features = ["alloc"] } -futures-async-stream = "0.2" +futures-async-stream = "0.2.9" parking_lot = "0.12" prost = { workspace = true } risingwave_common = { workspace = true } diff --git a/src/storage/hummock_trace/src/lib.rs b/src/storage/hummock_trace/src/lib.rs index df757c58cc4fa..8c6c8913205ab 100644 --- a/src/storage/hummock_trace/src/lib.rs +++ b/src/storage/hummock_trace/src/lib.rs @@ -16,7 +16,7 @@ #![feature(cursor_remaining)] #![feature(bound_map)] #![feature(trait_alias)] -#![feature(generators)] +#![feature(coroutines)] mod collector; mod error; diff --git a/src/storage/src/hummock/compactor/compactor_runner.rs b/src/storage/src/hummock/compactor/compactor_runner.rs index 583bab3d10b3c..a21016014d247 100644 --- a/src/storage/src/hummock/compactor/compactor_runner.rs +++ b/src/storage/src/hummock/compactor/compactor_runner.rs @@ -140,9 +140,6 @@ impl CompactorRunner { Ok((self.split_index, ssts, compaction_stat)) } - // This is a clippy bug, see https://github.com/rust-lang/rust-clippy/issues/11380. - // TODO: remove `allow` here after the issued is closed. - #[expect(clippy::needless_pass_by_ref_mut)] pub async fn build_delete_range_iter( sstable_infos: &Vec, sstable_store: &SstableStoreRef, diff --git a/src/storage/src/hummock/compactor/mod.rs b/src/storage/src/hummock/compactor/mod.rs index d2f36167675e7..137682d6f7825 100644 --- a/src/storage/src/hummock/compactor/mod.rs +++ b/src/storage/src/hummock/compactor/mod.rs @@ -325,7 +325,7 @@ impl Compactor { /// The background compaction thread that receives compaction tasks from hummock compaction /// manager and runs compaction tasks. -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] pub fn start_compactor( compactor_context: CompactorContext, hummock_meta_client: Arc, @@ -618,7 +618,7 @@ pub fn start_compactor( /// The background compaction thread that receives compaction tasks from hummock compaction /// manager and runs compaction tasks. -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] pub fn start_shared_compactor( grpc_proxy_client: GrpcCompactorProxyClient, mut receiver: mpsc::UnboundedReceiver>, diff --git a/src/storage/src/hummock/event_handler/uploader.rs b/src/storage/src/hummock/event_handler/uploader.rs index 995a9d181e2f5..a07da55fb7046 100644 --- a/src/storage/src/hummock/event_handler/uploader.rs +++ b/src/storage/src/hummock/event_handler/uploader.rs @@ -1641,9 +1641,6 @@ mod tests { (buffer_tracker, uploader, new_task_notifier) } - // This is a clippy bug, see https://github.com/rust-lang/rust-clippy/issues/11380. 
- // TODO: remove `allow` here after the issued is closed. - #[expect(clippy::needless_pass_by_ref_mut)] async fn assert_uploader_pending(uploader: &mut HummockUploader) { for _ in 0..10 { yield_now().await; diff --git a/src/storage/src/lib.rs b/src/storage/src/lib.rs index c5ffe656ab893..0f2f155f6a903 100644 --- a/src/storage/src/lib.rs +++ b/src/storage/src/lib.rs @@ -18,7 +18,7 @@ #![feature(bound_map)] #![feature(custom_test_frameworks)] #![feature(extract_if)] -#![feature(generators)] +#![feature(coroutines)] #![feature(hash_extract_if)] #![feature(lint_reasons)] #![feature(proc_macro_hygiene)] @@ -35,15 +35,13 @@ #![feature(btree_extract_if)] #![feature(exact_size_is_empty)] #![feature(lazy_cell)] -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] #![recursion_limit = "256"] #![feature(error_generic_member_access)] #![feature(let_chains)] #![feature(associated_type_bounds)] #![feature(exclusive_range_pattern)] #![feature(impl_trait_in_assoc_type)] -#![feature(async_fn_in_trait)] -#![feature(return_position_impl_trait_in_trait)] pub mod hummock; pub mod memory; diff --git a/src/stream/src/common/table/state_table_cache.rs b/src/stream/src/common/table/state_table_cache.rs index 156637a41a1a4..b458ef52537e4 100644 --- a/src/stream/src/common/table/state_table_cache.rs +++ b/src/stream/src/common/table/state_table_cache.rs @@ -67,9 +67,9 @@ type WatermarkCacheKey = DefaultOrdered; /// Issue delete ranges. /// /// B. Refreshing the cache: -/// On barrier, do table scan from most_recently_cleaned_watermark (inclusive) to +inf. +/// On barrier, do table scan from `most_recently_cleaned_watermark` (inclusive) to +inf. /// Take the Top N rows and insert into cache. -/// This has to be implemented in state_table. +/// This has to be implemented in `state_table`. /// We do not need to store any values, just the keys. /// /// TODO(kwannoel): diff --git a/src/stream/src/executor/backfill/utils.rs b/src/stream/src/executor/backfill/utils.rs index 259b67d5f202b..663f9be94cf5e 100644 --- a/src/stream/src/executor/backfill/utils.rs +++ b/src/stream/src/executor/backfill/utils.rs @@ -309,9 +309,6 @@ pub(crate) async fn get_progress_per_vnode( table: &mut StateTableInner, epoch: EpochPair, diff --git a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs index 4178012cb9d9e..75414fe24a379 100644 --- a/src/stream/src/executor/hash_join.rs +++ b/src/stream/src/executor/hash_join.rs @@ -202,11 +202,11 @@ impl std::fmt::Debug for JoinSide { impl JoinSide { // WARNING: Please do not call this until we implement it. 
- #[expect(dead_code)] fn is_dirty(&self) -> bool { unimplemented!() } + #[expect(dead_code)] fn clear_cache(&mut self) { assert!( !self.is_dirty(), diff --git a/src/stream/src/lib.rs b/src/stream/src/lib.rs index 5a68b1b712b26..389dfae7b8c0c 100644 --- a/src/stream/src/lib.rs +++ b/src/stream/src/lib.rs @@ -21,8 +21,8 @@ #![feature(let_chains)] #![feature(hash_extract_if)] #![feature(extract_if)] -#![feature(generators)] -#![feature(iter_from_generator)] +#![feature(coroutines)] +#![feature(iter_from_coroutine)] #![feature(proc_macro_hygiene)] #![feature(stmt_expr_attributes)] #![feature(allocator_api)] @@ -36,13 +36,11 @@ #![feature(bound_map)] #![feature(iter_order_by)] #![feature(exact_size_is_empty)] -#![feature(return_position_impl_trait_in_trait)] #![feature(impl_trait_in_assoc_type)] #![feature(test)] #![feature(is_sorted)] #![feature(btree_cursors)] #![feature(assert_matches)] -#![feature(async_fn_in_trait)] #[macro_use] extern crate tracing; diff --git a/src/stream/tests/integration_tests/hash_agg.rs b/src/stream/tests/integration_tests/hash_agg.rs index 1b61bc5cd1d7f..9f4908f252532 100644 --- a/src/stream/tests/integration_tests/hash_agg.rs +++ b/src/stream/tests/integration_tests/hash_agg.rs @@ -284,7 +284,7 @@ async fn test_hash_agg_emit_on_window_close() { }; check_with_script( - || create_executor(), + create_executor, &format!( r###" - !barrier 1 diff --git a/src/tests/compaction_test/src/bin/compaction.rs b/src/tests/compaction_test/src/bin/compaction.rs index 443b79ad625b8..d9ba16f7437b8 100644 --- a/src/tests/compaction_test/src/bin/compaction.rs +++ b/src/tests/compaction_test/src/bin/compaction.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] fn main() { use clap::Parser; diff --git a/src/tests/compaction_test/src/bin/delete_range.rs b/src/tests/compaction_test/src/bin/delete_range.rs index 348a71dc3cce5..592f61a3db4fa 100644 --- a/src/tests/compaction_test/src/bin/delete_range.rs +++ b/src/tests/compaction_test/src/bin/delete_range.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#![cfg_attr(coverage, feature(no_coverage))] +#![cfg_attr(coverage, feature(coverage_attribute))] -#[cfg_attr(coverage, no_coverage)] +#[cfg_attr(coverage, coverage(off))] fn main() { use clap::Parser; diff --git a/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs b/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs index c05e52c927424..776692b2fab90 100644 --- a/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs +++ b/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs @@ -25,7 +25,7 @@ const ROOT_TABLE_CREATE: &str = "create table t1 (v1 int);"; const MV1: &str = "create materialized view m1 as select * from t1 where v1 > 5;"; const MV2: &str = "create materialized view m2 as select * from t1 where v1 > 10;"; const MV3: &str = "create materialized view m3 as select * from m2 where v1 < 15;"; -const MV4: &str = "create materialized view m4 as select m1.v1 as m1v, m3.v1 as m3v from m1 join m3 on m1.v1 = m3.v1;"; +const MV4: &str = "create materialized view m4 as select m1.v1 as m1v, m3.v1 as m3v from m1 join m3 on m1.v1 = m3.v1 limit 100;"; const MV5: &str = "create materialized view m5 as select * from m4;"; #[tokio::test] @@ -40,6 +40,7 @@ async fn test_simple_cascade_materialized_view() -> Result<()> { .locate_one_fragment([ identity_contains("materialize"), no_identity_contains("chain"), + no_identity_contains("topn"), no_identity_contains("hashjoin"), ]) .await?; @@ -129,6 +130,7 @@ async fn test_diamond_cascade_materialized_view() -> Result<()> { .locate_one_fragment([ identity_contains("materialize"), no_identity_contains("chain"), + no_identity_contains("topn"), no_identity_contains("hashjoin"), ]) .await?; diff --git a/src/tests/simulation/tests/integration_tests/scale/plan.rs b/src/tests/simulation/tests/integration_tests/scale/plan.rs index c7244dc826b42..8b62a58998a3f 100644 --- a/src/tests/simulation/tests/integration_tests/scale/plan.rs +++ b/src/tests/simulation/tests/integration_tests/scale/plan.rs @@ -39,10 +39,7 @@ async fn test_resize_normal() -> Result<()> { .await?; let join_fragment = cluster - .locate_one_fragment([ - identity_contains("hashJoin"), - identity_contains("materialize"), - ]) + .locate_one_fragment([identity_contains("hashJoin")]) .await?; let join_fragment_id = join_fragment.inner.fragment_id; @@ -270,7 +267,7 @@ async fn test_resize_no_shuffle() -> Result<()> { session .run( "create materialized view mv7 as select mv1.v as mv1v, mv5.v as mv5v from mv1 -join mv5 on mv1.v = mv5.v;", +join mv5 on mv1.v = mv5.v limit 1;", ) .await?; @@ -316,6 +313,7 @@ join mv5 on mv1.v = mv5.v;", let top_materialize_fragment = cluster .locate_one_fragment([ identity_contains("materialize"), + no_identity_contains("topn"), no_identity_contains("chain"), no_identity_contains("hashJoin"), ]) diff --git a/src/utils/pgwire/src/lib.rs b/src/utils/pgwire/src/lib.rs index 1cda373ee9568..84a17d9907879 100644 --- a/src/utils/pgwire/src/lib.rs +++ b/src/utils/pgwire/src/lib.rs @@ -17,8 +17,6 @@ #![feature(result_option_inspect)] #![feature(iterator_try_collect)] #![feature(trusted_len)] -#![feature(async_fn_in_trait)] -#![feature(return_position_impl_trait_in_trait)] #![feature(lazy_cell)] #![expect(clippy::doc_markdown, reason = "FIXME: later")] diff --git a/src/utils/pgwire/src/pg_response.rs b/src/utils/pgwire/src/pg_response.rs index 29ea77f83b71b..eeec929732f50 100644 --- a/src/utils/pgwire/src/pg_response.rs +++ b/src/utils/pgwire/src/pg_response.rs @@ -92,6 
+92,7 @@ pub enum StatementType { ROLLBACK, SET_TRANSACTION, CANCEL_COMMAND, + WAIT, } impl std::fmt::Display for StatementType { @@ -278,6 +279,7 @@ impl StatementType { }, Statement::Explain { .. } => Ok(StatementType::EXPLAIN), Statement::Flush => Ok(StatementType::FLUSH), + Statement::Wait => Ok(StatementType::WAIT), _ => Err("unsupported statement type".to_string()), } } diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml index 6c08e08490f7d..67b218c787652 100644 --- a/src/workspace-hack/Cargo.toml +++ b/src/workspace-hack/Cargo.toml @@ -37,7 +37,7 @@ combine = { version = "4", features = ["tokio"] } crossbeam-epoch = { version = "0.9" } crossbeam-queue = { version = "0.3" } crossbeam-utils = { version = "0.8" } -deranged = { version = "0.3", default-features = false, features = ["serde", "std"] } +deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] } digest = { version = "0.10", features = ["mac", "oid", "std"] } either = { version = "1", features = ["serde"] } fail = { version = "0.5", default-features = false, features = ["failpoints"] } @@ -147,7 +147,7 @@ auto_enums = { version = "0.8", features = ["futures03"] } bitflags = { version = "2", default-features = false, features = ["serde", "std"] } bytes = { version = "1", features = ["serde"] } cc = { version = "1", default-features = false, features = ["parallel"] } -deranged = { version = "0.3", default-features = false, features = ["serde", "std"] } +deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] } either = { version = "1", features = ["serde"] } fixedbitset = { version = "0.4" } frunk_core = { version = "0.4", default-features = false, features = ["std"] }
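
Editor's note on the user-facing piece of this patch: the new `WAIT` statement parses from the `WAIT` keyword, maps to `StatementType::WAIT`, and (via `MetaClient::wait` and `DdlController::wait`) blocks the session while any background stream jobs are still being created, with the meta-side loop polling once per second for up to roughly 30 minutes. A minimal client-side sketch over the Postgres wire protocol follows; the connection string, port, user, database, and table/view names are illustrative assumptions and not part of this change, and the frontend handler wiring for `Statement::Wait` is presumed to live elsewhere in this PR.

use tokio_postgres::NoTls;

#[tokio::main]
async fn main() -> Result<(), tokio_postgres::Error> {
    // Assumed local deployment defaults; adjust host/port/user/dbname as needed.
    let (client, connection) =
        tokio_postgres::connect("host=localhost port=4566 user=root dbname=dev", NoTls).await?;

    // Drive the connection on a background task.
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {e}");
        }
    });

    // Start a stream job that may continue backfilling in the background.
    client
        .simple_query("CREATE MATERIALIZED VIEW mv AS SELECT v1 FROM t1;")
        .await?;

    // New in this patch: block this session until no creating background jobs remain
    // (the meta node stops polling after about 30 minutes).
    client.simple_query("WAIT;").await?;

    Ok(())
}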