diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 235fb3bbacfbb..acd75f253b699 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -3,5 +3,5 @@ contact_links:
url: https://github.com/risingwavelabs/risingwave/discussions
about: Have questions? Welcome to open a discussion.
- name: Community Chat
- url: https://join.slack.com/t/risingwave-community/shared_invite/zt-120rft0mr-d8uGk3d~NZiZAQWPnElOfw
+ url: https://risingwave.com/slack
about: Join the RisingWave Slack community and chat with us.
diff --git a/.github/workflows/auto-create-doc-issue-by-issue.yml b/.github/workflows/auto-create-doc-issue-by-issue.yml
new file mode 100644
index 0000000000000..0c8d78062977a
--- /dev/null
+++ b/.github/workflows/auto-create-doc-issue-by-issue.yml
@@ -0,0 +1,31 @@
+name: Issue Documentation Checker
+
+on:
+ issues:
+ types:
+ - closed
+ - labeled
+
+jobs:
+ create-issue:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Log the event payload
+ run: echo "${{ toJSON(github.event) }}"
+ - name: Check if issue is done and labeled 'user-facing-changes'
+ uses: dacbd/create-issue-action@main
+ if: ${{ github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user-facing-changes') }}
+ with:
+ token: ${{ secrets.ACCESS_TOKEN }}
+ owner: risingwavelabs
+ repo: risingwave-docs
+ title: |
+ Document: ${{ github.event.issue.title }}
+ body: |
+ ## Context
+ Source Issue URL: ${{ github.event.issue.html_url }}
+ Created At: ${{ github.event.issue.created_at }}
+ Created By: ${{ github.event.issue.user.login }}
+ Closed At: ${{ github.event.issue.closed_at }}
diff --git a/.github/workflows/auto-create-docs-pr.yml b/.github/workflows/auto-create-doc-issue-by-pr.yml
similarity index 100%
rename from .github/workflows/auto-create-docs-pr.yml
rename to .github/workflows/auto-create-doc-issue-by-pr.yml
diff --git a/.github/workflows/nightly-rust.yml b/.github/workflows/nightly-rust.yml
index a89d86ac9e560..5219b4805c74d 100644
--- a/.github/workflows/nightly-rust.yml
+++ b/.github/workflows/nightly-rust.yml
@@ -21,6 +21,13 @@ jobs:
remove-docker-images: 'true'
root-reserve-mb: 2048
- uses: actions/checkout@v3
+ if: ${{ github.event_name == 'schedule' }}
+ with:
+ # For the daily scheduled run, we use a fixed branch so that we can apply patches to fix compile errors earlier.
+ # This also ensures that any regression is due to the new Rust toolchain rather than new RisingWave code.
+ ref: xxchan/latest-nightly-rust
+ - uses: actions/checkout@v3
+ if: ${{ !(github.event_name == 'schedule') }}
- name: Setup Rust toolchain
run: |
rustup override set nightly
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9b519c16010ba..c0b3991fc1f61 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -4,7 +4,7 @@ Thanks for your interest in contributing to RisingWave! We welcome and appreciat
This document describes how to submit your code changes. To learn about the development process, see the [developer guide](docs/developer-guide.md). To understand the design and implementation of RisingWave, refer to the design docs listed in [docs/README.md](docs/README.md).
-If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://join.slack.com/t/risingwave-community/shared_invite/zt-120rft0mr-d8uGk3d~NZiZAQWPnElOfw) to join the channel.
+If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://risingwave.com/slack) to join the channel.
To report bugs, create a [GitHub issue](https://github.com/risingwavelabs/risingwave/issues/new/choose).
diff --git a/Cargo.lock b/Cargo.lock
index b2875296b683a..99a5a675c0342 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2448,10 +2448,11 @@ dependencies = [
[[package]]
name = "deranged"
-version = "0.3.8"
+version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946"
+checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3"
dependencies = [
+ "powerfmt",
"serde",
]
@@ -2554,8 +2555,7 @@ checksum = "86e3bdc80eee6e16b2b6b0f87fbc98c04bee3455e35174c0de1a125d0688c632"
[[package]]
name = "dlv-list"
version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8aead04dc46b5f263c25721cf25c9e595951d15055f8063f92392fa0d7f64cf4"
+source = "git+https://github.com/sgodwincs/dlv-list-rs.git?rev=5bbc5d0#5bbc5d0cc84f257e173d851f8dc1674fb6e46f95"
dependencies = [
"const-random",
]
@@ -3018,7 +3018,7 @@ dependencies = [
[[package]]
name = "foyer"
version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
dependencies = [
"foyer-common",
"foyer-intrusive",
@@ -3029,10 +3029,11 @@ dependencies = [
[[package]]
name = "foyer-common"
version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
dependencies = [
"bytes",
"foyer-workspace-hack",
+ "itertools 0.11.0",
"madsim-tokio",
"parking_lot 0.12.1",
"paste",
@@ -3043,13 +3044,13 @@ dependencies = [
[[package]]
name = "foyer-intrusive"
version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
dependencies = [
"bytes",
"cmsketch",
"foyer-common",
"foyer-workspace-hack",
- "itertools 0.10.5",
+ "itertools 0.11.0",
"memoffset",
"parking_lot 0.12.1",
"paste",
@@ -3060,7 +3061,7 @@ dependencies = [
[[package]]
name = "foyer-storage"
version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
dependencies = [
"anyhow",
"async-channel",
@@ -3089,7 +3090,7 @@ dependencies = [
[[package]]
name = "foyer-workspace-hack"
version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
dependencies = [
"crossbeam-utils",
"either",
@@ -3098,7 +3099,7 @@ dependencies = [
"futures-sink",
"futures-util",
"hyper",
- "itertools 0.10.5",
+ "itertools 0.11.0",
"libc",
"memchr",
"parking_lot 0.12.1",
@@ -3227,9 +3228,9 @@ dependencies = [
[[package]]
name = "futures-async-stream"
-version = "0.2.7"
+version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f529ccdeacfa2446a9577041686cf1abb839b1b3e15fee4c1b1232ab3b7d799f"
+checksum = "379790776b0d953337df4ab7ecc51936c66ea112484cad7912907b1d34253ebf"
dependencies = [
"futures-async-stream-macro",
"futures-core",
@@ -3238,13 +3239,13 @@ dependencies = [
[[package]]
name = "futures-async-stream-macro"
-version = "0.2.7"
+version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca2b48ee06dc8d2808ba5ebad075d06c3406085bb19deaac33be64c39113bf80"
+checksum = "5df2c13d48c8cb8a3ec093ede6f0f4482f327d7bb781120c5fb483ef0f17e758"
dependencies = [
"proc-macro2",
"quote",
- "syn 1.0.109",
+ "syn 2.0.37",
]
[[package]]
@@ -5494,8 +5495,7 @@ dependencies = [
[[package]]
name = "ordered-multimap"
version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ed8acf08e98e744e5384c8bc63ceb0364e68a6854187221c18df61c4797690e"
+source = "git+https://github.com/risingwavelabs/ordered-multimap-rs.git?rev=19c743f#19c743f3e3d106c99ba37628f06a2ca6faa2284f"
dependencies = [
"dlv-list",
"hashbrown 0.13.2",
@@ -6025,6 +6025,12 @@ dependencies = [
"serde_json",
]
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
[[package]]
name = "pprof"
version = "0.13.0"
@@ -7615,6 +7621,7 @@ dependencies = [
name = "risingwave_jni_core"
version = "0.1.0"
dependencies = [
+ "anyhow",
"bytes",
"cfg-or-panic",
"futures",
@@ -7688,7 +7695,6 @@ dependencies = [
"sea-orm",
"serde",
"serde_json",
- "sqlx",
"sync-point",
"thiserror",
"tokio-retry",
@@ -9806,14 +9812,15 @@ dependencies = [
[[package]]
name = "time"
-version = "0.3.28"
+version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48"
+checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5"
dependencies = [
"deranged",
"itoa",
"libc",
"num_threads",
+ "powerfmt",
"serde",
"time-core",
"time-macros",
@@ -9821,15 +9828,15 @@ dependencies = [
[[package]]
name = "time-core"
-version = "0.1.1"
+version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"
+checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
-version = "0.2.14"
+version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572"
+checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20"
dependencies = [
"time-core",
]
@@ -10978,7 +10985,6 @@ dependencies = [
"futures-util",
"hashbrown 0.12.3",
"hashbrown 0.14.0",
- "heck 0.4.1",
"hyper",
"indexmap 1.9.3",
"itertools 0.10.5",
diff --git a/Cargo.toml b/Cargo.toml
index ef09221b818a2..f8a9b7d0e2fa5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -97,7 +97,7 @@ aws-smithy-types = "0.55"
aws-endpoint = "0.55"
aws-types = "0.55"
etcd-client = { package = "madsim-etcd-client", version = "0.4" }
-futures-async-stream = "0.2"
+futures-async-stream = "0.2.9"
hytra = "0.1"
rdkafka = { package = "madsim-rdkafka", version = "0.3.0", features = [
"cmake-build",
@@ -165,6 +165,8 @@ unused_must_use = "forbid"
future_incompatible = "warn"
nonstandard_style = "warn"
rust_2018_idioms = "warn"
+# Backward compatibility is not important for an application.
+async_fn_in_trait = "allow"
[workspace.lints.clippy]
uninlined_format_args = "allow"
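For context, here is a minimal sketch (not part of this patch) of the pattern that the newly allowed `async_fn_in_trait` lint flags: an `async fn` declared directly in a trait, which this nightly accepts without the feature gate removed elsewhere in this PR. The trait, types, and values below are illustrative only.

```rust
// Illustrative only: `async fn` in a trait, the pattern the lint warns about.
// The lint exists because callers of such a trait cannot add bounds (for
// example `Send`) on the returned future; that is acceptable for an
// application crate, hence the `allow`.
trait Fetch {
    async fn fetch(&self, key: u64) -> Option<String>;
}

struct Memory;

impl Fetch for Memory {
    async fn fetch(&self, key: u64) -> Option<String> {
        Some(format!("value-{key}"))
    }
}

fn main() {
    // Drive the async call with a lightweight executor for the sketch.
    let out = futures::executor::block_on(Memory.fetch(1));
    assert_eq!(out.as_deref(), Some("value-1"));
}
```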
@@ -229,8 +231,8 @@ opt-level = 2
incremental = false
debug = 1
-# Patch third-party crates for deterministic simulation.
[patch.crates-io]
+# Patch third-party crates for deterministic simulation.
quanta = { git = "https://github.com/madsim-rs/quanta.git", rev = "948bdc3" }
getrandom = { git = "https://github.com/madsim-rs/getrandom.git", rev = "8daf97e" }
tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "fe39bb8e" }
@@ -238,3 +240,8 @@ tokio-retry = { git = "https://github.com/madsim-rs/rust-tokio-retry.git", rev =
tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "ac00d88" }
# patch: unlimit 4MB message size for grpc client
etcd-client = { git = "https://github.com/risingwavelabs/etcd-client.git", rev = "4e84d40" }
+
+# Patch for coverage_attribute.
+# https://github.com/sgodwincs/dlv-list-rs/pull/19#issuecomment-1774786289
+dlv-list = { git = "https://github.com/sgodwincs/dlv-list-rs.git", rev = "5bbc5d0" }
+ordered-multimap = { git = "https://github.com/risingwavelabs/ordered-multimap-rs.git", rev = "19c743f" }
diff --git a/README.md b/README.md
index c1878a2717159..29a7d7e51888a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+
@@ -5,23 +6,110 @@
-[![Slack](https://badgen.net/badge/Slack/Join%20RisingWave/0abd59?icon=slack)](https://risingwave.com/slack)
-[![Build status](https://badge.buildkite.com/9394d2bca0f87e2e97aa78b25f765c92d4207c0b65e7f6648f.svg)](https://buildkite.com/risingwavelabs/main)
-[![codecov](https://codecov.io/gh/risingwavelabs/risingwave/branch/main/graph/badge.svg?token=EB44K9K38B)](https://codecov.io/gh/risingwavelabs/risingwave)
-
-RisingWave is a distributed SQL streaming database. It is designed to reduce the complexity and cost of building stream processing applications. RisingWave consumes streaming data, performs incremental computations when new data comes in, and updates results dynamically. As a database system, RisingWave maintains results inside its own storage so that users can access data efficiently.
-RisingWave offers wire compatibility with PostgreSQL and demonstrates exceptional performance surpassing the previous generation of stream processing systems, including Apache Flink, by several orders of magnitude.
-It particularly excels in handling complex stateful operations like multi-stream joins.
+
-RisingWave ingests data from sources like Apache Kafka, Apache Pulsar, Amazon Kinesis, Redpanda, and materialized CDC sources. Data in RisingWave can be delivered to external targets such as message brokers, data warehouses, and data lakes for storage or additional processing.
+### 🌊Stream Processing Redefined.
-RisingWave 1.0 is a battle-tested version that has undergone rigorous stress tests and performance evaluations. It has proven its reliability and efficiency through successful deployments in numerous production environments across dozens of companies.
+
-Learn more at [Introduction to RisingWave](https://docs.risingwave.com/docs/current/intro/).
+
+ Documentation 📑
+ Hands-on Tutorials 🎯
+ RisingWave Cloud 🚀
+
+ Get Instant Help
+
+
+
+
+RisingWave is a distributed SQL streaming database that enables simple, efficient, and reliable processing of streaming data.
![RisingWave](https://github.com/risingwavelabs/risingwave-docs/blob/0f7e1302b22493ba3c1c48e78810750ce9a5ff42/docs/images/archi_simple.png)
+## How to install
+**Ubuntu**
+```
+wget https://github.com/risingwavelabs/risingwave/releases/download/v1.3.0/risingwave-v1.3.0-x86_64-unknown-linux.tar.gz
+tar xvf risingwave-v1.3.0-x86_64-unknown-linux.tar.gz
+./risingwave playground
+```
+**Mac**
+```
+brew tap risingwavelabs/risingwave
+brew install risingwave
+risingwave playground
+```
+Now connect to RisingWave using `psql`:
+```
+psql -h localhost -p 4566 -d dev -U root
+```
+
+Learn more at [Quick Start](https://docs.risingwave.com/docs/current/get-started/).
+
+## Why RisingWave for stream processing?
+RisingWave adeptly tackles some of the most challenging problems in stream processing. Compared to existing stream processing systems, RisingWave shines through with the following key features:
+* **Easy to learn**
+ * RisingWave speaks PostgreSQL-style SQL, enabling users to dive into stream processing in much the same way as operating a PostgreSQL database.
+* **Highly efficient in multi-stream joins**
+ * RisingWave has made significant optimizations for multiple stream join scenarios. Users can easily join 10-20 streams (or more) efficiently in a production environment.
+* **High resource utilization**
+ * Queries in RisingWave leverage shared computational resources, eliminating the need for users to manually allocate resources for each query.
+* **No compromise on large state management**
+ * The decoupled compute-storage architecture of RisingWave ensures remote persistence of internal states, and users never need to worry about the size of internal states when handling complex queries.
+* **Transparent dynamic scaling**
+ * RisingWave supports near-instantaneous dynamic scaling without any service interruptions.
+* **Instant failure recovery**
+ * RisingWave's state management mechanism allows it to recover from failure in seconds, not minutes or hours.
+* **Easy to verify correctness**
+ * RisingWave persists results in materialized views and allows users to break down complex stream computation programs into stacked materialized views, simplifying program development and result verification.
+* **Simplified data stack**
+ * RisingWave's ability to store data and serve queries eliminates the need for separate maintenance of stream processors and databases. Users can effortlessly connect RisingWave to their preferred BI tools or through client libraries.
+* **Simple to maintain and operate**
+ * RisingWave abstracts away unnecessary low-level details, allowing users to concentrate solely on SQL code-level issues.
+* **Rich ecosystem**
+ * With integrations to a diverse range of cloud systems and the PostgreSQL ecosystem, RisingWave boasts a rich and expansive ecosystem.
+
+## RisingWave's limitations
+RisingWave isn’t a panacea for all data engineering hurdles. It has its own set of limitations:
+* **No programmable interfaces**
+ * RisingWave does not provide low-level APIs in languages like Java and Scala, and does not allow users to manage internal states manually (unless you want to hack!). For coding in Java, Scala, and other languages, please consider using RisingWave's User-Defined Functions (UDF).
+* **No support for transaction processing**
+ * RisingWave isn’t cut out for transactional workloads, thus it’s not a viable substitute for operational databases dedicated to transaction processing. However, it supports read-only transactions, ensuring data freshness and consistency. It also comprehends the transactional semantics of upstream database Change Data Capture (CDC).
+* **Not tailored for ad-hoc analytical queries**
+ * RisingWave's row store design is tailored for optimal stream processing performance rather than interactive analytical workloads. Hence, it's not a suitable replacement for OLAP databases. Yet, a reliable integration with many OLAP databases exists, and a collaborative use of RisingWave and OLAP databases is a common practice among many users.
+
## RisingWave Cloud
@@ -29,19 +117,10 @@ RisingWave Cloud is a fully-managed and scalable stream processing platform powe
## Notes on telemetry
-RisingWave collects anonymous usage statistics to better understand how the community is using RisingWave. The sole intention of this exercise is to help improve the product. These statistics are related to system resource usage, OS versions and system uptime. RisingWave doesn't have access to any user data or metadata running on RisingWave clusters including source and sink connection parameters, sources, sinks, materialized views, and tables. Users have the option to opt out of this collection using a system parameter. Please refer to the RisingWave user documentation for more details.
-
-## Get started
-
-- To learn about how to install and run RisingWave, see [Get started](https://docs.risingwave.com/docs/current/get-started/).
-- To learn about how to ingest data and the supported data sources, see [Sources](https://docs.risingwave.com/docs/current/data-ingestion/).
-- To learn about how to transform data using the PostgreSQL-compatible SQL of RisingWave, see [SQL reference](https://docs.risingwave.com/docs/current/sql-references/).
-- To learn about how to deliver data and the supported data sinks, see [Sinks](https://docs.risingwave.com/docs/current/data-delivery/).
-- To learn about new features and changes in the current and previous versions, see [Release notes](https://docs.risingwave.com/release-notes/).
-
-## Documentation
+RisingWave collects anonymous usage statistics to better understand how the community is using RisingWave. The sole intention of this exercise is to help improve the product. Users may opt out easily at any time. Please refer to the [user documentation](https://docs.risingwave.com/docs/current/telemetry/) for more details.
-To learn about how to use RisingWave, refer to [RisingWave User Documentation](https://docs.risingwave.com/). To learn about the development process, see the [developer guide](docs/developer-guide.md). To understand the design and implementation of RisingWave, refer to the design docs listed in [readme.md](docs/README.md).
+## In-production use cases
+Like other stream processing systems, the primary use cases of RisingWave include monitoring, alerting, real-time dashboard reporting, streaming ETL (Extract, Transform, Load), machine learning feature engineering, and more. It has already been adopted in fields such as financial trading, manufacturing, new media, logistics, gaming, and more. Check out [customer stories](https://www.risingwave.com/use-cases/).
## Community
diff --git a/ci/build-ci-image.sh b/ci/build-ci-image.sh
index 43ff81ade2b85..59c88e5e9a9ae 100755
--- a/ci/build-ci-image.sh
+++ b/ci/build-ci-image.sh
@@ -13,7 +13,7 @@ cat ../rust-toolchain
# !!! CHANGE THIS WHEN YOU WANT TO BUMP CI IMAGE !!! #
# AND ALSO docker-compose.yml #
######################################################
-export BUILD_ENV_VERSION=v20230919
+export BUILD_ENV_VERSION=v20231022
export BUILD_TAG="public.ecr.aws/x5u3w5h6/rw-build-env:${BUILD_ENV_VERSION}"
diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml
index 6fe7cfbfdeca2..66dd2d175e675 100644
--- a/ci/docker-compose.yml
+++ b/ci/docker-compose.yml
@@ -71,7 +71,7 @@ services:
retries: 5
source-test-env:
- image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+ image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
depends_on:
- mysql
- db
@@ -81,10 +81,11 @@ services:
- ..:/risingwave
sink-test-env:
- image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+ image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
depends_on:
- mysql
- db
+ - message_queue
- elasticsearch
- clickhouse-server
- pulsar
@@ -92,12 +93,12 @@ services:
- ..:/risingwave
rw-build-env:
- image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+ image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
volumes:
- ..:/risingwave
ci-flamegraph-env:
- image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+ image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
# NOTE(kwannoel): This is used in order to permit
# syscalls for `nperf` (perf_event_open),
# so it can do CPU profiling.
@@ -108,7 +109,7 @@ services:
- ..:/risingwave
regress-test-env:
- image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+ image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
depends_on:
db:
condition: service_healthy
diff --git a/ci/rust-toolchain b/ci/rust-toolchain
index ebc0b6c285a4e..fe2a026f6e40f 100644
--- a/ci/rust-toolchain
+++ b/ci/rust-toolchain
@@ -1,2 +1,2 @@
[toolchain]
-channel = "nightly-2023-09-09"
+channel = "nightly-2023-10-21"
diff --git a/ci/scripts/deterministic-recovery-test.sh b/ci/scripts/deterministic-recovery-test.sh
index 6514fe1f7c0c3..c5f89a2bbc7e0 100755
--- a/ci/scripts/deterministic-recovery-test.sh
+++ b/ci/scripts/deterministic-recovery-test.sh
@@ -11,6 +11,7 @@ chmod +x ./risingwave_simulation
export RUST_LOG="info,\
risingwave_meta::barrier::recovery=debug,\
+risingwave_meta::manager::catalog=debug,\
risingwave_meta::rpc::ddl_controller=debug,\
risingwave_meta::barrier::mod=debug,\
risingwave_simulation=debug"
diff --git a/ci/scripts/e2e-kafka-sink-test.sh b/ci/scripts/e2e-kafka-sink-test.sh
index 06ef185f46e8b..71a91f2d8fba9 100755
--- a/ci/scripts/e2e-kafka-sink-test.sh
+++ b/ci/scripts/e2e-kafka-sink-test.sh
@@ -3,10 +3,10 @@
# Exits as soon as any line fails.
set -euo pipefail
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --create > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --create > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --create > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --create > /dev/null 2>&1
sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/create_sink.slt'
sleep 2
@@ -14,7 +14,7 @@ sleep 2
# test append-only kafka sink
echo "testing append-only kafka sink"
diff ./e2e_test/sink/kafka/append_only1.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 10 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 10 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for append-only sink is not as expected."
exit 1
@@ -23,7 +23,7 @@ fi
# test upsert kafka sink
echo "testing upsert kafka sink"
diff ./e2e_test/sink/kafka/upsert1.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for upsert sink is not as expected."
exit 1
@@ -32,7 +32,7 @@ fi
# test upsert kafka sink with schema
echo "testing upsert kafka sink with schema"
diff ./e2e_test/sink/kafka/upsert_schema1.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for upsert sink with schema is not as expected."
exit 1
@@ -40,7 +40,7 @@ fi
# test debezium kafka sink
echo "testing debezium kafka sink"
-(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 10 | sort) > ./e2e_test/sink/kafka/debezium1.tmp.result 2> /dev/null
+(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 10 | sort) > ./e2e_test/sink/kafka/debezium1.tmp.result 2> /dev/null
python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium1.result e2e_test/sink/kafka/debezium1.tmp.result
if [ $? -ne 0 ]; then
echo "The output for debezium sink is not as expected."
@@ -57,7 +57,7 @@ psql -h localhost -p 4566 -d dev -U root -c "update t_kafka set v_varchar = '',
# test append-only kafka sink after update
echo "testing append-only kafka sink after updating data"
diff ./e2e_test/sink/kafka/append_only2.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 11 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 11 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for append-only sink after update is not as expected."
exit 1
@@ -66,7 +66,7 @@ fi
# test upsert kafka sink after update
echo "testing upsert kafka sink after updating data"
diff ./e2e_test/sink/kafka/upsert2.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for upsert sink after update is not as expected."
exit 1
@@ -75,7 +75,7 @@ fi
# test upsert kafka sink with schema after update
echo "testing upsert kafka sink with schema after updating data"
diff ./e2e_test/sink/kafka/upsert_schema2.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for upsert sink with schema is not as expected."
exit 1
@@ -83,7 +83,7 @@ fi
# test debezium kafka sink after update
echo "testing debezium kafka sink after updating data"
-(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 11 | sort) > ./e2e_test/sink/kafka/debezium2.tmp.result 2> /dev/null
+(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 11 | sort) > ./e2e_test/sink/kafka/debezium2.tmp.result 2> /dev/null
python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium2.result e2e_test/sink/kafka/debezium2.tmp.result
if [ $? -ne 0 ]; then
echo "The output for debezium sink after update is not as expected."
@@ -100,7 +100,7 @@ psql -h localhost -p 4566 -d dev -U root -c "delete from t_kafka where id = 1;"
# test upsert kafka sink after delete
echo "testing upsert kafka sink after deleting data"
diff ./e2e_test/sink/kafka/upsert3.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for upsert sink after update is not as expected."
exit 1
@@ -109,7 +109,7 @@ fi
# test upsert kafka sink with schema after delete
echo "testing upsert kafka sink with schema after deleting data"
diff ./e2e_test/sink/kafka/upsert_schema3.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
if [ $? -ne 0 ]; then
echo "The output for upsert sink with schema is not as expected."
exit 1
@@ -117,7 +117,7 @@ fi
# test debezium kafka sink after delete
echo "testing debezium kafka sink after deleting data"
-(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 13 | sort) > ./e2e_test/sink/kafka/debezium3.tmp.result 2> /dev/null
+(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 13 | sort) > ./e2e_test/sink/kafka/debezium3.tmp.result 2> /dev/null
python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium3.result e2e_test/sink/kafka/debezium3.tmp.result
if [ $? -ne 0 ]; then
echo "The output for debezium sink after delete is not as expected."
@@ -128,13 +128,13 @@ else
fi
sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/drop_sink.slt'
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --delete > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --delete > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --delete > /dev/null 2>&1
# test different encoding
echo "testing protobuf"
cp src/connector/src/test_data/proto_recursive/recursive.pb ./proto-recursive
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only-protobuf --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only-protobuf --create > /dev/null 2>&1
sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/protobuf.slt'
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only-protobuf --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only-protobuf --delete > /dev/null 2>&1
diff --git a/ci/scripts/e2e-sink-test.sh b/ci/scripts/e2e-sink-test.sh
index 2dc02f0eada7a..ce2cc46381eba 100755
--- a/ci/scripts/e2e-sink-test.sh
+++ b/ci/scripts/e2e-sink-test.sh
@@ -57,7 +57,7 @@ node_port=50051
node_timeout=10
echo "--- starting risingwave cluster with connector node"
-cargo make ci-start ci-kafka
+cargo make ci-start ci-1cn-1fe
./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-node.log 2>&1 &
echo "waiting for connector node to start"
diff --git a/ci/scripts/run-micro-benchmarks.sh b/ci/scripts/run-micro-benchmarks.sh
index 568c90de425ca..371cc416e7ac5 100755
--- a/ci/scripts/run-micro-benchmarks.sh
+++ b/ci/scripts/run-micro-benchmarks.sh
@@ -46,6 +46,8 @@ main() {
echo "--- Getting aws instance type"
local instance_type=$(get_instance_type)
echo "instance_type: $instance_type"
+ echo "$instance_type" > microbench_instance_type.txt
+ buildkite-agent artifact upload ./microbench_instance_type.txt
if [[ $instance_type != "m6i.4xlarge" ]]; then
echo "Only m6i.4xlarge is supported, skipping microbenchmark"
exit 0
diff --git a/ci/scripts/upload-micro-bench-results.sh b/ci/scripts/upload-micro-bench-results.sh
index 2644ca936c5da..e72b69950bb7b 100755
--- a/ci/scripts/upload-micro-bench-results.sh
+++ b/ci/scripts/upload-micro-bench-results.sh
@@ -36,6 +36,19 @@ get_commit() {
| sed 's/\"//g'
}
+get_machine() {
+ buildkite-agent artifact download microbench_instance_type.txt ./
+ cat ./microbench_instance_type.txt
+}
+
+echo "--- Checking microbench_instance_type"
+INSTANCE_TYPE=$(get_machine)
+echo "instance type: $INSTANCE_TYPE"
+if [[ $INSTANCE_TYPE != "m6i.4xlarge" ]]; then
+ echo "Only m6i.4xlarge is supported, microbenchmark was skipped"
+ exit 0
+fi
+
setup
BUILDKITE_BUILD_URL="https://buildkite.com/risingwavelabs/main-cron/builds/$BUILDKITE_BUILD_NUMBER"
diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml
index 0008f2cf0bb7c..985bd0be4b822 100644
--- a/ci/workflows/pull-request.yml
+++ b/ci/workflows/pull-request.yml
@@ -82,7 +82,7 @@ steps:
config: ci/docker-compose.yml
mount-buildkite-agent: true
- ./ci/plugins/upload-failure-logs
- timeout_in_minutes: 17
+ timeout_in_minutes: 15
retry: *auto-retry
- label: "end-to-end test (parallel)"
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 89aa99a1c8b5d..d25c94daf2670 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -2,7 +2,7 @@
version: "3"
services:
compactor-0:
- image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+ image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
command:
- compactor-node
- "--listen-addr"
@@ -37,7 +37,7 @@ services:
timeout: 5s
retries: 5
compute-node-0:
- image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+ image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
command:
- compute-node
- "--listen-addr"
@@ -122,7 +122,7 @@ services:
timeout: 5s
retries: 5
frontend-node-0:
- image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+ image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
command:
- frontend-node
- "--listen-addr"
@@ -179,7 +179,7 @@ services:
timeout: 5s
retries: 5
meta-node-0:
- image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+ image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
command:
- meta-node
- "--listen-addr"
@@ -295,7 +295,7 @@ services:
timeout: 5s
retries: 5
connector-node:
- image: ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}
+ image: ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}
entrypoint: "/risingwave/bin/connector-node/start-service.sh"
ports:
- 50051
diff --git a/docs/developer-guide.md b/docs/developer-guide.md
index 4ecc756131dff..7d072e7da2e44 100644
--- a/docs/developer-guide.md
+++ b/docs/developer-guide.md
@@ -2,7 +2,7 @@
This guide is intended to be used by contributors to learn about how to develop RisingWave. The instructions about how to submit code changes are included in [contributing guidelines](../CONTRIBUTING.md).
-If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://join.slack.com/t/risingwave-community/shared_invite/zt-120rft0mr-d8uGk3d~NZiZAQWPnElOfw) to join the channel.
+If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://risingwave.com/slack) to join the channel.
To report bugs, create a [GitHub issue](https://github.com/risingwavelabs/risingwave/issues/new/choose).
diff --git a/e2e_test/batch/catalog/pg_size.slt.part b/e2e_test/batch/catalog/pg_size.slt.part
index 2dab96a689642..c274b7f297ee3 100644
--- a/e2e_test/batch/catalog/pg_size.slt.part
+++ b/e2e_test/batch/catalog/pg_size.slt.part
@@ -4,7 +4,7 @@ create table t (v1 int);
statement ok
insert into t values (3);
-sleep 30s
+sleep 3s
skipif in-memory
query T
@@ -52,7 +52,7 @@ t
statement ok
create index t_idx on t (v1);
-sleep 10s
+sleep 3s
skipif in-memory
query T
diff --git a/e2e_test/sink/kafka/create_sink.slt b/e2e_test/sink/kafka/create_sink.slt
index 25e3a59fdff3a..a1f296774f526 100644
--- a/e2e_test/sink/kafka/create_sink.slt
+++ b/e2e_test/sink/kafka/create_sink.slt
@@ -31,7 +31,7 @@ create connection mock with (
statement error
create sink si_kafka_append_only_conn from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-append-only',
type = 'append-only',
force_append_only = 'true',
@@ -42,7 +42,7 @@ create sink si_kafka_append_only_conn from t_kafka with (
statement ok
create sink si_kafka_append_only_conn from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-append-only',
type = 'append-only',
force_append_only = 'true',
@@ -66,7 +66,7 @@ drop connection mock;
statement error sink cannot be append-only
create sink si_kafka_append_only from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-append-only',
type = 'append-only',
);
@@ -74,7 +74,7 @@ create sink si_kafka_append_only from t_kafka with (
statement ok
create sink si_kafka_append_only from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-append-only',
type = 'append-only',
force_append_only = 'true'
@@ -83,7 +83,7 @@ create sink si_kafka_append_only from t_kafka with (
statement error primary key not defined
create sink si_kafka_upsert from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-upsert',
type = 'upsert',
);
@@ -91,7 +91,7 @@ create sink si_kafka_upsert from t_kafka with (
statement ok
create sink si_kafka_upsert from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-upsert',
type = 'upsert',
primary_key = 'id',
@@ -100,7 +100,7 @@ create sink si_kafka_upsert from t_kafka with (
statement ok
create sink si_kafka_upsert_schema from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-upsert-schema',
primary_key = 'id',
) format upsert encode json (
@@ -110,7 +110,7 @@ create sink si_kafka_upsert_schema from t_kafka with (
statement ok
create sink si_kafka_debezium from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-debezium',
type = 'debezium',
primary_key = 'id',
@@ -119,7 +119,7 @@ create sink si_kafka_debezium from t_kafka with (
statement error primary key not defined
create sink debezium_without_pk from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-debezium',
type = 'debezium',
);
@@ -127,7 +127,7 @@ create sink debezium_without_pk from t_kafka with (
statement ok
create sink multiple_pk from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-debezium',
type = 'debezium',
primary_key = 'id,v_varchar'
@@ -139,7 +139,7 @@ drop sink multiple_pk;
statement error Sink primary key column not found: invalid.
create sink invalid_pk_column from t_kafka with (
connector = 'kafka',
- properties.bootstrap.server = '127.0.0.1:29092',
+ properties.bootstrap.server = 'message_queue:29092',
topic = 'test-rw-sink-debezium',
type = 'debezium',
primary_key = 'id,invalid'
diff --git a/e2e_test/sink/kafka/protobuf.slt b/e2e_test/sink/kafka/protobuf.slt
index f69c4a9d07110..87ab884eddbde 100644
--- a/e2e_test/sink/kafka/protobuf.slt
+++ b/e2e_test/sink/kafka/protobuf.slt
@@ -2,7 +2,7 @@ statement ok
create table from_kafka with (
connector = 'kafka',
topic = 'test-rw-sink-append-only-protobuf',
- properties.bootstrap.server = '127.0.0.1:29092')
+ properties.bootstrap.server = 'message_queue:29092')
format plain encode protobuf (
schema.location = 'file:///risingwave/proto-recursive',
message = 'recursive.AllTypes');
@@ -37,7 +37,7 @@ statement ok
create sink sink0 from into_kafka with (
connector = 'kafka',
topic = 'test-rw-sink-append-only-protobuf',
- properties.bootstrap.server = '127.0.0.1:29092')
+ properties.bootstrap.server = 'message_queue:29092')
format plain encode protobuf (
force_append_only = true,
schema.location = 'file:///risingwave/proto-recursive',
@@ -70,7 +70,7 @@ statement error failed to read file
create sink sink_err from into_kafka with (
connector = 'kafka',
topic = 'test-rw-sink-append-only-protobuf',
- properties.bootstrap.server = '127.0.0.1:29092')
+ properties.bootstrap.server = 'message_queue:29092')
format plain encode protobuf (
force_append_only = true,
schema.location = 'file:///risingwave/proto-recursiv',
@@ -80,7 +80,7 @@ statement error encode extra_column error: field not in proto
create sink sink_err as select 1 as extra_column with (
connector = 'kafka',
topic = 'test-rw-sink-append-only-protobuf',
- properties.bootstrap.server = '127.0.0.1:29092')
+ properties.bootstrap.server = 'message_queue:29092')
format plain encode protobuf (
force_append_only = true,
schema.location = 'file:///risingwave/proto-recursive',
@@ -90,7 +90,7 @@ statement error s3 URL not supported yet
create sink sink_err from into_kafka with (
connector = 'kafka',
topic = 'test-rw-sink-append-only-protobuf',
- properties.bootstrap.server = '127.0.0.1:29092')
+ properties.bootstrap.server = 'message_queue:29092')
format plain encode protobuf (
force_append_only = true,
schema.location = 's3:///risingwave/proto-recursive',
diff --git a/integration_tests/redis-sink/create_sink.sql b/integration_tests/redis-sink/create_sink.sql
index 03bfc2d0b0df1..2ba9ba67feb39 100644
--- a/integration_tests/redis-sink/create_sink.sql
+++ b/integration_tests/redis-sink/create_sink.sql
@@ -3,19 +3,13 @@ FROM
bhv_mv WITH (
primary_key = 'user_id',
connector = 'redis',
- type = 'append-only',
- force_append_only='true',
redis.url= 'redis://127.0.0.1:6379/',
-);
+)FORMAT PLAIN ENCODE JSON(force_append_only='true');
CREATE SINK bhv_redis_sink_2
FROM
bhv_mv WITH (
primary_key = 'user_id',
connector = 'redis',
- type = 'append-only',
- force_append_only='true',
redis.url= 'redis://127.0.0.1:6379/',
- redis.keyformat='user_id:{user_id}',
- redis.valueformat='username:{username},event_timestamp{event_timestamp}'
-);
\ No newline at end of file
+)FORMAT PLAIN ENCODE TEMPLATE(force_append_only='true', key_format = 'UserID:{user_id}', value_format = 'TargetID:{target_id},EventTimestamp{event_timestamp}');
\ No newline at end of file
diff --git a/proto/expr.proto b/proto/expr.proto
index 769532d8dbe19..2f252d67c8400 100644
--- a/proto/expr.proto
+++ b/proto/expr.proto
@@ -348,6 +348,7 @@ message AggCall {
MODE = 24;
LAST_VALUE = 25;
GROUPING = 26;
+ INTERNAL_LAST_SEEN_VALUE = 27;
}
Type type = 1;
repeated InputRef args = 2;
diff --git a/proto/plan_common.proto b/proto/plan_common.proto
index a88242a572693..d4c7a2e04f138 100644
--- a/proto/plan_common.proto
+++ b/proto/plan_common.proto
@@ -106,6 +106,7 @@ enum EncodeType {
ENCODE_TYPE_PROTOBUF = 4;
ENCODE_TYPE_JSON = 5;
ENCODE_TYPE_BYTES = 6;
+ ENCODE_TYPE_TEMPLATE = 7;
}
enum RowFormatType {
diff --git a/risedev.yml b/risedev.yml
index 8367c9fe99d9b..135a33f602a6a 100644
--- a/risedev.yml
+++ b/risedev.yml
@@ -270,6 +270,21 @@ profile:
exporter-port: 21250
- use: compactor
+ 3meta:
+ steps:
+ - use: meta-node
+ port: 5690
+ dashboard-port: 5691
+ exporter-port: 1250
+ - use: meta-node
+ port: 15690
+ dashboard-port: 15691
+ exporter-port: 11250
+ - use: meta-node
+ port: 25690
+ dashboard-port: 25691
+ exporter-port: 21250
+
3etcd-3meta-1cn-1fe:
steps:
- use: minio
@@ -670,40 +685,6 @@ profile:
- use: pubsub
persist-data: true
- ci-kafka:
- config-path: src/config/ci.toml
- steps:
- - use: minio
- - use: etcd
- unsafe-no-fsync: true
- - use: meta-node
- - use: compute-node
- enable-tiered-cache: true
- - use: frontend
- - use: compactor
- - use: zookeeper
- persist-data: true
- - use: kafka
- persist-data: true
-
- ci-kafka-plus-pubsub:
- config-path: src/config/ci.toml
- steps:
- - use: minio
- - use: etcd
- unsafe-no-fsync: true
- - use: meta-node
- - use: compute-node
- enable-tiered-cache: true
- - use: frontend
- - use: compactor
- - use: zookeeper
- persist-data: true
- - use: kafka
- persist-data: true
- - use: pubsub
- persist-data: true
-
ci-redis:
config-path: src/config/ci.toml
steps:
diff --git a/src/batch/src/executor/aggregation/filter.rs b/src/batch/src/executor/aggregation/filter.rs
index 2db2320ed3534..9cfbeabffe417 100644
--- a/src/batch/src/executor/aggregation/filter.rs
+++ b/src/batch/src/executor/aggregation/filter.rs
@@ -75,7 +75,7 @@ impl AggregateFunction for Filter {
mod tests {
use risingwave_common::test_prelude::StreamChunkTestExt;
use risingwave_expr::aggregate::{build_append_only, AggCall};
- use risingwave_expr::expr::{build_from_pretty, Expression, LiteralExpression};
+ use risingwave_expr::expr::{build_from_pretty, ExpressionBoxExt, LiteralExpression};
use super::*;
diff --git a/src/batch/src/executor/project_set.rs b/src/batch/src/executor/project_set.rs
index 670933a6bb50c..fa3dfac917e8a 100644
--- a/src/batch/src/executor/project_set.rs
+++ b/src/batch/src/executor/project_set.rs
@@ -171,7 +171,7 @@ mod tests {
use risingwave_common::catalog::{Field, Schema};
use risingwave_common::test_prelude::*;
use risingwave_common::types::DataType;
- use risingwave_expr::expr::{Expression, InputRefExpression, LiteralExpression};
+ use risingwave_expr::expr::{ExpressionBoxExt, InputRefExpression, LiteralExpression};
use risingwave_expr::table_function::repeat;
use super::*;
diff --git a/src/batch/src/lib.rs b/src/batch/src/lib.rs
index 9104c96c951f5..809c096eb49df 100644
--- a/src/batch/src/lib.rs
+++ b/src/batch/src/lib.rs
@@ -17,8 +17,8 @@
#![feature(trait_alias)]
#![feature(exact_size_is_empty)]
#![feature(type_alias_impl_trait)]
-#![cfg_attr(coverage, feature(no_coverage))]
-#![feature(generators)]
+#![cfg_attr(coverage, feature(coverage_attribute))]
+#![feature(coroutines)]
#![feature(proc_macro_hygiene, stmt_expr_attributes)]
#![feature(iterator_try_collect)]
#![feature(lint_reasons)]
@@ -27,13 +27,11 @@
#![feature(let_chains)]
#![feature(bound_map)]
#![feature(int_roundings)]
-#![feature(async_fn_in_trait)]
#![feature(allocator_api)]
#![feature(impl_trait_in_assoc_type)]
#![feature(result_option_inspect)]
#![feature(assert_matches)]
#![feature(lazy_cell)]
-#![feature(return_position_impl_trait_in_trait)]
mod error;
pub mod exchange_source;
diff --git a/src/batch/src/rpc/service/task_service.rs b/src/batch/src/rpc/service/task_service.rs
index b49a023acb22b..fb60e352ec293 100644
--- a/src/batch/src/rpc/service/task_service.rs
+++ b/src/batch/src/rpc/service/task_service.rs
@@ -53,7 +53,7 @@ impl TaskService for BatchServiceImpl {
type CreateTaskStream = ReceiverStream;
type ExecuteStream = ReceiverStream;
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn create_task(
&self,
request: Request,
@@ -97,7 +97,7 @@ impl TaskService for BatchServiceImpl {
}
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn cancel_task(
&self,
req: Request,
@@ -109,7 +109,7 @@ impl TaskService for BatchServiceImpl {
Ok(Response::new(CancelTaskResponse { status: None }))
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn execute(
&self,
req: Request,
diff --git a/src/batch/src/task/task_execution.rs b/src/batch/src/task/task_execution.rs
index 6bd83c5d62c67..445c71ee51d66 100644
--- a/src/batch/src/task/task_execution.rs
+++ b/src/batch/src/task/task_execution.rs
@@ -656,7 +656,7 @@ impl BatchTaskExecution {
let error = error.map(Arc::new);
*self.failure.lock() = error.clone().map(to_rw_error);
- let err_str = error.as_ref().map(|e| format!("{:?}", e));
+ let err_str = error.as_ref().map(|e| e.to_string());
if let Err(e) = sender.close(error).await {
match e {
SenderError => {
diff --git a/src/cmd/src/bin/compactor.rs b/src/cmd/src/bin/compactor.rs
index 21b7db2405e2d..554168d8a6683 100644
--- a/src/cmd/src/bin/compactor.rs
+++ b/src/cmd/src/bin/compactor.rs
@@ -12,6 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
risingwave_cmd::main!(compactor);
diff --git a/src/cmd/src/bin/compute_node.rs b/src/cmd/src/bin/compute_node.rs
index 0bb1e5211ac57..a24d132b70b94 100644
--- a/src/cmd/src/bin/compute_node.rs
+++ b/src/cmd/src/bin/compute_node.rs
@@ -12,6 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
risingwave_cmd::main!(compute);
diff --git a/src/cmd/src/bin/ctl.rs b/src/cmd/src/bin/ctl.rs
index 38345c7a3fc2e..7b4c3132e747d 100644
--- a/src/cmd/src/bin/ctl.rs
+++ b/src/cmd/src/bin/ctl.rs
@@ -12,6 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
risingwave_cmd::main!(ctl);
diff --git a/src/cmd/src/bin/frontend_node.rs b/src/cmd/src/bin/frontend_node.rs
index 32d563be109fc..546bacbf1a901 100644
--- a/src/cmd/src/bin/frontend_node.rs
+++ b/src/cmd/src/bin/frontend_node.rs
@@ -12,6 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
risingwave_cmd::main!(frontend);
diff --git a/src/cmd/src/bin/meta_node.rs b/src/cmd/src/bin/meta_node.rs
index 032cc6bc28285..4bebfc5f915a2 100644
--- a/src/cmd/src/bin/meta_node.rs
+++ b/src/cmd/src/bin/meta_node.rs
@@ -12,6 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
risingwave_cmd::main!(meta);
diff --git a/src/cmd/src/lib.rs b/src/cmd/src/lib.rs
index 12de26657bd33..93df94a63816a 100644
--- a/src/cmd/src/lib.rs
+++ b/src/cmd/src/lib.rs
@@ -30,7 +30,7 @@ macro_rules! main {
#[cfg(not(enable_task_local_alloc))]
risingwave_common::enable_jemalloc!();
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
fn main() {
let opts = clap::Parser::parse();
$crate::$component(opts);
diff --git a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs
index 3e9088e16b9e2..b7693c6fa06a2 100644
--- a/src/cmd_all/src/bin/risingwave.rs
+++ b/src/cmd_all/src/bin/risingwave.rs
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
use std::str::FromStr;
@@ -158,7 +158,7 @@ impl Component {
}
}
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
fn main() -> Result<()> {
let risingwave = || {
command!(BINARY_NAME)
diff --git a/src/common/proc_macro/src/config.rs b/src/common/proc_macro/src/config.rs
index 285834eb123cf..6e369fbad33eb 100644
--- a/src/common/proc_macro/src/config.rs
+++ b/src/common/proc_macro/src/config.rs
@@ -41,7 +41,7 @@ fn type_is_option(ty: &syn::Type) -> bool {
false
}
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
pub fn produce_override_config(input: DeriveInput) -> TokenStream {
let syn::Data::Struct(syn::DataStruct { fields, .. }) = input.data else {
abort!(input, "Only struct is supported");
diff --git a/src/common/proc_macro/src/lib.rs b/src/common/proc_macro/src/lib.rs
index 060ee1950624e..a11e407c6c053 100644
--- a/src/common/proc_macro/src/lib.rs
+++ b/src/common/proc_macro/src/lib.rs
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
use estimate_size::{
add_trait_bounds, extract_ignored_generics_list, has_nested_flag_attribute_list,
@@ -52,7 +52,7 @@ mod estimate_size;
/// }
/// }
/// ```
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
#[proc_macro_derive(OverrideConfig, attributes(override_opts))]
#[proc_macro_error]
pub fn override_config(input: TokenStream) -> TokenStream {
diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs
index 2a3575d8dae78..fbcd3854fa572 100644
--- a/src/common/src/lib.rs
+++ b/src/common/src/lib.rs
@@ -24,12 +24,11 @@
#![feature(trusted_len)]
#![feature(allocator_api)]
#![feature(lint_reasons)]
-#![feature(generators)]
+#![feature(coroutines)]
#![feature(map_try_insert)]
#![feature(lazy_cell)]
#![feature(error_generic_member_access)]
#![feature(let_chains)]
-#![feature(return_position_impl_trait_in_trait)]
#![feature(portable_simd)]
#![feature(array_chunks)]
#![feature(inline_const_pat)]
@@ -43,7 +42,6 @@
#![feature(result_option_inspect)]
#![feature(map_entry_replace)]
#![feature(negative_impls)]
-#![feature(async_fn_in_trait)]
#![feature(bound_map)]
#![feature(array_methods)]
diff --git a/src/common/src/types/ordered.rs b/src/common/src/types/ordered.rs
index 75b07e529d7b9..68cd6329287e2 100644
--- a/src/common/src/types/ordered.rs
+++ b/src/common/src/types/ordered.rs
@@ -138,7 +138,7 @@ impl From for DefaultOrdered {
}
}
-#[allow(clippy::incorrect_partial_ord_impl_on_ord_type)]
+#[allow(clippy::non_canonical_partial_ord_impl)]
impl PartialOrd for DefaultOrdered {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.0.default_partial_cmp(other.as_inner())
diff --git a/src/common/src/util/epoch.rs b/src/common/src/util/epoch.rs
index 86ed158c2e206..4d57c97b054b3 100644
--- a/src/common/src/util/epoch.rs
+++ b/src/common/src/util/epoch.rs
@@ -73,6 +73,10 @@ impl Epoch {
Epoch(time << EPOCH_PHYSICAL_SHIFT_BITS)
}
+ pub fn from_unix_millis(mi: u64) -> Self {
+ Epoch((mi - UNIX_RISINGWAVE_DATE_SEC * 1000) << EPOCH_PHYSICAL_SHIFT_BITS)
+ }
+
pub fn physical_now() -> u64 {
UNIX_RISINGWAVE_DATE_EPOCH
.elapsed()
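
For reference, the arithmetic of the new `from_unix_millis` can be exercised in isolation. The sketch below mirrors it with assumed constants (the 2021-04-01 base date and the 16-bit physical shift are assumptions for illustration, not values taken from this patch):

```rust
// Standalone sketch of the unix-millis -> epoch conversion added above.
// Both constants are assumptions for illustration; the real values live in epoch.rs.
const UNIX_RISINGWAVE_DATE_SEC: u64 = 1_617_235_200; // assumed base: 2021-04-01T00:00:00Z
const EPOCH_PHYSICAL_SHIFT_BITS: u32 = 16; // assumed width of the logical counter

fn from_unix_millis(unix_millis: u64) -> u64 {
    // Subtract the base date (in milliseconds) and shift the physical time into
    // the high bits, leaving the low bits for the logical counter.
    (unix_millis - UNIX_RISINGWAVE_DATE_SEC * 1000) << EPOCH_PHYSICAL_SHIFT_BITS
}

fn main() {
    println!("epoch = {}", from_unix_millis(1_700_000_000_000));
}
```
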
diff --git a/src/common/src/util/future_utils.rs b/src/common/src/util/future_utils.rs
index 75c38488457ac..20844d8cd15d4 100644
--- a/src/common/src/util/future_utils.rs
+++ b/src/common/src/util/future_utils.rs
@@ -13,9 +13,11 @@
// limitations under the License.
use std::future::pending;
+use std::pin::{pin, Pin};
-use futures::future::Either;
-use futures::{Future, FutureExt, Stream};
+use futures::future::{select, Either};
+use futures::stream::Peekable;
+use futures::{Future, FutureExt, Stream, StreamExt};
/// Convert a list of streams into a [`Stream`] of results from the streams.
pub fn select_all(
@@ -43,3 +45,34 @@ pub fn drop_either_future(
Either::Right((right, _)) => Either::Right(right),
}
}
+
+/// Await a future while monitoring a peekable stream that may return errors.
+/// The peekable stream is polled at a higher priority than the future.
+///
+/// When the peekable stream yields an error or reaches the end of stream, this
+/// function returns immediately. Otherwise, it keeps polling the given future.
+///
+/// Return:
+/// - Ok(output) as the output of the given future.
+/// - Err(None) to indicate that the stream has reached the end.
+/// - Err(Some(e)) to indicate that the stream returned an error.
+pub async fn await_future_with_monitor_error_stream<T, E, F: Future>(
+    peek_stream: &mut Peekable<impl Stream<Item = Result<T, E>> + Unpin>,
+    future: F,
+) -> Result<F::Output, Option<E>> {
+    // Poll the response stream to see the error early
+ match select(pin!(Pin::new(&mut *peek_stream).peek()), pin!(future)).await {
+ Either::Left((response_result, send_future)) => match response_result {
+ None => Err(None),
+ Some(Err(_)) => {
+ let err = match peek_stream.next().now_or_never() {
+ Some(Some(Err(err))) => err,
+                    _ => unreachable!("peek returned Some(Err(_)), so next() must yield that error"),
+ };
+ Err(Some(err))
+ }
+ Some(Ok(_)) => Ok(send_future.await),
+ },
+ Either::Right((output, _)) => Ok(output),
+ }
+}
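
To make the helper above easier to follow, here is a self-contained sketch of the same select-with-priority pattern using only the `futures` crate. It is not the crate's code; the stream items and error strings are made up for illustration:

```rust
use std::pin::{pin, Pin};

use futures::future::{select, Either};
use futures::stream::{self, StreamExt};

async fn run() -> Result<u32, Option<&'static str>> {
    // An error-reporting stream standing in for the remote response stream.
    let mut responses = stream::iter([Ok::<(), &str>(()), Err("remote failed")]).peekable();
    // The actual work we want to await.
    let work = async { 42u32 };

    match select(pin!(Pin::new(&mut responses).peek()), pin!(work)).await {
        // The monitored stream yielded first: inspect what it produced.
        Either::Left((peeked, work)) => match peeked {
            None => Err(None),             // stream ended
            Some(Err(e)) => Err(Some(*e)), // stream reported an error
            Some(Ok(_)) => Ok(work.await), // benign item: keep waiting for the work
        },
        // The work finished first.
        Either::Right((output, _)) => Ok(output),
    }
}

fn main() {
    println!("{:?}", futures::executor::block_on(run()));
}
```
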
diff --git a/src/common/src/util/mod.rs b/src/common/src/util/mod.rs
index f4140b558faa7..e1f85263e1415 100644
--- a/src/common/src/util/mod.rs
+++ b/src/common/src/util/mod.rs
@@ -45,7 +45,9 @@ pub mod tracing;
pub mod value_encoding;
pub mod worker_util;
-pub use future_utils::{drop_either_future, pending_on_none, select_all};
+pub use future_utils::{
+ await_future_with_monitor_error_stream, drop_either_future, pending_on_none, select_all,
+};
#[macro_use]
pub mod match_util;
diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs
index 65bf59eedf19e..fc5ae9ff19854 100644
--- a/src/compute/src/lib.rs
+++ b/src/compute/src/lib.rs
@@ -13,14 +13,14 @@
// limitations under the License.
#![feature(trait_alias)]
-#![feature(generators)]
+#![feature(coroutines)]
#![feature(type_alias_impl_trait)]
#![feature(let_chains)]
#![feature(result_option_inspect)]
#![feature(lint_reasons)]
#![feature(impl_trait_in_assoc_type)]
#![feature(lazy_cell)]
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
#[macro_use]
extern crate tracing;
diff --git a/src/compute/src/rpc/service/exchange_service.rs b/src/compute/src/rpc/service/exchange_service.rs
index b59cc39587c2f..6225cef2a7e30 100644
--- a/src/compute/src/rpc/service/exchange_service.rs
+++ b/src/compute/src/rpc/service/exchange_service.rs
@@ -49,7 +49,7 @@ impl ExchangeService for ExchangeServiceImpl {
type GetDataStream = BatchDataStream;
type GetStreamStream = StreamDataStream;
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn get_data(
&self,
request: Request,
diff --git a/src/compute/src/rpc/service/monitor_service.rs b/src/compute/src/rpc/service/monitor_service.rs
index 97a0b80773791..8fc24664ec016 100644
--- a/src/compute/src/rpc/service/monitor_service.rs
+++ b/src/compute/src/rpc/service/monitor_service.rs
@@ -53,7 +53,7 @@ impl MonitorServiceImpl {
#[async_trait::async_trait]
impl MonitorService for MonitorServiceImpl {
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn stack_trace(
&self,
request: Request,
@@ -85,7 +85,7 @@ impl MonitorService for MonitorServiceImpl {
}))
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn profiling(
&self,
request: Request,
@@ -115,7 +115,7 @@ impl MonitorService for MonitorServiceImpl {
}
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn heap_profiling(
&self,
request: Request,
@@ -166,7 +166,7 @@ impl MonitorService for MonitorServiceImpl {
}
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn list_heap_profiling(
&self,
_request: Request,
@@ -206,7 +206,7 @@ impl MonitorService for MonitorServiceImpl {
}))
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn analyze_heap(
&self,
request: Request,
diff --git a/src/compute/src/rpc/service/stream_service.rs b/src/compute/src/rpc/service/stream_service.rs
index 525364b60dc1c..1c1448b3d1e45 100644
--- a/src/compute/src/rpc/service/stream_service.rs
+++ b/src/compute/src/rpc/service/stream_service.rs
@@ -45,7 +45,7 @@ impl StreamServiceImpl {
#[async_trait::async_trait]
impl StreamService for StreamServiceImpl {
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn update_actors(
&self,
request: Request,
@@ -61,7 +61,7 @@ impl StreamService for StreamServiceImpl {
}
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn build_actors(
&self,
request: Request,
@@ -85,7 +85,7 @@ impl StreamService for StreamServiceImpl {
}
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn broadcast_actor_info_table(
&self,
request: Request,
@@ -104,7 +104,7 @@ impl StreamService for StreamServiceImpl {
}
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn drop_actors(
&self,
request: Request,
@@ -118,7 +118,7 @@ impl StreamService for StreamServiceImpl {
}))
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn force_stop_actors(
&self,
request: Request,
@@ -132,7 +132,7 @@ impl StreamService for StreamServiceImpl {
}))
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn inject_barrier(
&self,
request: Request,
@@ -173,7 +173,7 @@ impl StreamService for StreamServiceImpl {
}))
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn barrier_complete(
&self,
request: Request,
@@ -243,7 +243,7 @@ impl StreamService for StreamServiceImpl {
}))
}
- #[cfg_attr(coverage, no_coverage)]
+ #[cfg_attr(coverage, coverage(off))]
async fn wait_epoch_commit(
&self,
request: Request,
diff --git a/src/compute/tests/cdc_tests.rs b/src/compute/tests/cdc_tests.rs
index b3e39ece95002..6a50b8410bbd4 100644
--- a/src/compute/tests/cdc_tests.rs
+++ b/src/compute/tests/cdc_tests.rs
@@ -13,7 +13,7 @@
// limitations under the License.
#![feature(let_chains)]
-#![feature(generators)]
+#![feature(coroutines)]
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
diff --git a/src/compute/tests/integration_tests.rs b/src/compute/tests/integration_tests.rs
index a43ae2e5762da..6d7e93365c275 100644
--- a/src/compute/tests/integration_tests.rs
+++ b/src/compute/tests/integration_tests.rs
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#![feature(generators)]
+#![feature(coroutines)]
#![feature(proc_macro_hygiene, stmt_expr_attributes)]
use std::sync::atomic::AtomicU64;
diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml
index 4886b1b52fcc5..87d2a0bdef689 100644
--- a/src/connector/Cargo.toml
+++ b/src/connector/Cargo.toml
@@ -113,7 +113,7 @@ strum = "0.25"
strum_macros = "0.25"
tempfile = "3"
thiserror = "1"
-time = "0.3.28"
+time = "0.3.30"
tokio = { version = "0.2", package = "madsim-tokio", features = [
"rt",
"rt-multi-thread",
diff --git a/src/connector/src/lib.rs b/src/connector/src/lib.rs
index 8ccf62486ce65..aa613b4043c23 100644
--- a/src/connector/src/lib.rs
+++ b/src/connector/src/lib.rs
@@ -14,7 +14,7 @@
#![expect(dead_code)]
#![allow(clippy::derive_partial_eq_without_eq)]
-#![feature(generators)]
+#![feature(coroutines)]
#![feature(proc_macro_hygiene)]
#![feature(stmt_expr_attributes)]
#![feature(box_patterns)]
@@ -25,11 +25,9 @@
#![feature(let_chains)]
#![feature(box_into_inner)]
#![feature(type_alias_impl_trait)]
-#![feature(return_position_impl_trait_in_trait)]
-#![feature(async_fn_in_trait)]
#![feature(associated_type_defaults)]
#![feature(impl_trait_in_assoc_type)]
-#![feature(iter_from_generator)]
+#![feature(iter_from_coroutine)]
#![feature(if_let_guard)]
#![feature(iterator_try_collect)]
diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs
index c18dd7d10a92c..ca3a09e7f2eda 100644
--- a/src/connector/src/sink/catalog/mod.rs
+++ b/src/connector/src/sink/catalog/mod.rs
@@ -132,6 +132,7 @@ pub enum SinkEncode {
Json,
Protobuf,
Avro,
+ Template,
}
impl SinkFormatDesc {
@@ -177,6 +178,7 @@ impl SinkFormatDesc {
SinkEncode::Json => E::Json,
SinkEncode::Protobuf => E::Protobuf,
SinkEncode::Avro => E::Avro,
+ SinkEncode::Template => E::Template,
};
let options = self
.options
@@ -212,6 +214,7 @@ impl TryFrom for SinkFormatDesc {
let encode = match value.encode() {
E::Json => SinkEncode::Json,
E::Protobuf => SinkEncode::Protobuf,
+ E::Template => SinkEncode::Template,
E::Avro => SinkEncode::Avro,
e @ (E::Unspecified | E::Native | E::Csv | E::Bytes) => {
return Err(SinkError::Config(anyhow!(
diff --git a/src/connector/src/sink/encoder/template.rs b/src/connector/src/sink/encoder/template.rs
index 85f085989b6c4..97d8271f9e83a 100644
--- a/src/connector/src/sink/encoder/template.rs
+++ b/src/connector/src/sink/encoder/template.rs
@@ -12,11 +12,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+use std::collections::HashSet;
+
+use regex::Regex;
use risingwave_common::catalog::Schema;
use risingwave_common::row::Row;
use risingwave_common::types::ToText;
use super::{Result, RowEncoder};
+use crate::sink::SinkError;
/// Encode a row according to a specified string template `user_id:{user_id}`
pub struct TemplateEncoder {
@@ -34,6 +38,24 @@ impl TemplateEncoder {
template,
}
}
+
+    pub fn check_string_format(format: &str, set: &HashSet<String>) -> Result<()> {
+ // We will check if the string inside {} corresponds to a column name in rw.
+ // In other words, the content within {} should exclusively consist of column names from rw,
+ // which means '{{column_name}}' or '{{column_name1},{column_name2}}' would be incorrect.
+ let re = Regex::new(r"\{([^}]*)\}").unwrap();
+ if !re.is_match(format) {
+ return Err(SinkError::Redis(
+ "Can't find {} in key_format or value_format".to_string(),
+ ));
+ }
+ for capture in re.captures_iter(format) {
+ if let Some(inner_content) = capture.get(1) && !set.contains(inner_content.as_str()){
+ return Err(SinkError::Redis(format!("Can't find field({:?}) in key_format or value_format",inner_content.as_str())))
+ }
+ }
+ Ok(())
+ }
}
impl RowEncoder for TemplateEncoder {
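
The validation rule enforced by `check_string_format` can be illustrated with a standalone sketch (the error type, column names, and the `check_template` name are illustrative only; the `regex` crate is the only dependency):

```rust
use std::collections::HashSet;

use regex::Regex;

// Every `{...}` placeholder in the template must name a known column.
fn check_template(format: &str, columns: &HashSet<&str>) -> Result<(), String> {
    let re = Regex::new(r"\{([^}]*)\}").unwrap();
    if !re.is_match(format) {
        return Err("no {...} placeholder found in the template".to_string());
    }
    for capture in re.captures_iter(format) {
        if let Some(inner) = capture.get(1) {
            if !columns.contains(inner.as_str()) {
                return Err(format!("unknown field {:?} in the template", inner.as_str()));
            }
        }
    }
    Ok(())
}

fn main() {
    let columns: HashSet<&str> = ["id", "name"].into_iter().collect();
    assert!(check_template("key-{id}", &columns).is_ok());
    assert!(check_template("no-placeholder", &columns).is_err());
    assert!(check_template("key-{user_id}", &columns).is_err());
}
```
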
diff --git a/src/connector/src/sink/formatter/append_only.rs b/src/connector/src/sink/formatter/append_only.rs
index 523a52dab91bb..f0efcc21d9009 100644
--- a/src/connector/src/sink/formatter/append_only.rs
+++ b/src/connector/src/sink/formatter/append_only.rs
@@ -40,7 +40,7 @@ impl SinkFormatter for AppendOnlyFormatter
) -> impl Iterator<Item = Result<(Option<Self::K>, Option<Self::V>)>> {
- std::iter::from_generator(|| {
+ std::iter::from_coroutine(|| {
for (op, row) in chunk.rows() {
if op != Op::Insert {
continue;
diff --git a/src/connector/src/sink/formatter/debezium_json.rs b/src/connector/src/sink/formatter/debezium_json.rs
index 637aa23f06410..ce98daab88756 100644
--- a/src/connector/src/sink/formatter/debezium_json.rs
+++ b/src/connector/src/sink/formatter/debezium_json.rs
@@ -85,7 +85,7 @@ impl SinkFormatter for DebeziumJsonFormatter {
&self,
chunk: &StreamChunk,
) -> impl Iterator<Item = Result<(Option<Self::K>, Option<Self::V>)>> {
- std::iter::from_generator(|| {
+ std::iter::from_coroutine(|| {
let DebeziumJsonFormatter {
schema,
pk_indices,
diff --git a/src/connector/src/sink/formatter/mod.rs b/src/connector/src/sink/formatter/mod.rs
index a7463f7e3b306..17cb708292890 100644
--- a/src/connector/src/sink/formatter/mod.rs
+++ b/src/connector/src/sink/formatter/mod.rs
@@ -29,6 +29,7 @@ pub use upsert::UpsertFormatter;
use super::catalog::{SinkEncode, SinkFormat, SinkFormatDesc};
use super::encoder::template::TemplateEncoder;
use super::encoder::KafkaConnectParams;
+use super::redis::{KEY_FORMAT, VALUE_FORMAT};
use crate::sink::encoder::{JsonEncoder, ProtoEncoder, TimestampHandlingMode};
/// Transforms a `StreamChunk` into a sequence of key-value pairs according a specific format,
@@ -92,7 +93,7 @@ impl SinkFormatterImpl {
let key_encoder = (!pk_indices.is_empty()).then(|| {
JsonEncoder::new(
schema.clone(),
- Some(pk_indices),
+ Some(pk_indices.clone()),
TimestampHandlingMode::Milli,
)
});
@@ -115,6 +116,28 @@ impl SinkFormatterImpl {
Ok(SinkFormatterImpl::AppendOnlyProto(formatter))
}
SinkEncode::Avro => err_unsupported(),
+ SinkEncode::Template => {
+ let key_format = format_desc.options.get(KEY_FORMAT).ok_or_else(|| {
+ SinkError::Config(anyhow!(
+                                "Cannot find 'key_format', please set it or use JSON"
+ ))
+ })?;
+ let value_format =
+ format_desc.options.get(VALUE_FORMAT).ok_or_else(|| {
+ SinkError::Config(anyhow!(
+                                    "Cannot find 'value_format', please set it or use JSON"
+ ))
+ })?;
+ let key_encoder = TemplateEncoder::new(
+ schema.clone(),
+ Some(pk_indices),
+ key_format.clone(),
+ );
+ let val_encoder = TemplateEncoder::new(schema, None, value_format.clone());
+ Ok(SinkFormatterImpl::AppendOnlyTemplate(
+ AppendOnlyFormatter::new(Some(key_encoder), val_encoder),
+ ))
+ }
}
}
SinkFormat::Debezium => {
@@ -131,85 +154,66 @@ impl SinkFormatterImpl {
)))
}
SinkFormat::Upsert => {
- if format_desc.encode != SinkEncode::Json {
- return err_unsupported();
- }
+ match format_desc.encode {
+ SinkEncode::Json => {
+ let mut key_encoder = JsonEncoder::new(
+ schema.clone(),
+ Some(pk_indices),
+ TimestampHandlingMode::Milli,
+ );
+ let mut val_encoder =
+ JsonEncoder::new(schema, None, TimestampHandlingMode::Milli);
- let mut key_encoder = JsonEncoder::new(
- schema.clone(),
- Some(pk_indices),
- TimestampHandlingMode::Milli,
- );
- let mut val_encoder = JsonEncoder::new(schema, None, TimestampHandlingMode::Milli);
-
- if let Some(s) = format_desc.options.get("schemas.enable") {
- match s.to_lowercase().parse::() {
- Ok(true) => {
- let kafka_connect = KafkaConnectParams {
- schema_name: format!("{}.{}", db_name, sink_from_name),
- };
- key_encoder = key_encoder.with_kafka_connect(kafka_connect.clone());
- val_encoder = val_encoder.with_kafka_connect(kafka_connect);
- }
- Ok(false) => {}
- _ => {
- return Err(SinkError::Config(anyhow!(
- "schemas.enable is expected to be `true` or `false`, got {}",
- s
- )));
- }
+ if let Some(s) = format_desc.options.get("schemas.enable") {
+                            match s.to_lowercase().parse::<bool>() {
+ Ok(true) => {
+ let kafka_connect = KafkaConnectParams {
+ schema_name: format!("{}.{}", db_name, sink_from_name),
+ };
+ key_encoder =
+ key_encoder.with_kafka_connect(kafka_connect.clone());
+ val_encoder = val_encoder.with_kafka_connect(kafka_connect);
+ }
+ Ok(false) => {}
+ _ => {
+ return Err(SinkError::Config(anyhow!(
+ "schemas.enable is expected to be `true` or `false`, got {}",
+ s
+ )));
+ }
+ }
+ };
+
+ // Initialize the upsert_stream
+ let formatter = UpsertFormatter::new(key_encoder, val_encoder);
+ Ok(SinkFormatterImpl::UpsertJson(formatter))
}
- };
-
- // Initialize the upsert_stream
- let formatter = UpsertFormatter::new(key_encoder, val_encoder);
- Ok(SinkFormatterImpl::UpsertJson(formatter))
- }
- }
- }
-
- pub fn new_with_redis(
- schema: Schema,
- pk_indices: Vec,
- is_append_only: bool,
- key_format: Option,
- value_format: Option,
- ) -> Result {
- match (key_format, value_format) {
- (Some(k), Some(v)) => {
- let key_encoder = TemplateEncoder::new(
- schema.clone(),
- Some(pk_indices),
- k,
- );
- let val_encoder =
- TemplateEncoder::new(schema, None, v);
- if is_append_only {
- Ok(SinkFormatterImpl::AppendOnlyTemplate(AppendOnlyFormatter::new(Some(key_encoder), val_encoder)))
- } else {
- Ok(SinkFormatterImpl::UpsertTemplate(UpsertFormatter::new(key_encoder, val_encoder)))
- }
- }
- (None, None) => {
- let key_encoder = JsonEncoder::new(
- schema.clone(),
- Some(pk_indices),
- TimestampHandlingMode::Milli,
- );
- let val_encoder = JsonEncoder::new(
- schema,
- None,
- TimestampHandlingMode::Milli,
- );
- if is_append_only {
- Ok(SinkFormatterImpl::AppendOnlyJson(AppendOnlyFormatter::new(Some(key_encoder), val_encoder)))
- } else {
- Ok(SinkFormatterImpl::UpsertJson(UpsertFormatter::new(key_encoder, val_encoder)))
+ SinkEncode::Template => {
+ let key_format = format_desc.options.get(KEY_FORMAT).ok_or_else(|| {
+ SinkError::Config(anyhow!(
+                                "Cannot find 'key_format', please set it or use JSON"
+ ))
+ })?;
+ let value_format =
+ format_desc.options.get(VALUE_FORMAT).ok_or_else(|| {
+ SinkError::Config(anyhow!(
+                                    "Cannot find 'value_format', please set it or use JSON"
+ ))
+ })?;
+ let key_encoder = TemplateEncoder::new(
+ schema.clone(),
+ Some(pk_indices),
+ key_format.clone(),
+ );
+ let val_encoder = TemplateEncoder::new(schema, None, value_format.clone());
+ Ok(SinkFormatterImpl::UpsertTemplate(UpsertFormatter::new(
+ key_encoder,
+ val_encoder,
+ )))
+ }
+ _ => err_unsupported(),
}
}
- _ => {
- Err(SinkError::Encode("Please provide template formats for both key and value, or choose the JSON format.".to_string()))
- }
}
}
}
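
A simplified sketch of the dispatch introduced above, with stand-in enums rather than the crate's real types: both PLAIN and UPSERT formats now accept the TEMPLATE encode, and both require `key_format` and `value_format` to be present in the options.

```rust
use std::collections::BTreeMap;

enum Format { AppendOnly, Upsert }
enum Encode { Json, Template }

fn pick_formatter(
    format: Format,
    encode: Encode,
    options: &BTreeMap<String, String>,
) -> Result<&'static str, String> {
    match (format, encode) {
        (Format::AppendOnly, Encode::Json) => Ok("AppendOnlyJson"),
        (Format::Upsert, Encode::Json) => Ok("UpsertJson"),
        // TEMPLATE is accepted for both formats, but only with both options present.
        (format, Encode::Template) => {
            options.get("key_format").ok_or("missing key_format")?;
            options.get("value_format").ok_or("missing value_format")?;
            Ok(match format {
                Format::AppendOnly => "AppendOnlyTemplate",
                Format::Upsert => "UpsertTemplate",
            })
        }
    }
}

fn main() {
    let mut options = BTreeMap::new();
    options.insert("key_format".to_string(), "key-{id}".to_string());
    options.insert("value_format".to_string(), "{id},{name}".to_string());
    println!("{:?}", pick_formatter(Format::AppendOnly, Encode::Json, &options));
    println!("{:?}", pick_formatter(Format::Upsert, Encode::Template, &options));
    println!("{:?}", pick_formatter(Format::Upsert, Encode::Template, &BTreeMap::new()));
}
```
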
diff --git a/src/connector/src/sink/formatter/upsert.rs b/src/connector/src/sink/formatter/upsert.rs
index 6ef2b5f2ca333..af8e70ff92850 100644
--- a/src/connector/src/sink/formatter/upsert.rs
+++ b/src/connector/src/sink/formatter/upsert.rs
@@ -40,7 +40,7 @@ impl SinkFormatter for UpsertFormatter {
&self,
chunk: &StreamChunk,
) -> impl Iterator<Item = Result<(Option<Self::K>, Option<Self::V>)>> {
- std::iter::from_generator(|| {
+ std::iter::from_coroutine(|| {
for (op, row) in chunk.rows() {
let event_key_object = Some(tri!(self.key_encoder.encode(row)));
diff --git a/src/connector/src/sink/redis.rs b/src/connector/src/sink/redis.rs
index 6feb7b56a9a8b..af3ec3b981620 100644
--- a/src/connector/src/sink/redis.rs
+++ b/src/connector/src/sink/redis.rs
@@ -18,15 +18,16 @@ use anyhow::anyhow;
use async_trait::async_trait;
use redis::aio::Connection;
use redis::{Client as RedisClient, Pipeline};
-use regex::Regex;
use risingwave_common::array::StreamChunk;
use risingwave_common::catalog::Schema;
use serde_derive::{Deserialize, Serialize};
use serde_with::serde_as;
+use super::catalog::SinkFormatDesc;
+use super::encoder::template::TemplateEncoder;
use super::formatter::SinkFormatterImpl;
use super::writer::FormattedSink;
-use super::{SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT};
+use super::{SinkError, SinkParam};
use crate::dispatch_sink_formatter_impl;
use crate::sink::log_store::DeliveryFutureManagerAddFuture;
use crate::sink::writer::{
@@ -35,15 +36,12 @@ use crate::sink::writer::{
use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkWriterParam};
pub const REDIS_SINK: &str = "redis";
-
+pub const KEY_FORMAT: &str = "key_format";
+pub const VALUE_FORMAT: &str = "value_format";
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct RedisCommon {
#[serde(rename = "redis.url")]
pub url: String,
- #[serde(rename = "redis.keyformat")]
- pub key_format: Option,
- #[serde(rename = "redis.valueformat")]
- pub value_format: Option,
}
impl RedisCommon {
@@ -57,23 +55,13 @@ impl RedisCommon {
pub struct RedisConfig {
#[serde(flatten)]
pub common: RedisCommon,
-
- pub r#type: String, // accept "append-only" or "upsert"
}
impl RedisConfig {
pub fn from_hashmap(properties: HashMap) -> Result {
let config =
serde_json::from_value::(serde_json::to_value(properties).unwrap())
- .map_err(|e| SinkError::Config(anyhow!(e)))?;
- if config.r#type != SINK_TYPE_APPEND_ONLY && config.r#type != SINK_TYPE_UPSERT {
- return Err(SinkError::Config(anyhow!(
- "`{}` must be {}, or {}",
- SINK_TYPE_OPTION,
- SINK_TYPE_APPEND_ONLY,
- SINK_TYPE_UPSERT
- )));
- }
+ .map_err(|e| SinkError::Config(anyhow!("{:?}", e)))?;
Ok(config)
}
}
@@ -82,28 +70,10 @@ impl RedisConfig {
pub struct RedisSink {
config: RedisConfig,
schema: Schema,
- is_append_only: bool,
pk_indices: Vec,
-}
-
-fn check_string_format(format: &Option, set: &HashSet) -> Result<()> {
- if let Some(format) = format {
- // We will check if the string inside {} corresponds to a column name in rw.
- // In other words, the content within {} should exclusively consist of column names from rw,
- // which means '{{column_name}}' or '{{column_name1},{column_name2}}' would be incorrect.
- let re = Regex::new(r"\{([^}]*)\}").unwrap();
- if !re.is_match(format) {
- return Err(SinkError::Redis(
- "Can't find {} in key_format or value_format".to_string(),
- ));
- }
- for capture in re.captures_iter(format) {
- if let Some(inner_content) = capture.get(1) && !set.contains(inner_content.as_str()){
- return Err(SinkError::Redis(format!("Can't find field({:?}) in key_format or value_format",inner_content.as_str())))
- }
- }
- }
- Ok(())
+ format_desc: SinkFormatDesc,
+ db_name: String,
+ sink_from_name: String,
}
#[async_trait]
@@ -120,8 +90,12 @@ impl TryFrom for RedisSink {
Ok(Self {
config,
schema: param.schema(),
- is_append_only: param.sink_type.is_append_only(),
pk_indices: param.downstream_pk,
+ format_desc: param
+ .format_desc
+ .ok_or_else(|| SinkError::Config(anyhow!("missing FORMAT ... ENCODE ...")))?,
+ db_name: param.db_name,
+ sink_from_name: param.sink_from_name,
})
}
}
@@ -137,7 +111,9 @@ impl Sink for RedisSink {
self.config.clone(),
self.schema.clone(),
self.pk_indices.clone(),
- self.is_append_only,
+ &self.format_desc,
+ self.db_name.clone(),
+ self.sink_from_name.clone(),
)
.await?
.into_log_sinker(usize::MAX))
@@ -160,8 +136,23 @@ impl Sink for RedisSink {
.filter(|(k, _)| self.pk_indices.contains(k))
.map(|(_, v)| v.name.clone())
.collect();
- check_string_format(&self.config.common.key_format, &pk_set)?;
- check_string_format(&self.config.common.value_format, &all_set)?;
+ if matches!(
+ self.format_desc.encode,
+ super::catalog::SinkEncode::Template
+ ) {
+ let key_format = self.format_desc.options.get(KEY_FORMAT).ok_or_else(|| {
+ SinkError::Config(anyhow!(
+                    "Cannot find 'key_format', please set it or use JSON"
+ ))
+ })?;
+ let value_format = self.format_desc.options.get(VALUE_FORMAT).ok_or_else(|| {
+ SinkError::Config(anyhow!(
+                    "Cannot find 'value_format', please set it or use JSON"
+ ))
+ })?;
+ TemplateEncoder::check_string_format(key_format, &pk_set)?;
+ TemplateEncoder::check_string_format(value_format, &all_set)?;
+ }
Ok(())
}
}
@@ -169,7 +160,6 @@ impl Sink for RedisSink {
pub struct RedisSinkWriter {
epoch: u64,
schema: Schema,
- is_append_only: bool,
pk_indices: Vec,
formatter: SinkFormatterImpl,
payload_writer: RedisSinkPayloadWriter,
@@ -223,21 +213,23 @@ impl RedisSinkWriter {
config: RedisConfig,
schema: Schema,
pk_indices: Vec,
- is_append_only: bool,
+ format_desc: &SinkFormatDesc,
+ db_name: String,
+ sink_from_name: String,
) -> Result {
let payload_writer = RedisSinkPayloadWriter::new(config.clone()).await?;
- let formatter = SinkFormatterImpl::new_with_redis(
+ let formatter = SinkFormatterImpl::new(
+ format_desc,
schema.clone(),
pk_indices.clone(),
- is_append_only,
- config.common.key_format,
- config.common.value_format,
- )?;
+ db_name,
+ sink_from_name,
+ )
+ .await?;
Ok(Self {
schema,
pk_indices,
- is_append_only,
epoch: 0,
formatter,
payload_writer,
@@ -245,24 +237,22 @@ impl RedisSinkWriter {
}
#[cfg(test)]
- pub fn mock(
+ pub async fn mock(
schema: Schema,
pk_indices: Vec,
- is_append_only: bool,
- key_format: Option,
- value_format: Option,
+ format_desc: &SinkFormatDesc,
) -> Result {
- let formatter = SinkFormatterImpl::new_with_redis(
+ let formatter = SinkFormatterImpl::new(
+ format_desc,
schema.clone(),
pk_indices.clone(),
- is_append_only,
- key_format,
- value_format,
- )?;
+ "d1".to_string(),
+ "t1".to_string(),
+ )
+ .await?;
Ok(Self {
schema,
pk_indices,
- is_append_only,
epoch: 0,
formatter,
payload_writer: RedisSinkPayloadWriter::mock(),
@@ -284,6 +274,8 @@ impl AsyncTruncateSinkWriter for RedisSinkWriter {
#[cfg(test)]
mod test {
+ use std::collections::BTreeMap;
+
use rdkafka::message::FromBytes;
use risingwave_common::array::{Array, I32Array, Op, StreamChunk, Utf8Array};
use risingwave_common::catalog::{Field, Schema};
@@ -291,6 +283,7 @@ mod test {
use risingwave_common::util::iter_util::ZipEqDebug;
use super::*;
+ use crate::sink::catalog::{SinkEncode, SinkFormat};
use crate::sink::log_store::DeliveryFutureManager;
#[tokio::test]
@@ -310,8 +303,15 @@ mod test {
},
]);
- let mut redis_sink_writer =
- RedisSinkWriter::mock(schema, vec![0], true, None, None).unwrap();
+ let format_desc = SinkFormatDesc {
+ format: SinkFormat::AppendOnly,
+ encode: SinkEncode::Json,
+ options: BTreeMap::default(),
+ };
+
+ let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc)
+ .await
+ .unwrap();
let chunk_a = StreamChunk::new(
vec![Op::Insert, Op::Insert, Op::Insert],
@@ -364,14 +364,21 @@ mod test {
},
]);
- let mut redis_sink_writer = RedisSinkWriter::mock(
- schema,
- vec![0],
- true,
- Some("key-{id}".to_string()),
- Some("values:{id:{id},name:{name}}".to_string()),
- )
- .unwrap();
+ let mut btree_map = BTreeMap::default();
+ btree_map.insert(KEY_FORMAT.to_string(), "key-{id}".to_string());
+ btree_map.insert(
+ VALUE_FORMAT.to_string(),
+ "values:{id:{id},name:{name}}".to_string(),
+ );
+ let format_desc = SinkFormatDesc {
+ format: SinkFormat::AppendOnly,
+ encode: SinkEncode::Template,
+ options: btree_map,
+ };
+
+ let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc)
+ .await
+ .unwrap();
let mut future_manager = DeliveryFutureManager::new(0);
diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs
index ad182e734a33a..3c52cb720dbd4 100644
--- a/src/connector/src/sink/remote.rs
+++ b/src/connector/src/sink/remote.rs
@@ -13,17 +13,23 @@
// limitations under the License.
use std::collections::HashMap;
+use std::fmt::Formatter;
+use std::future::Future;
use std::marker::PhantomData;
use std::ops::Deref;
+use std::time::Instant;
use anyhow::anyhow;
use async_trait::async_trait;
+use futures::stream::Peekable;
+use futures::{StreamExt, TryFutureExt, TryStreamExt};
use itertools::Itertools;
use jni::objects::{JByteArray, JValue, JValueOwned};
use prost::Message;
use risingwave_common::array::StreamChunk;
use risingwave_common::error::anyhow_error;
use risingwave_common::types::DataType;
+use risingwave_common::util::await_future_with_monitor_error_stream;
use risingwave_jni_core::jvm_runtime::JVM;
use risingwave_pb::connector_service::sink_coordinator_stream_request::{
CommitMetadata, StartCoordinator,
@@ -43,15 +49,17 @@ use risingwave_pb::connector_service::{
};
use tokio::sync::mpsc;
use tokio::sync::mpsc::{Receiver, Sender};
+use tokio_stream::wrappers::ReceiverStream;
use tracing::warn;
use super::encoder::{JsonEncoder, RowEncoder};
use crate::sink::coordinate::CoordinatedSinkWriter;
use crate::sink::encoder::TimestampHandlingMode;
+use crate::sink::log_store::{LogReader, LogStoreReadItem, TruncateOffset};
use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt};
use crate::sink::{
- DummySinkCommitCoordinator, Result, Sink, SinkCommitCoordinator, SinkError, SinkMetrics,
- SinkParam, SinkWriterParam,
+ DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkCommitCoordinator, SinkError,
+ SinkMetrics, SinkParam, SinkWriterParam,
};
use crate::ConnectorParams;
@@ -101,18 +109,12 @@ impl TryFrom for RemoteSink {
impl Sink for RemoteSink {
type Coordinator = DummySinkCommitCoordinator;
- type LogSinker = LogSinkerOf>;
+ type LogSinker = RemoteLogSinker;
const SINK_NAME: &'static str = R::SINK_NAME;
async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result {
- Ok(RemoteSinkWriter::new(
- self.param.clone(),
- writer_param.connector_params,
- writer_param.sink_metrics.clone(),
- )
- .await?
- .into_log_sinker(writer_param.sink_metrics))
+ RemoteLogSinker::new(self.param.clone(), writer_param).await
}
async fn validate(&self) -> Result<()> {
@@ -192,6 +194,140 @@ impl Sink for RemoteSink {
}
}
+pub struct RemoteLogSinker {
+ writer: RemoteSinkWriter,
+ sink_metrics: SinkMetrics,
+}
+
+impl RemoteLogSinker {
+ async fn new(sink_param: SinkParam, writer_param: SinkWriterParam) -> Result {
+ let writer = RemoteSinkWriter::new(
+ sink_param,
+ writer_param.connector_params,
+ writer_param.sink_metrics.clone(),
+ )
+ .await?;
+ let sink_metrics = writer_param.sink_metrics;
+ Ok(RemoteLogSinker {
+ writer,
+ sink_metrics,
+ })
+ }
+}
+
+/// Await the given future while monitoring for errors on the receiver stream.
+async fn await_future_with_monitor_receiver_err<O, F: Future<Output = Result<O>>>(
+    receiver: &mut SinkWriterStreamJniReceiver,
+    future: F,
+) -> Result<O> {
+ match await_future_with_monitor_error_stream(&mut receiver.response_stream, future).await {
+ Ok(result) => result,
+ Err(None) => Err(SinkError::Remote(anyhow!("end of remote receiver stream"))),
+ Err(Some(err)) => Err(SinkError::Internal(err)),
+ }
+}
+
+#[async_trait]
+impl LogSinker for RemoteLogSinker {
+ async fn consume_log_and_sink(self, mut log_reader: impl LogReader) -> Result<()> {
+        // Note: this is essentially a copy of the implementation of LogSinkerOf,
+        // except that the future of `log_reader.next_item` is wrapped with
+        // `await_future_with_monitor_receiver_err`, so that errors in the response
+        // stream are surfaced early.
+
+ let mut sink_writer = self.writer;
+ let sink_metrics = self.sink_metrics;
+ #[derive(Debug)]
+ enum LogConsumerState {
+ /// Mark that the log consumer is not initialized yet
+ Uninitialized,
+
+ /// Mark that a new epoch has begun.
+ EpochBegun { curr_epoch: u64 },
+
+ /// Mark that the consumer has just received a barrier
+ BarrierReceived { prev_epoch: u64 },
+ }
+
+ let mut state = LogConsumerState::Uninitialized;
+
+ log_reader.init().await?;
+
+ loop {
+ let (epoch, item): (u64, LogStoreReadItem) = await_future_with_monitor_receiver_err(
+ &mut sink_writer.stream_handle.response_rx,
+ log_reader.next_item().map_err(SinkError::Internal),
+ )
+ .await?;
+ if let LogStoreReadItem::UpdateVnodeBitmap(_) = &item {
+ match &state {
+ LogConsumerState::BarrierReceived { .. } => {}
+ _ => unreachable!(
+ "update vnode bitmap can be accepted only right after \
+ barrier, but current state is {:?}",
+ state
+ ),
+ }
+ }
+            // begin_epoch when not previously begun
+ state = match state {
+ LogConsumerState::Uninitialized => {
+ sink_writer.begin_epoch(epoch).await?;
+ LogConsumerState::EpochBegun { curr_epoch: epoch }
+ }
+ LogConsumerState::EpochBegun { curr_epoch } => {
+ assert!(
+ epoch >= curr_epoch,
+ "new epoch {} should not be below the current epoch {}",
+ epoch,
+ curr_epoch
+ );
+ LogConsumerState::EpochBegun { curr_epoch: epoch }
+ }
+ LogConsumerState::BarrierReceived { prev_epoch } => {
+ assert!(
+ epoch > prev_epoch,
+ "new epoch {} should be greater than prev epoch {}",
+ epoch,
+ prev_epoch
+ );
+ sink_writer.begin_epoch(epoch).await?;
+ LogConsumerState::EpochBegun { curr_epoch: epoch }
+ }
+ };
+ match item {
+ LogStoreReadItem::StreamChunk { chunk, .. } => {
+ if let Err(e) = sink_writer.write_batch(chunk).await {
+ sink_writer.abort().await?;
+ return Err(e);
+ }
+ }
+ LogStoreReadItem::Barrier { is_checkpoint } => {
+ let prev_epoch = match state {
+ LogConsumerState::EpochBegun { curr_epoch } => curr_epoch,
+ _ => unreachable!("epoch must have begun before handling barrier"),
+ };
+ if is_checkpoint {
+ let start_time = Instant::now();
+ sink_writer.barrier(true).await?;
+ sink_metrics
+ .sink_commit_duration_metrics
+ .observe(start_time.elapsed().as_millis() as f64);
+ log_reader
+ .truncate(TruncateOffset::Barrier { epoch })
+ .await?;
+ } else {
+ sink_writer.barrier(false).await?;
+ }
+ state = LogConsumerState::BarrierReceived { prev_epoch }
+ }
+ LogStoreReadItem::UpdateVnodeBitmap(vnode_bitmap) => {
+ sink_writer.update_vnode_bitmap(vnode_bitmap).await?;
+ }
+ }
+ }
+ }
+}
+
#[derive(Debug)]
pub struct CoordinatedRemoteSink(pub RemoteSink);
@@ -286,14 +422,11 @@ impl SinkCoordinatorStreamJniHandle {
}
}
-const DEFAULT_CHANNEL_SIZE: usize = 16;
-#[derive(Debug)]
-pub struct SinkWriterStreamJniHandle {
+struct SinkWriterStreamJniSender {
request_tx: Sender,
- response_rx: Receiver,
}
-impl SinkWriterStreamJniHandle {
+impl SinkWriterStreamJniSender {
pub async fn start_epoch(&mut self, epoch: u64) -> Result<()> {
self.request_tx
.send(SinkWriterStreamRequest {
@@ -316,33 +449,29 @@ impl SinkWriterStreamJniHandle {
.map_err(|err| SinkError::Internal(err.into()))
}
- pub async fn barrier(&mut self, epoch: u64) -> Result<()> {
+ pub async fn barrier(&mut self, epoch: u64, is_checkpoint: bool) -> Result<()> {
self.request_tx
.send(SinkWriterStreamRequest {
request: Some(SinkRequest::Barrier(Barrier {
epoch,
- is_checkpoint: false,
+ is_checkpoint,
})),
})
.await
.map_err(|err| SinkError::Internal(err.into()))
}
+}
- pub async fn commit(&mut self, epoch: u64) -> Result {
- self.request_tx
- .send(SinkWriterStreamRequest {
- request: Some(SinkRequest::Barrier(Barrier {
- epoch,
- is_checkpoint: true,
- })),
- })
- .await
- .map_err(|err| SinkError::Internal(err.into()))?;
+struct SinkWriterStreamJniReceiver {
+ response_stream: Peekable>>,
+}
- match self.response_rx.recv().await {
- Some(SinkWriterStreamResponse {
+impl SinkWriterStreamJniReceiver {
+    async fn next_commit_response(&mut self) -> Result<CommitResponse> {
+ match self.response_stream.try_next().await {
+ Ok(Some(SinkWriterStreamResponse {
response: Some(sink_writer_stream_response::Response::Commit(rsp)),
- }) => Ok(rsp),
+ })) => Ok(rsp),
msg => Err(SinkError::Internal(anyhow!(
"should get Sync response but get {:?}",
msg
@@ -351,6 +480,53 @@ impl SinkWriterStreamJniHandle {
}
}
+const DEFAULT_CHANNEL_SIZE: usize = 16;
+struct SinkWriterStreamJniHandle {
+ request_tx: SinkWriterStreamJniSender,
+ response_rx: SinkWriterStreamJniReceiver,
+}
+
+impl std::fmt::Debug for SinkWriterStreamJniHandle {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ f.debug_struct("SinkWriterStreamJniHandle").finish()
+ }
+}
+
+impl SinkWriterStreamJniHandle {
+ async fn start_epoch(&mut self, epoch: u64) -> Result<()> {
+ await_future_with_monitor_receiver_err(
+ &mut self.response_rx,
+ self.request_tx.start_epoch(epoch),
+ )
+ .await
+ }
+
+ async fn write_batch(&mut self, epoch: u64, batch_id: u64, payload: Payload) -> Result<()> {
+ await_future_with_monitor_receiver_err(
+ &mut self.response_rx,
+ self.request_tx.write_batch(epoch, batch_id, payload),
+ )
+ .await
+ }
+
+ async fn barrier(&mut self, epoch: u64) -> Result<()> {
+ await_future_with_monitor_receiver_err(
+ &mut self.response_rx,
+ self.request_tx.barrier(epoch, false),
+ )
+ .await
+ }
+
+    async fn commit(&mut self, epoch: u64) -> Result<CommitResponse> {
+ await_future_with_monitor_receiver_err(
+ &mut self.response_rx,
+ self.request_tx.barrier(epoch, true),
+ )
+ .await?;
+ self.response_rx.next_commit_response().await
+ }
+}
+
pub type RemoteSinkWriter = RemoteSinkWriterInner<(), R>;
pub type CoordinatedRemoteSinkWriter = RemoteSinkWriterInner, R>;
@@ -374,10 +550,7 @@ impl RemoteSinkWriterInner {
let (request_tx, request_rx) = mpsc::channel(DEFAULT_CHANNEL_SIZE);
let (response_tx, response_rx) = mpsc::channel(DEFAULT_CHANNEL_SIZE);
- let mut stream_handle = SinkWriterStreamJniHandle {
- request_tx,
- response_rx,
- };
+ let mut response_stream = ReceiverStream::new(response_rx).peekable();
std::thread::spawn(move || {
let mut env = JVM.get_or_init().unwrap().attach_current_thread().unwrap();
@@ -388,7 +561,10 @@ impl RemoteSinkWriterInner {
"(JJ)V",
&[
JValue::from(&request_rx as *const Receiver as i64),
- JValue::from(&response_tx as *const Sender as i64),
+ JValue::from(
+ &response_tx as *const Sender>
+ as i64,
+ ),
],
);
@@ -410,8 +586,7 @@ impl RemoteSinkWriterInner {
};
// First request
- stream_handle
- .request_tx
+ request_tx
.send(sink_writer_stream_request)
.await
.map_err(|err| {
@@ -423,17 +598,18 @@ impl RemoteSinkWriterInner {
})?;
// First response
- match stream_handle.response_rx.recv().await {
- Some(SinkWriterStreamResponse {
+ match response_stream.try_next().await {
+ Ok(Some(SinkWriterStreamResponse {
response: Some(sink_writer_stream_response::Response::Start(_)),
- }) => {}
- msg => {
+ })) => {}
+ Ok(msg) => {
return Err(SinkError::Internal(anyhow!(
"should get start response for connector `{}` but get {:?}",
R::SINK_NAME,
msg
)));
}
+ Err(e) => return Err(SinkError::Internal(e)),
};
tracing::trace!(
@@ -444,6 +620,11 @@ impl RemoteSinkWriterInner {
let schema = param.schema();
+ let stream_handle = SinkWriterStreamJniHandle {
+ request_tx: SinkWriterStreamJniSender { request_tx },
+ response_rx: SinkWriterStreamJniReceiver { response_stream },
+ };
+
Ok(Self {
properties: param.properties,
epoch: None,
@@ -458,7 +639,7 @@ impl RemoteSinkWriterInner {
#[cfg(test)]
fn for_test(
- response_receiver: Receiver,
+ response_receiver: Receiver>,
request_sender: Sender,
) -> RemoteSinkWriter {
use risingwave_common::catalog::{Field, Schema};
@@ -480,8 +661,12 @@ impl RemoteSinkWriterInner {
]);
let stream_handle = SinkWriterStreamJniHandle {
- request_tx: request_sender,
- response_rx: response_receiver,
+ request_tx: SinkWriterStreamJniSender {
+ request_tx: request_sender,
+ },
+ response_rx: SinkWriterStreamJniReceiver {
+ response_stream: ReceiverStream::new(response_receiver).peekable(),
+ },
};
RemoteSinkWriter {
@@ -828,12 +1013,12 @@ mod test {
// test commit
response_sender
- .send(SinkWriterStreamResponse {
+ .send(Ok(SinkWriterStreamResponse {
response: Some(Response::Commit(CommitResponse {
epoch: 2022,
metadata: None,
})),
- })
+ }))
.await
.expect("test failed: failed to sync epoch");
sink.barrier(true).await.unwrap();
diff --git a/src/expr/core/src/aggregate/def.rs b/src/expr/core/src/aggregate/def.rs
index f71bfd454a415..964ec46c9f9c4 100644
--- a/src/expr/core/src/aggregate/def.rs
+++ b/src/expr/core/src/aggregate/def.rs
@@ -233,6 +233,9 @@ pub enum AggKind {
PercentileDisc,
Mode,
Grouping,
+
+    /// Return the last seen value among the input values.
+ InternalLastSeenValue,
}
impl AggKind {
@@ -264,6 +267,7 @@ impl AggKind {
PbType::PercentileDisc => Ok(AggKind::PercentileDisc),
PbType::Mode => Ok(AggKind::Mode),
PbType::Grouping => Ok(AggKind::Grouping),
+ PbType::InternalLastSeenValue => Ok(AggKind::InternalLastSeenValue),
PbType::Unspecified => bail!("Unrecognized agg."),
}
}
@@ -294,8 +298,9 @@ impl AggKind {
Self::VarSamp => PbType::VarSamp,
Self::PercentileCont => PbType::PercentileCont,
Self::PercentileDisc => PbType::PercentileDisc,
- Self::Grouping => PbType::Grouping,
Self::Mode => PbType::Mode,
+ Self::Grouping => PbType::Grouping,
+ Self::InternalLastSeenValue => PbType::InternalLastSeenValue,
}
}
}
@@ -422,6 +427,7 @@ pub mod agg_kinds {
| AggKind::BoolAnd
| AggKind::BoolOr
| AggKind::ApproxCountDistinct
+ | AggKind::InternalLastSeenValue
};
}
pub use single_value_state;
@@ -450,7 +456,11 @@ impl AggKind {
/// Get the total phase agg kind from the partial phase agg kind.
pub fn partial_to_total(self) -> Option {
match self {
- AggKind::BitXor | AggKind::Min | AggKind::Max | AggKind::Sum => Some(self),
+ AggKind::BitXor
+ | AggKind::Min
+ | AggKind::Max
+ | AggKind::Sum
+ | AggKind::InternalLastSeenValue => Some(self),
AggKind::Sum0 | AggKind::Count => Some(AggKind::Sum0),
agg_kinds::simply_cannot_two_phase!() => None,
agg_kinds::rewritten!() => None,
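
The two-phase mapping extended above can be sketched with a stand-in subset of `AggKind` (illustrative only): partial-phase `internal_last_seen_value` results are combined by the same aggregate in the total phase, just as partial counts are combined by `sum0`.

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum AggKind { Min, Max, Sum, Sum0, Count, InternalLastSeenValue }

fn partial_to_total(kind: AggKind) -> Option<AggKind> {
    use AggKind::*;
    match kind {
        Min | Max | Sum | InternalLastSeenValue => Some(kind),
        Sum0 | Count => Some(Sum0),
    }
}

fn main() {
    // Counting per shard and then summing the partial counts gives the total count.
    assert_eq!(partial_to_total(AggKind::Count), Some(AggKind::Sum0));
    // Taking the last seen of per-shard "last seen" values stays the same aggregate.
    assert_eq!(
        partial_to_total(AggKind::InternalLastSeenValue),
        Some(AggKind::InternalLastSeenValue)
    );
}
```
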
diff --git a/src/expr/core/src/expr/build.rs b/src/expr/core/src/expr/build.rs
index 1ea03bd36f42a..7dffbcd42d66b 100644
--- a/src/expr/core/src/expr/build.rs
+++ b/src/expr/core/src/expr/build.rs
@@ -27,8 +27,13 @@ use super::expr_in::InExpression;
use super::expr_some_all::SomeAllExpression;
use super::expr_udf::UdfExpression;
use super::expr_vnode::VnodeExpression;
-use super::wrapper::{Checked, EvalErrorReport, NonStrict};
-use crate::expr::{BoxedExpression, Expression, InputRefExpression, LiteralExpression};
+use super::wrapper::checked::Checked;
+use super::wrapper::non_strict::NonStrict;
+use super::wrapper::EvalErrorReport;
+use super::NonStrictExpression;
+use crate::expr::{
+ BoxedExpression, Expression, ExpressionBoxExt, InputRefExpression, LiteralExpression,
+};
use crate::sig::FUNCTION_REGISTRY;
use crate::{bail, ExprError, Result};
@@ -41,8 +46,10 @@ pub fn build_from_prost(prost: &ExprNode) -> Result {
pub fn build_non_strict_from_prost(
prost: &ExprNode,
error_report: impl EvalErrorReport + 'static,
-) -> Result {
- ExprBuilder::new_non_strict(error_report).build(prost)
+) -> Result<NonStrictExpression> {
+ ExprBuilder::new_non_strict(error_report)
+ .build(prost)
+ .map(NonStrictExpression)
}
/// Build an expression from protobuf with possibly some wrappers attached to each node.
@@ -153,7 +160,7 @@ impl BuildBoxed for E {
prost: &ExprNode,
build_child: impl Fn(&ExprNode) -> Result,
) -> Result {
- Self::build(prost, build_child).map(Expression::boxed)
+ Self::build(prost, build_child).map(ExpressionBoxExt::boxed)
}
}
@@ -217,9 +224,9 @@ pub fn build_func_non_strict(
ret_type: DataType,
children: Vec,
error_report: impl EvalErrorReport + 'static,
-) -> Result {
+) -> Result<NonStrictExpression> {
let expr = build_func(func, ret_type, children)?;
- let wrapped = ExprBuilder::new_non_strict(error_report).wrap(expr);
+ let wrapped = NonStrictExpression(ExprBuilder::new_non_strict(error_report).wrap(expr));
Ok(wrapped)
}
diff --git a/src/expr/core/src/expr/mod.rs b/src/expr/core/src/expr/mod.rs
index 37e0104371a3e..48a46f640bf7b 100644
--- a/src/expr/core/src/expr/mod.rs
+++ b/src/expr/core/src/expr/mod.rs
@@ -58,7 +58,7 @@ pub use self::build::*;
pub use self::expr_input_ref::InputRefExpression;
pub use self::expr_literal::LiteralExpression;
pub use self::value::{ValueImpl, ValueRef};
-pub use self::wrapper::EvalErrorReport;
+pub use self::wrapper::*;
pub use super::{ExprError, Result};
/// Interface of an expression.
@@ -67,6 +67,7 @@ pub use super::{ExprError, Result};
/// should be implemented. Prefer calling and implementing `eval_v2` instead of `eval` if possible,
/// to gain the performance benefit of scalar expression.
#[async_trait::async_trait]
+#[auto_impl::auto_impl(&, Box)]
pub trait Expression: std::fmt::Debug + Sync + Send {
/// Get the return data type.
fn return_type(&self) -> DataType;
@@ -101,23 +102,77 @@ pub trait Expression: std::fmt::Debug + Sync + Send {
fn eval_const(&self) -> Result {
Err(ExprError::NotConstant)
}
+}
+/// An owned dynamically typed [`Expression`].
+pub type BoxedExpression = Box<dyn Expression>;
+
+/// Extension trait for boxing expressions.
+///
+/// This is not directly made into [`Expression`] trait because...
+/// - an expression does not have to be `'static`,
+/// - and for the ease of `auto_impl`.
+#[easy_ext::ext(ExpressionBoxExt)]
+impl E {
/// Wrap the expression in a Box.
- fn boxed(self) -> BoxedExpression
- where
- Self: Sized + Send + 'static,
- {
+ pub fn boxed(self) -> BoxedExpression {
Box::new(self)
}
}
-// TODO: make this an extension, or implement it on a `NonStrict` newtype.
-impl dyn Expression {
+/// A type-safe wrapper that indicates the inner expression can be evaluated in a non-strict
+/// manner, i.e., developers can directly call `eval_infallible` and `eval_row_infallible` without
+/// checking the result.
+///
+/// This is usually created by non-strict build functions like [`crate::expr::build_non_strict_from_prost`]
+/// and [`crate::expr::build_func_non_strict`]. It can also be created directly by
+/// [`NonStrictExpression::new_topmost`], where only the evaluation of the topmost level expression
+/// node is non-strict and should be treated as a TODO.
+///
+/// Compared to [`crate::expr::wrapper::non_strict::NonStrict`], this is more like an indicator
+/// applied on the root of an expression tree, while the latter is a wrapper that can be applied on
+/// each node of the tree and actually changes the behavior. As a result, [`NonStrictExpression`]
+/// does not implement [`Expression`] trait and instead deals directly with developers.
+#[derive(Debug)]
+pub struct NonStrictExpression<E = BoxedExpression>(E);
+
+impl<E> NonStrictExpression<E>
+where
+    E: Expression,
+{
+ /// Create a non-strict expression directly wrapping the given expression.
+ ///
+ /// Should only be used in tests as evaluation may panic.
+ pub fn for_test(inner: E) -> NonStrictExpression
+ where
+ E: 'static,
+ {
+ NonStrictExpression(inner.boxed())
+ }
+
+ /// Create a non-strict expression from the given expression, where only the evaluation of the
+ /// topmost level expression node is non-strict (which is subtly different from
+ /// [`crate::expr::build_non_strict_from_prost`] where every node is non-strict).
+ ///
+ /// This should be used as a TODO.
+ pub fn new_topmost(
+ inner: E,
+ error_report: impl EvalErrorReport,
+ ) -> NonStrictExpression {
+ let inner = wrapper::non_strict::NonStrict::new(inner, error_report);
+ NonStrictExpression(inner)
+ }
+
+ /// Get the return data type.
+ pub fn return_type(&self) -> DataType {
+ self.0.return_type()
+ }
+
/// Evaluate the expression in vectorized execution and assert it succeeds. Returns an array.
///
/// Use with expressions built in non-strict mode.
pub async fn eval_infallible(&self, input: &DataChunk) -> ArrayRef {
- self.eval(input).await.expect("evaluation failed")
+ self.0.eval(input).await.expect("evaluation failed")
}
/// Evaluate the expression in row-based execution and assert it succeeds. Returns a nullable
@@ -125,38 +180,17 @@ impl dyn Expression {
///
/// Use with expressions built in non-strict mode.
pub async fn eval_row_infallible(&self, input: &OwnedRow) -> Datum {
- self.eval_row(input).await.expect("evaluation failed")
- }
-}
-
-/// An owned dynamically typed [`Expression`].
-pub type BoxedExpression = Box;
-
-// TODO: avoid the overhead of extra boxing.
-#[async_trait::async_trait]
-impl Expression for BoxedExpression {
- fn return_type(&self) -> DataType {
- (**self).return_type()
+ self.0.eval_row(input).await.expect("evaluation failed")
}
- async fn eval(&self, input: &DataChunk) -> Result {
- (**self).eval(input).await
- }
-
- async fn eval_v2(&self, input: &DataChunk) -> Result {
- (**self).eval_v2(input).await
- }
-
- async fn eval_row(&self, input: &OwnedRow) -> Result {
- (**self).eval_row(input).await
- }
-
- fn eval_const(&self) -> Result {
- (**self).eval_const()
+ /// Unwrap the inner expression.
+ pub fn into_inner(self) -> E {
+ self.0
}
- fn boxed(self) -> BoxedExpression {
- self
+ /// Get a reference to the inner expression.
+ pub fn inner(&self) -> &E {
+ &self.0
}
}
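
As a rough analog of the wrapper described above (not the crate's real types, and with the NULL-padding of the inner `NonStrict` wrapper folded into the newtype for brevity), the following sketch shows how a marker newtype plus a pluggable error report give infallible evaluation:

```rust
/// Minimal stand-ins for the crate's `Expression` and `EvalErrorReport` traits.
trait Expression {
    fn eval(&self, input: i64) -> Result<i64, String>;
}

trait EvalErrorReport {
    fn report(&self, error: String);
}

/// Report errors by logging them, in the spirit of the `LogReport` added in this patch.
struct LogReport;
impl EvalErrorReport for LogReport {
    fn report(&self, error: String) {
        eprintln!("failed to evaluate expression: {error}");
    }
}

/// Marker newtype: holders of this type may evaluate without handling errors.
struct NonStrictExpression<E, R>(E, R);

impl<E: Expression, R: EvalErrorReport> NonStrictExpression<E, R> {
    fn eval_infallible(&self, input: i64) -> i64 {
        self.0.eval(input).unwrap_or_else(|e| {
            self.1.report(e);
            0 // pad with a default (NULL in the real system)
        })
    }
}

struct DivBy(i64);
impl Expression for DivBy {
    fn eval(&self, input: i64) -> Result<i64, String> {
        input
            .checked_div(self.0)
            .ok_or_else(|| "division by zero".to_string())
    }
}

fn main() {
    let expr = NonStrictExpression(DivBy(0), LogReport);
    // Reports the error through `LogReport` and falls back to the default.
    println!("{}", expr.eval_infallible(10));
}
```
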
diff --git a/src/expr/core/src/expr/wrapper/checked.rs b/src/expr/core/src/expr/wrapper/checked.rs
index 1e049ad481010..b3b1375c4fa82 100644
--- a/src/expr/core/src/expr/wrapper/checked.rs
+++ b/src/expr/core/src/expr/wrapper/checked.rs
@@ -22,7 +22,7 @@ use crate::expr::{Expression, ValueImpl};
/// A wrapper of [`Expression`] that does extra checks after evaluation.
#[derive(Debug)]
-pub struct Checked(pub E);
+pub(crate) struct Checked<E>(pub E);
// TODO: avoid the overhead of extra boxing.
#[async_trait]
diff --git a/src/expr/core/src/expr/wrapper/mod.rs b/src/expr/core/src/expr/wrapper/mod.rs
index 48241d05de45c..16988a050ad8d 100644
--- a/src/expr/core/src/expr/wrapper/mod.rs
+++ b/src/expr/core/src/expr/wrapper/mod.rs
@@ -12,8 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-mod checked;
-mod non_strict;
+pub(crate) mod checked;
+pub(crate) mod non_strict;
-pub use checked::Checked;
-pub use non_strict::{EvalErrorReport, NonStrict};
+pub use non_strict::{EvalErrorReport, LogReport};
diff --git a/src/expr/core/src/expr/wrapper/non_strict.rs b/src/expr/core/src/expr/wrapper/non_strict.rs
index 0859cea27aa49..782456023cdf7 100644
--- a/src/expr/core/src/expr/wrapper/non_strict.rs
+++ b/src/expr/core/src/expr/wrapper/non_strict.rs
@@ -23,7 +23,7 @@ use crate::expr::{Expression, ValueImpl};
use crate::ExprError;
/// Report an error during evaluation.
-#[auto_impl(Arc)]
+#[auto_impl(&, Arc)]
pub trait EvalErrorReport: Clone + Send + Sync {
/// Perform the error reporting.
///
@@ -42,11 +42,21 @@ impl EvalErrorReport for ! {
}
}
+/// Log the error to report an error during evaluation.
+#[derive(Clone)]
+pub struct LogReport;
+
+impl EvalErrorReport for LogReport {
+ fn report(&self, error: ExprError) {
+ tracing::error!(%error, "failed to evaluate expression");
+ }
+}
+
/// A wrapper of [`Expression`] that evaluates in a non-strict way. Basically...
/// - When an error occurs during chunk-level evaluation, recompute in row-based execution and pad
/// with NULL for each failed row.
/// - Report all error occurred during row-level evaluation to the [`EvalErrorReport`].
-pub struct NonStrict {
+pub(crate) struct NonStrict<E, R> {
inner: E,
report: R,
}
diff --git a/src/expr/core/src/lib.rs b/src/expr/core/src/lib.rs
index c2f46d5632274..b49c4ae161dfc 100644
--- a/src/expr/core/src/lib.rs
+++ b/src/expr/core/src/lib.rs
@@ -17,7 +17,7 @@
#![feature(lint_reasons)]
#![feature(iterator_try_collect)]
#![feature(lazy_cell)]
-#![feature(generators)]
+#![feature(coroutines)]
#![feature(arc_unwrap_or_clone)]
#![feature(never_type)]
diff --git a/src/expr/impl/src/aggregate/general.rs b/src/expr/impl/src/aggregate/general.rs
index de1331c524063..f47c94d45f24d 100644
--- a/src/expr/impl/src/aggregate/general.rs
+++ b/src/expr/impl/src/aggregate/general.rs
@@ -62,6 +62,15 @@ fn last_value(_: T, input: T) -> T {
input
}
+#[aggregate("internal_last_seen_value(*) -> auto", state = "ref")]
+fn internal_last_seen_value<T>(state: T, input: T, retract: bool) -> T {
+ if retract {
+ state
+ } else {
+ input
+ }
+}
+
/// Note the following corner cases:
///
/// ```slt
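
The semantics of the new aggregate can be checked with a tiny standalone sketch; the `(retract, value)` input representation below is illustrative only:

```rust
// Keep the previous state on retraction, otherwise take the new input.
fn internal_last_seen_value<T>(state: T, input: T, retract: bool) -> T {
    if retract { state } else { input }
}

fn main() {
    // (retract, value) pairs: inserts move the state forward, retractions keep it.
    let ops = [(false, 1), (false, 7), (true, 7), (false, 3)];
    let mut state = 0;
    for (retract, value) in ops {
        state = internal_last_seen_value(state, value, retract);
    }
    assert_eq!(state, 3);
    println!("last seen value: {state}");
}
```
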
diff --git a/src/expr/impl/src/lib.rs b/src/expr/impl/src/lib.rs
index a5906e4320282..6ea82d30ac5f1 100644
--- a/src/expr/impl/src/lib.rs
+++ b/src/expr/impl/src/lib.rs
@@ -28,7 +28,7 @@
#![feature(exclusive_range_pattern)]
#![feature(lazy_cell)]
#![feature(round_ties_even)]
-#![feature(generators)]
+#![feature(coroutines)]
#![feature(test)]
#![feature(arc_unwrap_or_clone)]
diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs
index 889cc43fe6b18..c173c76c330c5 100644
--- a/src/expr/impl/src/scalar/cast.rs
+++ b/src/expr/impl/src/scalar/cast.rs
@@ -22,7 +22,9 @@ use risingwave_common::cast;
use risingwave_common::row::OwnedRow;
use risingwave_common::types::{DataType, Int256, IntoOrdered, JsonbRef, ToText, F64};
use risingwave_common::util::iter_util::ZipEqFast;
-use risingwave_expr::expr::{build_func, Context, Expression, InputRefExpression};
+use risingwave_expr::expr::{
+ build_func, Context, Expression, ExpressionBoxExt, InputRefExpression,
+};
use risingwave_expr::{function, ExprError, Result};
use risingwave_pb::expr::expr_node::PbType;
diff --git a/src/expr/impl/src/table_function/generate_series.rs b/src/expr/impl/src/table_function/generate_series.rs
index 586fa60de02c2..dfa09b0e215b8 100644
--- a/src/expr/impl/src/table_function/generate_series.rs
+++ b/src/expr/impl/src/table_function/generate_series.rs
@@ -159,7 +159,7 @@ mod tests {
use risingwave_common::array::DataChunk;
use risingwave_common::types::test_utils::IntervalTestExt;
use risingwave_common::types::{DataType, Decimal, Interval, ScalarImpl, Timestamp};
- use risingwave_expr::expr::{BoxedExpression, Expression, LiteralExpression};
+ use risingwave_expr::expr::{BoxedExpression, ExpressionBoxExt, LiteralExpression};
use risingwave_expr::table_function::build;
use risingwave_expr::ExprError;
use risingwave_pb::expr::table_function::PbType;
diff --git a/src/frontend/planner_test/tests/testdata/input/update.yaml b/src/frontend/planner_test/tests/testdata/input/update.yaml
index a63e5192073e6..9487b396d924a 100644
--- a/src/frontend/planner_test/tests/testdata/input/update.yaml
+++ b/src/frontend/planner_test/tests/testdata/input/update.yaml
@@ -75,4 +75,19 @@
create table t(v1 int as v2-1, v2 int, v3 int as v2+1, primary key (v3));
update t set v2 = 3;
expected_outputs:
- - binder_error
\ No newline at end of file
+ - binder_error
+- name: update subquery
+ sql: |
+ create table t (a int, b int);
+ update t set a = 777 where b not in (select a from t);
+ expected_outputs:
+ - logical_plan
+ - batch_plan
+
+- name: delete subquery
+ sql: |
+ create table t (a int, b int);
+ delete from t where a not in (select b from t);
+ expected_outputs:
+ - logical_plan
+ - batch_plan
diff --git a/src/frontend/planner_test/tests/testdata/output/agg.yaml b/src/frontend/planner_test/tests/testdata/output/agg.yaml
index aefb4df98ef4e..baa77dc79c89b 100644
--- a/src/frontend/planner_test/tests/testdata/output/agg.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/agg.yaml
@@ -1395,20 +1395,20 @@
sq_1.col_2;
batch_plan: |-
BatchExchange { order: [], dist: Single }
- └─BatchProject { exprs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))))] }
- └─BatchHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], aggs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))))] }
- └─BatchExchange { order: [], dist: HashShard(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))) }
- └─BatchHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], aggs: [max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)))] }
- └─BatchSortAgg { group_key: [lineitem.l_orderkey], aggs: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))] }
+ └─BatchProject { exprs: [max(max(internal_last_seen_value(lineitem.l_commitdate)))] }
+ └─BatchHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate)], aggs: [max(max(internal_last_seen_value(lineitem.l_commitdate)))] }
+ └─BatchExchange { order: [], dist: HashShard(internal_last_seen_value(lineitem.l_commitdate)) }
+ └─BatchHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate)], aggs: [max(internal_last_seen_value(lineitem.l_commitdate))] }
+ └─BatchSortAgg { group_key: [lineitem.l_orderkey], aggs: [internal_last_seen_value(lineitem.l_commitdate)] }
└─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_commitdate], distribution: UpstreamHashShard(lineitem.l_orderkey) }
stream_plan: |-
- StreamMaterialize { columns: [col_0, first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))(hidden)], stream_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], pk_columns: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], pk_conflict: NoCheck }
- └─StreamProject { exprs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)))), first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))] }
- └─StreamHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], aggs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)))), count] }
- └─StreamExchange { dist: HashShard(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))) }
- └─StreamHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)), $expr1], aggs: [max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))), count] }
- └─StreamProject { exprs: [lineitem.l_orderkey, first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)), Vnode(lineitem.l_orderkey) as $expr1] }
- └─StreamHashAgg { group_key: [lineitem.l_orderkey], aggs: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)), count] }
+ StreamMaterialize { columns: [col_0, internal_last_seen_value(lineitem.l_commitdate)(hidden)], stream_key: [internal_last_seen_value(lineitem.l_commitdate)], pk_columns: [internal_last_seen_value(lineitem.l_commitdate)], pk_conflict: NoCheck }
+ └─StreamProject { exprs: [max(max(internal_last_seen_value(lineitem.l_commitdate))), internal_last_seen_value(lineitem.l_commitdate)] }
+ └─StreamHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate)], aggs: [max(max(internal_last_seen_value(lineitem.l_commitdate))), count] }
+ └─StreamExchange { dist: HashShard(internal_last_seen_value(lineitem.l_commitdate)) }
+ └─StreamHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate), $expr1], aggs: [max(internal_last_seen_value(lineitem.l_commitdate)), count] }
+ └─StreamProject { exprs: [lineitem.l_orderkey, internal_last_seen_value(lineitem.l_commitdate), Vnode(lineitem.l_orderkey) as $expr1] }
+ └─StreamHashAgg { group_key: [lineitem.l_orderkey], aggs: [internal_last_seen_value(lineitem.l_commitdate), count] }
└─StreamTableScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_commitdate], pk: [lineitem.l_orderkey], dist: UpstreamHashShard(lineitem.l_orderkey) }
- name: two phase agg on hop window input should use two phase agg
sql: |
diff --git a/src/frontend/planner_test/tests/testdata/output/append_only.yaml b/src/frontend/planner_test/tests/testdata/output/append_only.yaml
index 184abd564c32b..d693d3fc942df 100644
--- a/src/frontend/planner_test/tests/testdata/output/append_only.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/append_only.yaml
@@ -14,11 +14,12 @@
select t1.v1 as id, v2, v3 from t1 join t2 on t1.v1=t2.v1;
stream_plan: |-
StreamMaterialize { columns: [id, v2, v3, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, id], pk_columns: [t1._row_id, t2._row_id, id], pk_conflict: NoCheck }
- └─StreamHashJoin [append_only] { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v3, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.v1) }
- └─StreamTableScan { table: t2, columns: [t2.v1, t2.v3, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v3, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v1) }
+ └─StreamTableScan { table: t2, columns: [t2.v1, t2.v3, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- sql: |
create table t1 (v1 int, v2 int) append only;
select v1 from t1 order by v1 limit 3 offset 3;
diff --git a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml
index fde09972bb66b..ce6724dc91c37 100644
--- a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml
@@ -234,9 +234,10 @@
└─BatchValues { rows: [] }
stream_plan: |-
StreamMaterialize { columns: [v, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v], pk_columns: [t._row_id, t._row_id#1, v], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v, t._row_id, t._row_id] }
- ├─StreamExchange { dist: HashShard(t.v) }
- │ └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(t.v) }
- └─StreamFilter { predicate: false:Boolean }
- └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v, t._row_id, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v, t._row_id, t._row_id] }
+ ├─StreamExchange { dist: HashShard(t.v) }
+ │ └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v) }
+ └─StreamFilter { predicate: false:Boolean }
+ └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml b/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml
index 236bc31b2503e..2d1b0951089e8 100644
--- a/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml
@@ -60,9 +60,9 @@
select t2.c, t2.d, count(distinct t.a) from t join t2 on t.a = t2.c group by t2.c, t2.d;
batch_plan: |-
BatchExchange { order: [], dist: Single }
- └─BatchHashAgg { group_key: [first_value(t2.c order_by(t2.c ASC))], aggs: [first_value(first_value(t2.d order_by(t2.d ASC)) order_by(first_value(t2.d order_by(t2.d ASC)) ASC)), count(t.a)] }
- └─BatchExchange { order: [], dist: HashShard(first_value(t2.c order_by(t2.c ASC))) }
- └─BatchHashAgg { group_key: [t.a], aggs: [first_value(t2.c order_by(t2.c ASC)), first_value(t2.d order_by(t2.d ASC))] }
+ └─BatchHashAgg { group_key: [internal_last_seen_value(t2.c)], aggs: [internal_last_seen_value(internal_last_seen_value(t2.d)), count(t.a)] }
+ └─BatchExchange { order: [], dist: HashShard(internal_last_seen_value(t2.c)) }
+ └─BatchHashAgg { group_key: [t.a], aggs: [internal_last_seen_value(t2.c), internal_last_seen_value(t2.d)] }
└─BatchLookupJoin { type: Inner, predicate: t.a = t2.c, output: [t2.c, t2.d, t.a] }
└─BatchExchange { order: [], dist: UpstreamHashShard(t.a) }
└─BatchScan { table: t, columns: [t.a], distribution: SomeShard }
diff --git a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml
index 6f4f8a673c996..e7196f7cf4fea 100644
--- a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml
@@ -134,141 +134,145 @@
└─BatchScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], distribution: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
stream_plan: |-
StreamMaterialize { columns: [s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id(hidden), stock.s_w_id(hidden), min(stock.s_quantity)(hidden), $expr2(hidden), region.r_regionkey(hidden), supplier.s_nationkey(hidden)], stream_key: [stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_columns: [n_name, s_name, i_id, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] }
- │ └─StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] }
- │ ├─StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] }
- │ │ └─StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] }
- │ │ └─StreamExchange { dist: HashShard(stock.s_i_id) }
- │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] }
- │ │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
- │ │ │ └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] }
- │ │ │ ├─StreamExchange { dist: HashShard(region.r_regionkey) }
- │ │ │ │ └─StreamProject { exprs: [region.r_regionkey] }
- │ │ │ │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
- │ │ │ │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
- │ │ │ └─StreamExchange { dist: HashShard(nation.n_regionkey) }
- │ │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
- │ │ │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
- │ │ │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
- │ │ │ └─StreamExchange { dist: HashShard(nation.n_nationkey) }
- │ │ │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
- │ │ └─StreamExchange { dist: HashShard($expr1) }
- │ │ └─StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] }
- │ │ └─StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) }
- │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
- │ └─StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all }
- │ ├─StreamExchange { dist: HashShard(item.i_id) }
- │ │ └─StreamProject { exprs: [item.i_id, item.i_name] }
- │ │ └─StreamFilter { predicate: Like(item.i_data, '%b':Varchar) }
- │ │ └─StreamTableScan { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
- │ └─StreamExchange { dist: HashShard(stock.s_i_id) }
- │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
- └─StreamExchange { dist: HashShard(supplier.s_suppkey) }
- └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] }
- ├─StreamExchange { dist: HashShard(region.r_regionkey) }
- │ └─StreamProject { exprs: [region.r_regionkey] }
- │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
- │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
- └─StreamExchange { dist: HashShard(nation.n_regionkey) }
- └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
- ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
- │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
- └─StreamExchange { dist: HashShard(nation.n_nationkey) }
- └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
+ └─StreamExchange { dist: HashShard(stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey) }
+ └─StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] }
+ │ └─StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] }
+ │ ├─StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] }
+ │ │ └─StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] }
+ │ │ └─StreamExchange { dist: HashShard(stock.s_i_id) }
+ │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] }
+ │ │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+ │ │ │ └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] }
+ │ │ │ ├─StreamExchange { dist: HashShard(region.r_regionkey) }
+ │ │ │ │ └─StreamProject { exprs: [region.r_regionkey] }
+ │ │ │ │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
+ │ │ │ │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
+ │ │ │ └─StreamExchange { dist: HashShard(nation.n_regionkey) }
+ │ │ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
+ │ │ │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
+ │ │ │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+ │ │ │ └─StreamExchange { dist: HashShard(nation.n_nationkey) }
+ │ │ │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
+ │ │ └─StreamExchange { dist: HashShard($expr1) }
+ │ │ └─StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] }
+ │ │ └─StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) }
+ │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
+ │ └─StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all }
+ │ ├─StreamExchange { dist: HashShard(item.i_id) }
+ │ │ └─StreamProject { exprs: [item.i_id, item.i_name] }
+ │ │ └─StreamFilter { predicate: Like(item.i_data, '%b':Varchar) }
+ │ │ └─StreamTableScan { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
+ │ └─StreamExchange { dist: HashShard(stock.s_i_id) }
+ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
+ └─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+ └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] }
+ ├─StreamExchange { dist: HashShard(region.r_regionkey) }
+ │ └─StreamProject { exprs: [region.r_regionkey] }
+ │ └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
+ │ └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
+ └─StreamExchange { dist: HashShard(nation.n_regionkey) }
+ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
+ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
+ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+ └─StreamExchange { dist: HashShard(nation.n_nationkey) }
+ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id(hidden), stock.s_w_id(hidden), min(stock.s_quantity)(hidden), $expr2(hidden), region.r_regionkey(hidden), supplier.s_nationkey(hidden)], stream_key: [stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_columns: [n_name, s_name, i_id, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([2]) from 1
- └── StreamExchange Hash([0]) from 11
+ └── StreamExchange Hash([8, 9, 10, 11, 12, 13]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([2]) from 2
+ └── StreamExchange Hash([0]) from 12
+
+ Fragment 2
StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] }
└── StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
├── StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] }
│ └── StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] } { intermediate state table: 9, state tables: [ 8 ], distinct tables: [] }
- │ └── StreamExchange Hash([0]) from 2
+ │ └── StreamExchange Hash([0]) from 3
└── StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all } { left table: 26, right table: 28, left degree table: 27, right degree table: 29 }
- ├── StreamExchange Hash([0]) from 9
- └── StreamExchange Hash([0]) from 10
-
- Fragment 2
- StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 }
- ├── StreamExchange Hash([0]) from 3
- └── StreamExchange Hash([2]) from 8
+ ├── StreamExchange Hash([0]) from 10
+ └── StreamExchange Hash([0]) from 11
Fragment 3
- StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
+ StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 }
├── StreamExchange Hash([0]) from 4
- └── StreamExchange Hash([1]) from 5
+ └── StreamExchange Hash([2]) from 9
Fragment 4
+ StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
+ ├── StreamExchange Hash([0]) from 5
+ └── StreamExchange Hash([1]) from 6
+
+ Fragment 5
StreamProject { exprs: [region.r_regionkey] }
└── StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
└── Chain { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } { state table: 18 }
├── Upstream
└── BatchPlanNode
- Fragment 5
+ Fragment 6
StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } { left table: 19, right table: 21, left degree table: 20, right degree table: 22 }
- ├── StreamExchange Hash([1]) from 6
- └── StreamExchange Hash([0]) from 7
+ ├── StreamExchange Hash([1]) from 7
+ └── StreamExchange Hash([0]) from 8
- Fragment 6
+ Fragment 7
Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 23 }
├── Upstream
└── BatchPlanNode
- Fragment 7
+ Fragment 8
Chain { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 24 }
├── Upstream
└── BatchPlanNode
- Fragment 8
+ Fragment 9
StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] }
└── StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) }
└── Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 25 }
├── Upstream
└── BatchPlanNode
- Fragment 9
+ Fragment 10
StreamProject { exprs: [item.i_id, item.i_name] }
└── StreamFilter { predicate: Like(item.i_data, '%b':Varchar) }
└── Chain { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } { state table: 30 }
├── Upstream
└── BatchPlanNode
- Fragment 10
+ Fragment 11
Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 31 }
├── Upstream
└── BatchPlanNode
- Fragment 11
+ Fragment 12
StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] } { left table: 32, right table: 34, left degree table: 33, right degree table: 35 }
- ├── StreamExchange Hash([0]) from 12
- └── StreamExchange Hash([6]) from 13
+ ├── StreamExchange Hash([0]) from 13
+ └── StreamExchange Hash([6]) from 14
- Fragment 12
+ Fragment 13
StreamProject { exprs: [region.r_regionkey] }
└── StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
└── Chain { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } { state table: 36 }
├── Upstream
└── BatchPlanNode
- Fragment 13
+ Fragment 14
StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } { left table: 37, right table: 39, left degree table: 38, right degree table: 40 }
- ├── StreamExchange Hash([3]) from 14
- └── StreamExchange Hash([0]) from 15
+ ├── StreamExchange Hash([3]) from 15
+ └── StreamExchange Hash([0]) from 16
- Fragment 14
+ Fragment 15
Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 41 }
├── Upstream
└── BatchPlanNode
- Fragment 15
+ Fragment 16
Chain { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 42 }
├── Upstream
└── BatchPlanNode
@@ -359,7 +363,7 @@
Table 42 { columns: [ vnode, n_nationkey, nation_backfill_finished, nation_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
- Table 4294967294 { columns: [ s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey ], primary key: [ $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $12 ASC, $13 ASC, $11 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ], distribution key: [ 11 ], read pk prefix len hint: 9 }
+ Table 4294967294 { columns: [ s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey ], primary key: [ $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $12 ASC, $13 ASC, $11 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ], distribution key: [ 8, 9, 10, 11, 12, 13 ], read pk prefix len hint: 9 }
- id: ch_q3
before:
@@ -2496,59 +2500,63 @@
└─LogicalScan { table: revenue1, columns: [revenue1.total_revenue] }
stream_plan: |-
StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no(hidden)], stream_key: [s_suppkey, revenue1.supplier_no, total_revenue], pk_columns: [s_suppkey, revenue1.supplier_no, total_revenue], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
- ├─StreamExchange { dist: HashShard(revenue1.total_revenue) }
- │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
- │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
- │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
- │ └─StreamExchange { dist: HashShard($expr1) }
- │ └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] }
- │ └─StreamTableScan { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
- └─StreamExchange { dist: HashShard(max(max(revenue1.total_revenue))) }
- └─StreamProject { exprs: [max(max(revenue1.total_revenue))] }
- └─StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] }
- └─StreamExchange { dist: Single }
- └─StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] }
- └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] }
- └─StreamTableScan { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
+ └─StreamExchange { dist: HashShard(supplier.s_suppkey, revenue1.total_revenue, revenue1.supplier_no) }
+ └─StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
+ ├─StreamExchange { dist: HashShard(revenue1.total_revenue) }
+ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
+ │ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+ │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+ │ └─StreamExchange { dist: HashShard($expr1) }
+ │ └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] }
+ │ └─StreamTableScan { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
+ └─StreamExchange { dist: HashShard(max(max(revenue1.total_revenue))) }
+ └─StreamProject { exprs: [max(max(revenue1.total_revenue))] }
+ └─StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] }
+ └─StreamExchange { dist: Single }
+ └─StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] }
+ └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] }
+ └─StreamTableScan { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no(hidden)], stream_key: [s_suppkey, revenue1.supplier_no, total_revenue], pk_columns: [s_suppkey, revenue1.supplier_no, total_revenue], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([4]) from 1
- └── StreamExchange Hash([0]) from 4
+ └── StreamExchange Hash([0, 4, 5]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([4]) from 2
+ └── StreamExchange Hash([0]) from 5
+
+ Fragment 2
StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
├── left table: 4
├── right table: 6
├── left degree table: 5
├── right degree table: 7
- ├── StreamExchange Hash([0]) from 2
- └── StreamExchange Hash([1]) from 3
+ ├── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([1]) from 4
- Fragment 2
+ Fragment 3
Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 8 }
├── Upstream
└── BatchPlanNode
- Fragment 3
+ Fragment 4
StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] }
└── Chain { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } { state table: 9 }
├── Upstream
└── BatchPlanNode
- Fragment 4
+ Fragment 5
StreamProject { exprs: [max(max(revenue1.total_revenue))] }
└── StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] } { intermediate state table: 11, state tables: [ 10 ], distinct tables: [] }
- └── StreamExchange Single from 5
+ └── StreamExchange Single from 6
- Fragment 5
+ Fragment 6
StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] } { intermediate state table: 13, state tables: [ 12 ], distinct tables: [] }
└── StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] }
└── Chain { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } { state table: 14 }
@@ -2590,7 +2598,7 @@
Table 14 { columns: [ vnode, supplier_no, revenue1_backfill_finished, revenue1_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
- Table 4294967294 { columns: [ s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no ], primary key: [ $0 ASC, $5 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 4 ], read pk prefix len hint: 3 }
+ Table 4294967294 { columns: [ s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no ], primary key: [ $0 ASC, $5 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 0, 4, 5 ], read pk prefix len hint: 3 }
- id: ch_q16
before:
@@ -3174,58 +3182,62 @@
└─BatchScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_delivery_d], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [s_name, s_address, supplier.s_suppkey(hidden), supplier.s_nationkey(hidden)], stream_key: [supplier.s_suppkey, supplier.s_nationkey], pk_columns: [s_name, supplier.s_suppkey, supplier.s_nationkey], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] }
- ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
- │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all }
- │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
- │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
- │ └─StreamExchange { dist: HashShard(nation.n_nationkey) }
- │ └─StreamProject { exprs: [nation.n_nationkey] }
- │ └─StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) }
- │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
- └─StreamExchange { dist: HashShard($expr1) }
- └─StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] }
- └─StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) }
- └─StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] }
- └─StreamHashAgg { group_key: [stock.s_i_id, stock.s_w_id, stock.s_quantity], aggs: [sum(order_line.ol_quantity), count] }
- └─StreamHashJoin { type: LeftSemi, predicate: stock.s_i_id = item.i_id, output: all }
- ├─StreamHashJoin { type: Inner, predicate: stock.s_i_id = order_line.ol_i_id, output: [stock.s_i_id, stock.s_w_id, stock.s_quantity, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
- │ ├─StreamExchange { dist: HashShard(stock.s_i_id) }
- │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
- │ └─StreamExchange { dist: HashShard(order_line.ol_i_id) }
- │ └─StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
- │ └─StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) }
- │ └─StreamTableScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) }
- └─StreamExchange { dist: HashShard(item.i_id) }
- └─StreamProject { exprs: [item.i_id] }
- └─StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) }
- └─StreamTableScan { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
+ └─StreamExchange { dist: HashShard(supplier.s_suppkey, supplier.s_nationkey) }
+ └─StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] }
+ ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+ │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all }
+ │ ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
+ │ │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+ │ └─StreamExchange { dist: HashShard(nation.n_nationkey) }
+ │ └─StreamProject { exprs: [nation.n_nationkey] }
+ │ └─StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) }
+ │ └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
+ └─StreamExchange { dist: HashShard($expr1) }
+ └─StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] }
+ └─StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) }
+ └─StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] }
+ └─StreamHashAgg { group_key: [stock.s_i_id, stock.s_w_id, stock.s_quantity], aggs: [sum(order_line.ol_quantity), count] }
+ └─StreamHashJoin { type: LeftSemi, predicate: stock.s_i_id = item.i_id, output: all }
+ ├─StreamHashJoin { type: Inner, predicate: stock.s_i_id = order_line.ol_i_id, output: [stock.s_i_id, stock.s_w_id, stock.s_quantity, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
+ │ ├─StreamExchange { dist: HashShard(stock.s_i_id) }
+ │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
+ │ └─StreamExchange { dist: HashShard(order_line.ol_i_id) }
+ │ └─StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
+ │ └─StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) }
+ │ └─StreamTableScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) }
+ └─StreamExchange { dist: HashShard(item.i_id) }
+ └─StreamProject { exprs: [item.i_id] }
+ └─StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) }
+ └─StreamTableScan { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [s_name, s_address, supplier.s_suppkey(hidden), supplier.s_nationkey(hidden)], stream_key: [supplier.s_suppkey, supplier.s_nationkey], pk_columns: [s_name, supplier.s_suppkey, supplier.s_nationkey], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 4
+ └── StreamExchange Hash([2, 3]) from 1
Fragment 1
- StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
- ├── StreamExchange Hash([3]) from 2
- └── StreamExchange Hash([0]) from 3
+ StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 5
Fragment 2
+ StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
+ ├── StreamExchange Hash([3]) from 3
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 3
Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 8 }
├── Upstream
└── BatchPlanNode
- Fragment 3
+ Fragment 4
StreamProject { exprs: [nation.n_nationkey] }
└── StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) }
└── Chain { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 9 }
├── Upstream
└── BatchPlanNode
- Fragment 4
+ Fragment 5
StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] }
└── StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) }
└── StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] }
@@ -3236,16 +3248,16 @@
│ ├── right table: 17
│ ├── left degree table: 16
│ ├── right degree table: 18
- │ ├── StreamExchange Hash([0]) from 5
- │ └── StreamExchange Hash([0]) from 6
- └── StreamExchange Hash([0]) from 7
+ │ ├── StreamExchange Hash([0]) from 6
+ │ └── StreamExchange Hash([0]) from 7
+ └── StreamExchange Hash([0]) from 8
- Fragment 5
+ Fragment 6
Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 19 }
├── Upstream
└── BatchPlanNode
- Fragment 6
+ Fragment 7
StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
└── StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) }
└── Chain { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) }
@@ -3253,7 +3265,7 @@
├── Upstream
└── BatchPlanNode
- Fragment 7
+ Fragment 8
StreamProject { exprs: [item.i_id] }
└── StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) }
└── Chain { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } { state table: 21 }
@@ -3304,7 +3316,7 @@
Table 21 { columns: [ vnode, i_id, item_backfill_finished, item_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
- Table 4294967294 { columns: [ s_name, s_address, supplier.s_suppkey, supplier.s_nationkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 2 ], read pk prefix len hint: 3 }
+ Table 4294967294 { columns: [ s_name, s_address, supplier.s_suppkey, supplier.s_nationkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 2, 3 ], read pk prefix len hint: 3 }
- id: ch_q21
before:
diff --git a/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml b/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml
index 9c961429276a3..dece27002b19b 100644
--- a/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml
@@ -23,11 +23,12 @@
└─LogicalScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id] }
stream_plan: |-
StreamMaterialize { columns: [v3, v4, v1, t2._row_id(hidden), t1._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, v3], pk_columns: [t2._row_id, t1._row_id, v3], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t2._row_id, t1._row_id] }
- ├─StreamExchange { dist: HashShard(t2.v3) }
- │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- └─StreamExchange { dist: HashShard(t1.v1) }
- └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v3, t2._row_id, t1._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t2._row_id, t1._row_id] }
+ ├─StreamExchange { dist: HashShard(t2.v3) }
+ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1) }
+ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- sql: |
create table t1 (v1 int, v2 int);
create table t2 (v3 int, v4 int);
@@ -79,8 +80,9 @@
└─LogicalValues { rows: [['cn':Varchar, 'China':Varchar], ['us':Varchar, 'United States':Varchar]], schema: Schema { fields: [*VALUES*_0.column_0:Varchar, *VALUES*_0.column_1:Varchar] } }
stream_plan: |-
StreamMaterialize { columns: [v, c, abbr, real, t._row_id(hidden), _row_id(hidden)], stream_key: [t._row_id, _row_id, c], pk_columns: [t._row_id, _row_id, c], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.c = *VALUES*_0.column_0, output: [t.v, t.c, *VALUES*_0.column_0, *VALUES*_0.column_1, t._row_id, _row_id] }
- ├─StreamExchange { dist: HashShard(t.c) }
- │ └─StreamTableScan { table: t, columns: [t.v, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(*VALUES*_0.column_0) }
- └─StreamValues { rows: [['cn':Varchar, 'China':Varchar, 0:Int64], ['us':Varchar, 'United States':Varchar, 1:Int64]] }
+ └─StreamExchange { dist: HashShard(t.c, t._row_id, _row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.c = *VALUES*_0.column_0, output: [t.v, t.c, *VALUES*_0.column_0, *VALUES*_0.column_1, t._row_id, _row_id] }
+ ├─StreamExchange { dist: HashShard(t.c) }
+ │ └─StreamTableScan { table: t, columns: [t.v, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(*VALUES*_0.column_0) }
+ └─StreamValues { rows: [['cn':Varchar, 'China':Varchar, 0:Int64], ['us':Varchar, 'United States':Varchar, 1:Int64]] }
diff --git a/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml b/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml
index 85d76188f3e76..818fd88b30a20 100644
--- a/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml
@@ -963,31 +963,35 @@
└─BatchScan { table: a, columns: [a.k1], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), a.k1(hidden)], stream_key: [ak1.a._row_id, ak1.k1], pk_columns: [ak1.a._row_id, ak1.k1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
- ├─StreamExchange { dist: HashShard(ak1.k1) }
- │ └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
- └─StreamHashAgg { group_key: [a.k1], aggs: [count] }
- └─StreamExchange { dist: HashShard(a.k1) }
- └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
+ └─StreamExchange { dist: HashShard(ak1.a._row_id, ak1.k1) }
+ └─StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
+ ├─StreamExchange { dist: HashShard(ak1.k1) }
+ │ └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
+ └─StreamHashAgg { group_key: [a.k1], aggs: [count] }
+ └─StreamExchange { dist: HashShard(a.k1) }
+ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), a.k1(hidden)], stream_key: [ak1.a._row_id, ak1.k1], pk_columns: [ak1.a._row_id, ak1.k1], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([2, 3]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } { state table: 4 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 6 }
├── Upstream
└── BatchPlanNode
@@ -1022,7 +1026,7 @@
├── columns: [ v, bv, ak1.a._row_id, ak1.k1, a.k1 ]
├── primary key: [ $2 ASC, $3 ASC ]
├── value indices: [ 0, 1, 2, 3, 4 ]
- ├── distribution key: [ 3 ]
+ ├── distribution key: [ 2, 3 ]
└── read pk prefix len hint: 2
- id: aggk1_join_Ak1_onk1
@@ -1054,31 +1058,35 @@
└─BatchScan { table: a, columns: [a.k1], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v, bv, a.k1(hidden), ak1.a._row_id(hidden)], stream_key: [a.k1, ak1.a._row_id], pk_columns: [a.k1, ak1.a._row_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
- ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
- │ └─StreamExchange { dist: HashShard(a.k1) }
- │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
- └─StreamExchange { dist: HashShard(ak1.k1) }
- └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
+ └─StreamExchange { dist: HashShard(a.k1, ak1.a._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
+ ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
+ │ └─StreamExchange { dist: HashShard(a.k1) }
+ │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
+ └─StreamExchange { dist: HashShard(ak1.k1) }
+ └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [v, bv, a.k1(hidden), ak1.a._row_id(hidden)], stream_key: [a.k1, ak1.a._row_id], pk_columns: [a.k1, ak1.a._row_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
- │ └── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([2, 3]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
+ │ └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 5 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } { state table: 6 }
├── Upstream
└── BatchPlanNode
@@ -1113,7 +1121,7 @@
├── columns: [ v, bv, a.k1, ak1.a._row_id ]
├── primary key: [ $2 ASC, $3 ASC ]
├── value indices: [ 0, 1, 2, 3 ]
- ├── distribution key: [ 2 ]
+ ├── distribution key: [ 2, 3 ]
└── read pk prefix len hint: 2
- id: aggk1_join_aggk1_onk1
@@ -1156,33 +1164,37 @@
└─BatchScan { table: b, columns: [b.k1], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [num, bv, a.k1(hidden), b.k1(hidden)], stream_key: [a.k1], pk_columns: [a.k1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
- ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
- │ └─StreamExchange { dist: HashShard(a.k1) }
- │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
- └─StreamHashAgg { group_key: [b.k1], aggs: [count] }
- └─StreamExchange { dist: HashShard(b.k1) }
- └─StreamTableScan { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) }
+ └─StreamExchange { dist: HashShard(a.k1) }
+ └─StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
+ ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
+ │ └─StreamExchange { dist: HashShard(a.k1) }
+ │ └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
+ └─StreamHashAgg { group_key: [b.k1], aggs: [count] }
+ └─StreamExchange { dist: HashShard(b.k1) }
+ └─StreamTableScan { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [num, bv, a.k1(hidden), b.k1(hidden)], stream_key: [a.k1], pk_columns: [a.k1], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
- │ └── StreamExchange Hash([0]) from 1
- └── StreamHashAgg { group_key: [b.k1], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([2]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
+ │ └── StreamExchange Hash([0]) from 2
+ └── StreamHashAgg { group_key: [b.k1], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 5 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
Chain { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } { state table: 7 }
├── Upstream
└── BatchPlanNode
diff --git a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml
index a340014298c47..ab282ebe3858a 100644
--- a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml
@@ -124,16 +124,17 @@
└─LogicalScan { table: t2, columns: [t2.v2] }
stream_plan: |-
StreamMaterialize { columns: [v1, max, t1._row_id(hidden)], stream_key: [t1._row_id, v1], pk_columns: [t1._row_id, v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v1 = max(max(t2.v2)), output: [t1.v1, max(max(t2.v2)), t1._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(max(max(t2.v2))) }
- └─StreamProject { exprs: [max(max(t2.v2))] }
- └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] }
- └─StreamExchange { dist: Single }
- └─StreamHashAgg { group_key: [$expr1], aggs: [max(t2.v2), count] }
- └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
- └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v1 = max(max(t2.v2)), output: [t1.v1, max(max(t2.v2)), t1._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(max(max(t2.v2))) }
+ └─StreamProject { exprs: [max(max(t2.v2))] }
+ └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] }
+ └─StreamExchange { dist: Single }
+ └─StreamHashAgg { group_key: [$expr1], aggs: [max(t2.v2), count] }
+ └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
+ └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: Dynamic filter join on unequal types
sql: |
create table t1 (v1 int);
diff --git a/src/frontend/planner_test/tests/testdata/output/except.yaml b/src/frontend/planner_test/tests/testdata/output/except.yaml
index 204a1814b8db7..1e27a7b74c0f0 100644
--- a/src/frontend/planner_test/tests/testdata/output/except.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/except.yaml
@@ -108,20 +108,20 @@
create table t2 (a int, b numeric, c bigint, primary key(a));
select * from t1 except select * from t2;
optimized_logical_plan_for_batch: |-
- LogicalAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+ LogicalAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
└─LogicalJoin { type: LeftAnti, on: IsNotDistinctFrom(t1.a, t2.a) AND IsNotDistinctFrom(t1.b, t2.b) AND IsNotDistinctFrom(t1.c, t2.c), output: all }
├─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.c] }
└─LogicalScan { table: t2, columns: [t2.a, t2.b, t2.c] }
batch_plan: |-
BatchExchange { order: [], dist: Single }
- └─BatchHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+ └─BatchHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
└─BatchLookupJoin { type: LeftAnti, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
└─BatchExchange { order: [], dist: UpstreamHashShard(t1.a) }
└─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c], distribution: UpstreamHashShard(t1.a) }
stream_plan: |-
StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
- └─StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
- └─StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
+ └─StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+ └─StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
└─StreamExchange { dist: HashShard(t1.a) }
└─StreamHashJoin { type: LeftAnti, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
├─StreamExchange { dist: HashShard(t1.a, t1.b, t1.c) }
@@ -130,83 +130,70 @@
└─StreamTableScan { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) }
stream_dist_plan: |+
Fragment 0
- StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck } { materialized table: 4294967294 }
- └── StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
- └── StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
- ├── intermediate state table: 2
- ├── state tables: [ 0, 1 ]
+ StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
+ ├── materialized table: 4294967294
+ └── StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+ └── StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
+ ├── intermediate state table: 0
+ ├── state tables: []
├── distinct tables: []
└── StreamExchange Hash([0]) from 1
Fragment 1
StreamHashJoin { type: LeftAnti, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
- ├── left table: 3
- ├── right table: 5
- ├── left degree table: 4
- ├── right degree table: 6
+ ├── left table: 1
+ ├── right table: 3
+ ├── left degree table: 2
+ ├── right degree table: 4
├── StreamExchange Hash([0, 1, 2]) from 2
└── StreamExchange Hash([0, 1, 2]) from 3
Fragment 2
- Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 7 }
+ Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 5 }
├── Upstream
└── BatchPlanNode
Fragment 3
- Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 8 }
+ Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 6 }
├── Upstream
└── BatchPlanNode
Table 0
- ├── columns: [ t1_a, t1_b, t1_c ]
- ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
- ├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
- └── read pk prefix len hint: 1
-
- Table 1
- ├── columns: [ t1_a, t1_c, t1_b ]
- ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
- ├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
- └── read pk prefix len hint: 1
-
- Table 2
- ├── columns: [ t1_a, first_value(t1_b order_by(t1_b ASC)), first_value(t1_c order_by(t1_c ASC)), count ]
+ ├── columns: [ t1_a, internal_last_seen_value(t1_b), internal_last_seen_value(t1_c), count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
├── distribution key: [ 0 ]
└── read pk prefix len hint: 1
- Table 3
+ Table 1
├── columns: [ t1_a, t1_b, t1_c ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 0, 1, 2 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 4
+ Table 2
├── columns: [ t1_a, t1_b, t1_c, _degree ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 3 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 5
+ Table 3
├── columns: [ t2_a, t2_b, t2_c ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 0, 1, 2 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 6
+ Table 4
├── columns: [ t2_a, t2_b, t2_c, _degree ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 3 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 7
+ Table 5
├── columns: [ vnode, a, t1_backfill_finished, t1_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
@@ -214,7 +201,7 @@
├── read pk prefix len hint: 1
└── vnode column idx: 0
- Table 8
+ Table 6
├── columns: [ vnode, a, t2_backfill_finished, t2_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
diff --git a/src/frontend/planner_test/tests/testdata/output/intersect.yaml b/src/frontend/planner_test/tests/testdata/output/intersect.yaml
index c203f1f953814..91839346824ec 100644
--- a/src/frontend/planner_test/tests/testdata/output/intersect.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/intersect.yaml
@@ -108,20 +108,20 @@
create table t2 (a int, b numeric, c bigint, primary key(a));
select * from t1 intersect select * from t2;
optimized_logical_plan_for_batch: |-
- LogicalAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+ LogicalAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
└─LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(t1.a, t2.a) AND IsNotDistinctFrom(t1.b, t2.b) AND IsNotDistinctFrom(t1.c, t2.c), output: all }
├─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.c] }
└─LogicalScan { table: t2, columns: [t2.a, t2.b, t2.c] }
batch_plan: |-
BatchExchange { order: [], dist: Single }
- └─BatchHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+ └─BatchHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
└─BatchLookupJoin { type: LeftSemi, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
└─BatchExchange { order: [], dist: UpstreamHashShard(t1.a) }
└─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c], distribution: UpstreamHashShard(t1.a) }
stream_plan: |-
StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
- └─StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
- └─StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
+ └─StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+ └─StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
└─StreamExchange { dist: HashShard(t1.a) }
└─StreamHashJoin { type: LeftSemi, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
├─StreamExchange { dist: HashShard(t1.a, t1.b, t1.c) }
@@ -130,83 +130,70 @@
└─StreamTableScan { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) }
stream_dist_plan: |+
Fragment 0
- StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck } { materialized table: 4294967294 }
- └── StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
- └── StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
- ├── intermediate state table: 2
- ├── state tables: [ 0, 1 ]
+ StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
+ ├── materialized table: 4294967294
+ └── StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+ └── StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
+ ├── intermediate state table: 0
+ ├── state tables: []
├── distinct tables: []
└── StreamExchange Hash([0]) from 1
Fragment 1
StreamHashJoin { type: LeftSemi, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
- ├── left table: 3
- ├── right table: 5
- ├── left degree table: 4
- ├── right degree table: 6
+ ├── left table: 1
+ ├── right table: 3
+ ├── left degree table: 2
+ ├── right degree table: 4
├── StreamExchange Hash([0, 1, 2]) from 2
└── StreamExchange Hash([0, 1, 2]) from 3
Fragment 2
- Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 7 }
+ Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 5 }
├── Upstream
└── BatchPlanNode
Fragment 3
- Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 8 }
+ Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 6 }
├── Upstream
└── BatchPlanNode
Table 0
- ├── columns: [ t1_a, t1_b, t1_c ]
- ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
- ├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
- └── read pk prefix len hint: 1
-
- Table 1
- ├── columns: [ t1_a, t1_c, t1_b ]
- ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
- ├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
- └── read pk prefix len hint: 1
-
- Table 2
- ├── columns: [ t1_a, first_value(t1_b order_by(t1_b ASC)), first_value(t1_c order_by(t1_c ASC)), count ]
+ ├── columns: [ t1_a, internal_last_seen_value(t1_b), internal_last_seen_value(t1_c), count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
├── distribution key: [ 0 ]
└── read pk prefix len hint: 1
- Table 3
+ Table 1
├── columns: [ t1_a, t1_b, t1_c ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 0, 1, 2 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 4
+ Table 2
├── columns: [ t1_a, t1_b, t1_c, _degree ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 3 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 5
+ Table 3
├── columns: [ t2_a, t2_b, t2_c ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 0, 1, 2 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 6
+ Table 4
├── columns: [ t2_a, t2_b, t2_c, _degree ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
├── value indices: [ 3 ]
├── distribution key: [ 0, 1, 2 ]
└── read pk prefix len hint: 3
- Table 7
+ Table 5
├── columns: [ vnode, a, t1_backfill_finished, t1_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
@@ -214,7 +201,7 @@
├── read pk prefix len hint: 1
└── vnode column idx: 0
- Table 8
+ Table 6
├── columns: [ vnode, a, t2_backfill_finished, t2_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
diff --git a/src/frontend/planner_test/tests/testdata/output/join.yaml b/src/frontend/planner_test/tests/testdata/output/join.yaml
index 4ef01cc84ab47..a61d2a0d73327 100644
--- a/src/frontend/planner_test/tests/testdata/output/join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/join.yaml
@@ -14,14 +14,15 @@
└─LogicalScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id] }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v1 = t3.v5, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
- ├─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
- │ ├─StreamExchange { dist: HashShard(t1.v1) }
- │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ └─StreamExchange { dist: HashShard(t2.v3) }
- │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- └─StreamExchange { dist: HashShard(t3.v5) }
- └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id, t3._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t3.v5, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
+ ├─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
+ │ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ └─StreamExchange { dist: HashShard(t2.v3) }
+ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t3.v5) }
+ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
- name: self join
sql: |
create table t (v1 int, v2 int);
@@ -33,11 +34,12 @@
└─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] }
stream_plan: |-
StreamMaterialize { columns: [t1v1, t2v1, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, t1v1], pk_columns: [t._row_id, t._row_id#1, t1v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v1, t._row_id, t._row_id] }
- ├─StreamExchange { dist: HashShard(t.v1) }
- │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(t.v1) }
- └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v1, t._row_id, t._row_id] }
+ ├─StreamExchange { dist: HashShard(t.v1) }
+ │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v1) }
+ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- sql: |
create table t1 (v1 int, v2 int);
create table t2 (v1 int, v2 int);
@@ -65,15 +67,16 @@
└─BatchScan { table: t3, columns: [t3.v1, t3.v2], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [t1_v1, t1_v2, t2_v1, t2_v2, t3_v1, t3_v2, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1, t3._row_id, t2_v2], pk_columns: [t1._row_id, t2._row_id, t1_v1, t3._row_id, t2_v2], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t2.v2 = t3.v2, output: [t1.v1, t1.v2, t2.v1, t2.v2, t3.v1, t3.v2, t1._row_id, t2._row_id, t3._row_id] }
- ├─StreamExchange { dist: HashShard(t2.v2) }
- │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
- │ ├─StreamExchange { dist: HashShard(t1.v1) }
- │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ └─StreamExchange { dist: HashShard(t2.v1) }
- │ └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- └─StreamExchange { dist: HashShard(t3.v2) }
- └─StreamTableScan { table: t3, columns: [t3.v1, t3.v2, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t2.v2, t1._row_id, t2._row_id, t3._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t2.v2 = t3.v2, output: [t1.v1, t1.v2, t2.v1, t2.v2, t3.v1, t3.v2, t1._row_id, t2._row_id, t3._row_id] }
+ ├─StreamExchange { dist: HashShard(t2.v2) }
+ │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+ │ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ └─StreamExchange { dist: HashShard(t2.v1) }
+ │ └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t3.v2) }
+ └─StreamTableScan { table: t3, columns: [t3.v1, t3.v2, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
- sql: |
create table t1 (v1 int, v2 int);
create table t2 (v1 int, v2 int);
@@ -93,11 +96,12 @@
└─BatchScan { table: t2, columns: [t2.v1, t2.v2], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [t1_v2, t2_v2, t1._row_id(hidden), t1.v1(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1.v1], pk_columns: [t1._row_id, t2._row_id, t1.v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v2, t2.v2, t1._row_id, t1.v1, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.v1) }
- └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1._row_id, t1.v1, t2._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v2, t2.v2, t1._row_id, t1.v1, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v1) }
+ └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- sql: |
create table t1 (v1 int, v2 int);
create table t2 (v1 int, v2 int);
@@ -154,11 +158,12 @@
└─BatchScan { table: i, columns: [i.x], distribution: UpstreamHashShard(i.x) }
stream_plan: |-
StreamMaterialize { columns: [ix, iix, i.t._row_id(hidden), i.t._row_id#1(hidden)], stream_key: [i.t._row_id, i.t._row_id#1, ix], pk_columns: [i.t._row_id, i.t._row_id#1, ix], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id] }
- ├─StreamExchange { dist: HashShard(i.x) }
- │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
- └─StreamExchange { dist: HashShard(i.x) }
- └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
+ └─StreamExchange { dist: HashShard(i.x, i.t._row_id, i.t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id] }
+ ├─StreamExchange { dist: HashShard(i.x) }
+ │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
+ └─StreamExchange { dist: HashShard(i.x) }
+ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
- name: Left & right has same SomeShard distribution. There should still be exchanges below hash join
sql: |
create table t(x int);
@@ -170,11 +175,12 @@
└─BatchScan { table: i, columns: [i.x], distribution: UpstreamHashShard(i.x) }
stream_plan: |-
StreamMaterialize { columns: [ix, tx, i.t._row_id(hidden), t._row_id(hidden)], stream_key: [i.t._row_id, t._row_id, ix], pk_columns: [i.t._row_id, t._row_id, ix], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: i.x = t.x, output: [i.x, t.x, i.t._row_id, t._row_id] }
- ├─StreamExchange { dist: HashShard(i.x) }
- │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
- └─StreamExchange { dist: HashShard(t.x) }
- └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(i.x, i.t._row_id, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: i.x = t.x, output: [i.x, t.x, i.t._row_id, t._row_id] }
+ ├─StreamExchange { dist: HashShard(i.x) }
+ │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
+ └─StreamExchange { dist: HashShard(t.x) }
+ └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- name: Left & right has same HashShard distribution. There should be no exchange below hash join
sql: |
create table t(x int);
@@ -628,12 +634,13 @@
└─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), $expr1(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, $expr1], pk_columns: [t1._row_id, t2._row_id, $expr1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: $expr1 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr1, t2._row_id] }
- ├─StreamExchange { dist: HashShard($expr1) }
- │ └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.v2) }
- └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1._row_id, $expr1, t2._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: $expr1 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr1, t2._row_id] }
+ ├─StreamExchange { dist: HashShard($expr1) }
+ │ └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v2) }
+ └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: Repeated columns in project should not interfere with join result (https://github.com/risingwavelabs/risingwave/issues/8216)
sql: |
create table t(x int);
@@ -652,39 +659,43 @@
select t1.src p1, t1.dst p2, t2.dst p3 from t t1, t t2, t t3 where t1.dst = t2.src and t2.src = t3.dst and t3.dst = t1.src;
stream_plan: |-
StreamMaterialize { columns: [p1, p2, p3, t._row_id(hidden), t._row_id#1(hidden), t.src(hidden), t._row_id#2(hidden)], stream_key: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_columns: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] }
- ├─StreamExchange { dist: HashShard(t.src) }
- │ └─StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] }
- │ ├─StreamExchange { dist: HashShard(t.dst) }
- │ │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- │ └─StreamExchange { dist: HashShard(t.src) }
- │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(t.dst) }
- └─StreamTableScan { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.src, t.dst, t._row_id, t._row_id, t.src, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] }
+ ├─StreamExchange { dist: HashShard(t.src) }
+ │ └─StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] }
+ │ ├─StreamExchange { dist: HashShard(t.dst) }
+ │ │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ │ └─StreamExchange { dist: HashShard(t.src) }
+ │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.dst) }
+ └─StreamTableScan { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [p1, p2, p3, t._row_id(hidden), t._row_id#1(hidden), t.src(hidden), t._row_id#2(hidden)], stream_key: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_columns: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 4
+ └── StreamExchange Hash([0, 1, 3, 4, 5, 6]) from 1
Fragment 1
- StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
- ├── StreamExchange Hash([1]) from 2
- └── StreamExchange Hash([0]) from 3
+ StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 5
Fragment 2
+ StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
+ ├── StreamExchange Hash([1]) from 3
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 3
Chain { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 8 }
├── Upstream
└── BatchPlanNode
- Fragment 3
+ Fragment 4
Chain { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 9 }
├── Upstream
└── BatchPlanNode
- Fragment 4
+ Fragment 5
Chain { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 10 }
├── Upstream
└── BatchPlanNode
@@ -711,5 +722,5 @@
Table 10 { columns: [ vnode, _row_id, t_backfill_finished, t_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
- Table 4294967294 { columns: [ p1, p2, p3, t._row_id, t._row_id#1, t.src, t._row_id#2 ], primary key: [ $3 ASC, $4 ASC, $1 ASC, $6 ASC, $5 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 6 }
+ Table 4294967294 { columns: [ p1, p2, p3, t._row_id, t._row_id#1, t.src, t._row_id#2 ], primary key: [ $3 ASC, $4 ASC, $1 ASC, $6 ASC, $5 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 1, 3, 4, 5, 6 ], read pk prefix len hint: 6 }
diff --git a/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml b/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml
index b24fc18c6b513..31c53d02a9a18 100644
--- a/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml
@@ -34,19 +34,20 @@
└─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id, v2, t4._row_id, v5], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id, v2, t4._row_id, v5], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
- ├─StreamExchange { dist: HashShard(t3.v5) }
- │ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
- │ ├─StreamExchange { dist: HashShard(t1.v2) }
- │ │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
- │ │ ├─StreamExchange { dist: HashShard(t1.v1) }
- │ │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ │ └─StreamExchange { dist: HashShard(t2.v3) }
- │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- │ └─StreamExchange { dist: HashShard(t3.v6) }
- │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
- └─StreamExchange { dist: HashShard(t4.v7) }
- └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1.v2, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
+ ├─StreamExchange { dist: HashShard(t3.v5) }
+ │ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
+ │ ├─StreamExchange { dist: HashShard(t1.v2) }
+ │ │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
+ │ │ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ │ └─StreamExchange { dist: HashShard(t2.v3) }
+ │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ │ └─StreamExchange { dist: HashShard(t3.v6) }
+ │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+ └─StreamExchange { dist: HashShard(t4.v7) }
+ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
- name: bushy tree join ordering
sql: |
create table t1 (v1 int, v2 int);
@@ -81,19 +82,20 @@
└─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t2._row_id(hidden), t1._row_id(hidden), t4._row_id(hidden), t3._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, v3, t4._row_id, t3._row_id, v7, v2], pk_columns: [t2._row_id, t1._row_id, v3, t4._row_id, t3._row_id, v7, v2], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t1._row_id, t4._row_id, t3._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v2) }
- │ └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t1.v2, t2._row_id, t1._row_id] }
- │ ├─StreamExchange { dist: HashShard(t2.v3) }
- │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- │ └─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t3.v6) }
- └─StreamHashJoin { type: Inner, predicate: t4.v7 = t3.v5, output: [t4.v7, t4.v8, t3.v5, t3.v6, t4._row_id, t3._row_id] }
- ├─StreamExchange { dist: HashShard(t4.v7) }
- │ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
- └─StreamExchange { dist: HashShard(t3.v5) }
- └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v2, t2.v3, t4.v7, t2._row_id, t1._row_id, t4._row_id, t3._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t1._row_id, t4._row_id, t3._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v2) }
+ │ └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t1.v2, t2._row_id, t1._row_id] }
+ │ ├─StreamExchange { dist: HashShard(t2.v3) }
+ │ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ │ └─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t3.v6) }
+ └─StreamHashJoin { type: Inner, predicate: t4.v7 = t3.v5, output: [t4.v7, t4.v8, t3.v5, t3.v6, t4._row_id, t3._row_id] }
+ ├─StreamExchange { dist: HashShard(t4.v7) }
+ │ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+ └─StreamExchange { dist: HashShard(t3.v5) }
+ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
- name: bushy tree join ordering manually
sql: |
set rw_enable_join_ordering = false;
@@ -128,19 +130,20 @@
└─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id, t4._row_id, v5, v2], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id, t4._row_id, v5, v2], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v2) }
- │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
- │ ├─StreamExchange { dist: HashShard(t1.v1) }
- │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ └─StreamExchange { dist: HashShard(t2.v3) }
- │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- └─StreamExchange { dist: HashShard(t3.v6) }
- └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
- ├─StreamExchange { dist: HashShard(t3.v5) }
- │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
- └─StreamExchange { dist: HashShard(t4.v7) }
- └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1.v2, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v2) }
+ │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
+ │ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ └─StreamExchange { dist: HashShard(t2.v3) }
+ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t3.v6) }
+ └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
+ ├─StreamExchange { dist: HashShard(t3.v5) }
+ │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+ └─StreamExchange { dist: HashShard(t4.v7) }
+ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
- name: right deep tree join ordering manually
sql: |
set rw_enable_join_ordering = false;
@@ -175,16 +178,17 @@
└─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t3._row_id, t4._row_id, v5, v4, v1], pk_columns: [t1._row_id, t2._row_id, t3._row_id, t4._row_id, v5, v4, v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.v3) }
- └─StreamHashJoin { type: Inner, predicate: t2.v4 = t3.v6, output: [t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t3._row_id, t4._row_id] }
- ├─StreamExchange { dist: HashShard(t2.v4) }
- │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- └─StreamExchange { dist: HashShard(t3.v6) }
- └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
- ├─StreamExchange { dist: HashShard(t3.v5) }
- │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
- └─StreamExchange { dist: HashShard(t4.v7) }
- └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t2.v4, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v3) }
+ └─StreamHashJoin { type: Inner, predicate: t2.v4 = t3.v6, output: [t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t3._row_id, t4._row_id] }
+ ├─StreamExchange { dist: HashShard(t2.v4) }
+ │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t3.v6) }
+ └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
+ ├─StreamExchange { dist: HashShard(t3.v5) }
+ │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+ └─StreamExchange { dist: HashShard(t4.v7) }
+ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml
index 8e63beb9798c1..85bfb1a6cda36 100644
--- a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml
@@ -42,22 +42,23 @@
└─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), all_sales._row_id(hidden), salesperson.id(hidden), all_sales.amount(hidden), salesperson.id#1(hidden)], stream_key: [salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount], pk_columns: [salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM salesperson.id AND all_sales.amount = max(all_sales.amount), output: [salesperson.name, max(all_sales.amount), all_sales.customer_name, salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount, salesperson.id] }
- ├─StreamHashJoin { type: Inner, predicate: salesperson.id = all_sales.salesperson_id, output: [salesperson.id, salesperson.name, all_sales.customer_name, all_sales.amount, salesperson._row_id, all_sales._row_id] }
- │ ├─StreamExchange { dist: HashShard(salesperson.id) }
- │ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
- │ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
- │ └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
- └─StreamProject { exprs: [salesperson.id, max(all_sales.amount)] }
- └─StreamHashAgg { group_key: [salesperson.id], aggs: [max(all_sales.amount), count] }
- └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.id, all_sales.amount, all_sales._row_id] }
- ├─StreamProject { exprs: [salesperson.id] }
- │ └─StreamHashAgg { group_key: [salesperson.id], aggs: [count] }
- │ └─StreamExchange { dist: HashShard(salesperson.id) }
- │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
- └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
- └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
- └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+ └─StreamExchange { dist: HashShard(salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount) }
+ └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM salesperson.id AND all_sales.amount = max(all_sales.amount), output: [salesperson.name, max(all_sales.amount), all_sales.customer_name, salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount, salesperson.id] }
+ ├─StreamHashJoin { type: Inner, predicate: salesperson.id = all_sales.salesperson_id, output: [salesperson.id, salesperson.name, all_sales.customer_name, all_sales.amount, salesperson._row_id, all_sales._row_id] }
+ │ ├─StreamExchange { dist: HashShard(salesperson.id) }
+ │ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+ │ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+ │ └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+ └─StreamProject { exprs: [salesperson.id, max(all_sales.amount)] }
+ └─StreamHashAgg { group_key: [salesperson.id], aggs: [max(all_sales.amount), count] }
+ └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.id, all_sales.amount, all_sales._row_id] }
+ ├─StreamProject { exprs: [salesperson.id] }
+ │ └─StreamHashAgg { group_key: [salesperson.id], aggs: [count] }
+ │ └─StreamExchange { dist: HashShard(salesperson.id) }
+ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+ └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
+ └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
- name: lateral join 2
sql: |
create table all_sales (salesperson_id int, customer_name varchar, amount int );
@@ -87,14 +88,15 @@
└─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), salesperson.id(hidden), all_sales.salesperson_id(hidden)], stream_key: [salesperson._row_id, salesperson.id], pk_columns: [salesperson._row_id, salesperson.id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
- ├─StreamExchange { dist: HashShard(salesperson.id) }
- │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
- └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
- └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
- └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
- └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
- └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+ └─StreamExchange { dist: HashShard(salesperson._row_id, salesperson.id) }
+ └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
+ ├─StreamExchange { dist: HashShard(salesperson.id) }
+ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+ └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
+ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+ └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
+ └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
+ └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
- name: lateral join 2 (left join)
sql: |
create table all_sales (salesperson_id int, customer_name varchar, amount int );
@@ -124,14 +126,15 @@
└─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), salesperson.id(hidden), all_sales.salesperson_id(hidden)], stream_key: [salesperson._row_id, salesperson.id], pk_columns: [salesperson._row_id, salesperson.id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
- ├─StreamExchange { dist: HashShard(salesperson.id) }
- │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
- └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
- └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
- └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
- └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
- └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+ └─StreamExchange { dist: HashShard(salesperson._row_id, salesperson.id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
+ ├─StreamExchange { dist: HashShard(salesperson.id) }
+ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+ └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
+ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+ └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
+ └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
+ └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
- name: lateral join 2 (right join) should throw an error
sql: |
create table all_sales (salesperson_id int, customer_name varchar, amount int );
@@ -165,14 +168,15 @@
└─BatchScan { table: t, columns: [t.arr], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [x, arr, unnest, t._row_id(hidden), t.arr(hidden), projected_row_id(hidden)], stream_key: [t._row_id, projected_row_id, arr], pk_columns: [t._row_id, projected_row_id, arr], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.arr IS NOT DISTINCT FROM t.arr, output: [t.x, t.arr, Unnest($0), t._row_id, t.arr, projected_row_id] }
- ├─StreamExchange { dist: HashShard(t.arr) }
- │ └─StreamTableScan { table: t, columns: [t.x, t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamProjectSet { select_list: [$0, Unnest($0)] }
- └─StreamProject { exprs: [t.arr] }
- └─StreamHashAgg { group_key: [t.arr], aggs: [count] }
- └─StreamExchange { dist: HashShard(t.arr) }
- └─StreamTableScan { table: t, columns: [t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.arr, t._row_id, projected_row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.arr IS NOT DISTINCT FROM t.arr, output: [t.x, t.arr, Unnest($0), t._row_id, t.arr, projected_row_id] }
+ ├─StreamExchange { dist: HashShard(t.arr) }
+ │ └─StreamTableScan { table: t, columns: [t.x, t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamProjectSet { select_list: [$0, Unnest($0)] }
+ └─StreamProject { exprs: [t.arr] }
+ └─StreamHashAgg { group_key: [t.arr], aggs: [count] }
+ └─StreamExchange { dist: HashShard(t.arr) }
+ └─StreamTableScan { table: t, columns: [t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- name: https://github.com/risingwavelabs/risingwave/issues/12298
sql: |
create table t1(c varchar, n varchar, id varchar, d varchar);
diff --git a/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml b/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml
index 2f7d9e5e75b3b..6838ddb331939 100644
--- a/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml
@@ -12,8 +12,9 @@
select m1.v1 as m1v1, m1.v2 as m1v2, m2.v1 as m2v1, m2.v2 as m2v2 from m1 join m2 on m1.v1 = m2.v1;
stream_plan: |-
StreamMaterialize { columns: [m1v1, m1v2, m2v1, m2v2, m1.t1._row_id(hidden), m2.t1._row_id(hidden)], stream_key: [m1.t1._row_id, m2.t1._row_id, m1v1], pk_columns: [m1.t1._row_id, m2.t1._row_id, m1v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: m1.v1 = m2.v1, output: [m1.v1, m1.v2, m2.v1, m2.v2, m1.t1._row_id, m2.t1._row_id] }
- ├─StreamExchange { dist: HashShard(m1.v1) }
- │ └─StreamTableScan { table: m1, columns: [m1.v1, m1.v2, m1.t1._row_id], pk: [m1.t1._row_id], dist: UpstreamHashShard(m1.t1._row_id) }
- └─StreamExchange { dist: HashShard(m2.v1) }
- └─StreamTableScan { table: m2, columns: [m2.v1, m2.v2, m2.t1._row_id], pk: [m2.t1._row_id], dist: UpstreamHashShard(m2.t1._row_id) }
+ └─StreamExchange { dist: HashShard(m1.v1, m1.t1._row_id, m2.t1._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: m1.v1 = m2.v1, output: [m1.v1, m1.v2, m2.v1, m2.v2, m1.t1._row_id, m2.t1._row_id] }
+ ├─StreamExchange { dist: HashShard(m1.v1) }
+ │ └─StreamTableScan { table: m1, columns: [m1.v1, m1.v2, m1.t1._row_id], pk: [m1.t1._row_id], dist: UpstreamHashShard(m1.t1._row_id) }
+ └─StreamExchange { dist: HashShard(m2.v1) }
+ └─StreamTableScan { table: m2, columns: [m2.v1, m2.v2, m2.t1._row_id], pk: [m2.t1._row_id], dist: UpstreamHashShard(m2.t1._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml
index f4b9e28ce0775..8d452bf45bc36 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml
@@ -181,34 +181,38 @@
└─BatchScan { table: auction, columns: [auction.id, auction.seller, auction.category], distribution: UpstreamHashShard(auction.id) }
stream_plan: |-
StreamMaterialize { columns: [name, city, state, id, auction.seller(hidden), person.id(hidden)], stream_key: [id, auction.seller], pk_columns: [id, auction.seller], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
- ├─StreamExchange { dist: HashShard(auction.seller) }
- │ └─StreamProject { exprs: [auction.id, auction.seller] }
- │ └─StreamFilter { predicate: (auction.category = 10:Int32) }
- │ └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
- └─StreamExchange { dist: HashShard(person.id) }
- └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) }
- └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) }
+ └─StreamExchange { dist: HashShard(auction.id, auction.seller) }
+ └─StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
+ ├─StreamExchange { dist: HashShard(auction.seller) }
+ │ └─StreamProject { exprs: [auction.id, auction.seller] }
+ │ └─StreamFilter { predicate: (auction.category = 10:Int32) }
+ │ └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+ └─StreamExchange { dist: HashShard(person.id) }
+ └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) }
+ └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [name, city, state, id, auction.seller(hidden), person.id(hidden)], stream_key: [id, auction.seller], pk_columns: [id, auction.seller], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([1]) from 1
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([3, 4]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([1]) from 2
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
StreamProject { exprs: [auction.id, auction.seller] }
└── StreamFilter { predicate: (auction.category = 10:Int32) }
└── Chain { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 4 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) }
└── Chain { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) } { state table: 5 }
├── Upstream
@@ -242,7 +246,7 @@
├── columns: [ name, city, state, id, auction.seller, person.id ]
├── primary key: [ $3 ASC, $4 ASC ]
├── value indices: [ 0, 1, 2, 3, 4, 5 ]
- ├── distribution key: [ 4 ]
+ ├── distribution key: [ 3, 4 ]
└── read pk prefix len hint: 2
- id: nexmark_q4
@@ -834,9 +838,9 @@
AND P.endtime = A.endtime;
batch_plan: |-
BatchExchange { order: [], dist: Single }
- └─BatchHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, first_value(person.name order_by(person.name ASC)), $expr1] }
+ └─BatchHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1] }
├─BatchExchange { order: [], dist: HashShard(person.id, $expr1, $expr2) }
- │ └─BatchHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [first_value(person.name order_by(person.name ASC))] }
+ │ └─BatchHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name)] }
│ └─BatchProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
│ └─BatchProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
│ └─BatchScan { table: person, columns: [person.id, person.name, person.date_time], distribution: UpstreamHashShard(person.id) }
@@ -847,50 +851,54 @@
└─BatchScan { table: auction, columns: [auction.date_time, auction.seller], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), auction.seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, starttime, $expr2], pk_columns: [id, starttime, $expr2], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, first_value(person.name order_by(person.name ASC)), $expr1, $expr2, auction.seller, $expr3, $expr4] }
- ├─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) }
- │ └─StreamProject { exprs: [person.id, $expr1, $expr2, first_value(person.name order_by(person.name ASC))] }
- │ └─StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [first_value(person.name order_by(person.name ASC)), count] }
- │ └─StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
- │ └─StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
- │ └─StreamTableScan { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) }
- └─StreamProject { exprs: [auction.seller, $expr3, $expr4] }
- └─StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] }
- └─StreamExchange { dist: HashShard(auction.seller, $expr3, $expr4) }
- └─StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] }
- └─StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] }
- └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+ └─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) }
+ └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] }
+ ├─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) }
+ │ └─StreamProject { exprs: [person.id, $expr1, $expr2, internal_last_seen_value(person.name)] }
+ │ └─StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name), count] }
+ │ └─StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
+ │ └─StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
+ │ └─StreamTableScan { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) }
+ └─StreamProject { exprs: [auction.seller, $expr3, $expr4] }
+ └─StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] }
+ └─StreamExchange { dist: HashShard(auction.seller, $expr3, $expr4) }
+ └─StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] }
+ └─StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] }
+ └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), auction.seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, starttime, $expr2], pk_columns: [id, starttime, $expr2], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, first_value(person.name order_by(person.name ASC)), $expr1, $expr2, auction.seller, $expr3, $expr4] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0, 1, 2]) from 1
- └── StreamProject { exprs: [auction.seller, $expr3, $expr4] }
- └── StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } { intermediate state table: 7, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0, 1, 2]) from 2
+ └── StreamExchange Hash([0, 2, 3]) from 1
Fragment 1
- StreamProject { exprs: [person.id, $expr1, $expr2, first_value(person.name order_by(person.name ASC))] }
- └── StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [first_value(person.name order_by(person.name ASC)), count] } { intermediate state table: 5, state tables: [ 4 ], distinct tables: [] }
+ StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0, 1, 2]) from 2
+ └── StreamProject { exprs: [auction.seller, $expr3, $expr4] }
+ └── StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0, 1, 2]) from 3
+
+ Fragment 2
+ StreamProject { exprs: [person.id, $expr1, $expr2, internal_last_seen_value(person.name)] }
+ └── StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name), count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
└── StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
└── StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
- └── Chain { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } { state table: 6 }
+ └── Chain { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } { state table: 5 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] }
└── StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] }
- └── Chain { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 8 }
+ └── Chain { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 7 }
├── Upstream
└── BatchPlanNode
- Table 0 { columns: [ person_id, $expr1, $expr2, first_value(person_name order_by(person_name ASC)) ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
+ Table 0 { columns: [ person_id, $expr1, $expr2, internal_last_seen_value(person_name) ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
Table 1 { columns: [ person_id, $expr1, $expr2, _degree ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
@@ -898,17 +906,20 @@
Table 3 { columns: [ auction_seller, $expr3, $expr4, _degree ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
- Table 4 { columns: [ person_id, $expr1, $expr2, person_name ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 3 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
-
- Table 5 { columns: [ person_id, $expr1, $expr2, first_value(person_name order_by(person_name ASC)), count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3, 4 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
+ Table 4 { columns: [ person_id, $expr1, $expr2, internal_last_seen_value(person_name), count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3, 4 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
- Table 6 { columns: [ vnode, id, person_backfill_finished, person_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
+ Table 5 { columns: [ vnode, id, person_backfill_finished, person_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
- Table 7 { columns: [ auction_seller, $expr3, $expr4, count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
+ Table 6 { columns: [ auction_seller, $expr3, $expr4, count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
- Table 8 { columns: [ vnode, id, auction_backfill_finished, auction_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
+ Table 7 { columns: [ vnode, id, auction_backfill_finished, auction_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
- Table 4294967294 { columns: [ id, name, starttime, $expr2, auction.seller, $expr3, $expr4 ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 }
+ Table 4294967294
+ ├── columns: [ id, name, starttime, $expr2, auction.seller, $expr3, $expr4 ]
+ ├── primary key: [ $0 ASC, $2 ASC, $3 ASC ]
+ ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
+ ├── distribution key: [ 0, 2, 3 ]
+ └── read pk prefix len hint: 3
- id: nexmark_q9
before:
@@ -1130,27 +1141,31 @@
└─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) }
stream_plan: |-
StreamMaterialize { columns: [auction, bidder, price, date_time, value, bid._row_id(hidden), $expr1(hidden), side_input.key(hidden)], stream_key: [bid._row_id, $expr1], pk_columns: [bid._row_id, $expr1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
- ├─StreamExchange { dist: HashShard($expr1) }
- │ └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] }
- │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(side_input.key) }
- └─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) }
+ └─StreamExchange { dist: HashShard(bid._row_id, $expr1) }
+ └─StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
+ ├─StreamExchange { dist: HashShard($expr1) }
+ │ └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] }
+ │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(side_input.key) }
+ └─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction, bidder, price, date_time, value, bid._row_id(hidden), $expr1(hidden), side_input.key(hidden)], stream_key: [bid._row_id, $expr1], pk_columns: [bid._row_id, $expr1], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
- ├── StreamExchange Hash([4]) from 1
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange Hash([5, 6]) from 1
Fragment 1
+ StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
+ ├── StreamExchange Hash([4]) from 2
+ └── StreamExchange NoShuffle from 3
+
+ Fragment 2
StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] }
└── Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 0 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
Chain { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) } { state table: 1 }
├── Upstream
└── BatchPlanNode
@@ -1163,7 +1178,7 @@
├── columns: [ auction, bidder, price, date_time, value, bid._row_id, $expr1, side_input.key ]
├── primary key: [ $5 ASC, $6 ASC ]
├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
- ├── distribution key: [ 6 ]
+ ├── distribution key: [ 5, 6 ]
└── read pk prefix len hint: 2
- id: nexmark_q14
@@ -1792,30 +1807,34 @@
└─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id(hidden), auction.id(hidden)], stream_key: [bid._row_id, auction], pk_columns: [bid._row_id, auction], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
- ├─StreamExchange { dist: HashShard(bid.auction) }
- │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
- └─StreamExchange { dist: HashShard(auction.id) }
- └─StreamFilter { predicate: (auction.category = 10:Int32) }
- └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+ └─StreamExchange { dist: HashShard(bid.auction, bid._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
+ ├─StreamExchange { dist: HashShard(bid.auction) }
+ │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+ └─StreamExchange { dist: HashShard(auction.id) }
+ └─StreamFilter { predicate: (auction.category = 10:Int32) }
+ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id(hidden), auction.id(hidden)], stream_key: [bid._row_id, auction], pk_columns: [bid._row_id, auction], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0, 14]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 4 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
StreamFilter { predicate: (auction.category = 10:Int32) }
└── Chain { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 5 }
├── Upstream
@@ -1837,7 +1856,7 @@
├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id, auction.id ]
├── primary key: [ $14 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 14 ]
└── read pk prefix len hint: 2
- id: nexmark_q21
@@ -1943,33 +1962,37 @@
└─BatchScan { table: bid, columns: [bid.auction, bid.price], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, bid.auction(hidden)], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
- ├─StreamExchange { dist: HashShard(auction.id) }
- │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
- └─StreamProject { exprs: [bid.auction, max(bid.price)] }
- └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] }
- └─StreamExchange { dist: HashShard(bid.auction) }
- └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+ └─StreamExchange { dist: HashShard(auction.id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
+ ├─StreamExchange { dist: HashShard(auction.id) }
+ │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+ └─StreamProject { exprs: [bid.auction, max(bid.price)] }
+ └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] }
+ └─StreamExchange { dist: HashShard(bid.auction) }
+ └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, bid.auction(hidden)], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [bid.auction, max(bid.price)] }
- └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 1
Fragment 1
+ StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [bid.auction, max(bid.price)] }
+ └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 4 }
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
Chain { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 }
├── Upstream
└── BatchPlanNode
@@ -2026,9 +2049,9 @@
SELECT COUNT(*) / COUNT(DISTINCT auction) FROM bid
)
batch_plan: |-
- BatchNestedLoopJoin { type: Inner, predicate: (count(bid.auction) >= $expr1), output: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+ BatchNestedLoopJoin { type: Inner, predicate: (count(bid.auction) >= $expr1), output: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
├─BatchExchange { order: [], dist: Single }
- │ └─BatchHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+ │ └─BatchHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction)] }
│ └─BatchHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
│ ├─BatchExchange { order: [], dist: HashShard(auction.id) }
│ │ └─BatchScan { table: auction, columns: [auction.id, auction.item_name], distribution: UpstreamHashShard(auction.id) }
@@ -2043,9 +2066,9 @@
└─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
- └─StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
- ├─StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
- │ └─StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
+ └─StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
+ ├─StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
+ │ └─StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
│ └─StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
│ ├─StreamExchange { dist: HashShard(auction.id) }
│ │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
@@ -2063,50 +2086,50 @@
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+ └── StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
├── left table: 0
├── right table: 1
- ├── StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
- │ └── StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
- │ ├── intermediate state table: 3
- │ ├── state tables: [ 2 ]
+ ├── StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
+ │ └── StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
+ │ ├── intermediate state table: 2
+ │ ├── state tables: []
│ ├── distinct tables: []
│ └── StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
- │ ├── left table: 4
- │ ├── right table: 6
- │ ├── left degree table: 5
- │ ├── right degree table: 7
+ │ ├── left table: 3
+ │ ├── right table: 5
+ │ ├── left degree table: 4
+ │ ├── right degree table: 6
│ ├── StreamExchange Hash([0]) from 1
│ └── StreamExchange Hash([0]) from 2
└── StreamExchange Broadcast from 3
Fragment 1
- Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 8 }
+ Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 7 }
├── Upstream
└── BatchPlanNode
Fragment 2
- Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 9 }
+ Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 8 }
├── Upstream
└── BatchPlanNode
Fragment 3
StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(bid.auction))) as $expr1] }
- └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(bid.auction)), count] } { intermediate state table: 10, state tables: [], distinct tables: [] }
+ └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(bid.auction)), count] } { intermediate state table: 9, state tables: [], distinct tables: [] }
└── StreamExchange Single from 4
Fragment 4
StreamStatelessSimpleAgg { aggs: [sum0(count), count(bid.auction)] }
- └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } { intermediate state table: 11, state tables: [], distinct tables: [] }
+ └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } { intermediate state table: 10, state tables: [], distinct tables: [] }
└── StreamExchange Hash([0]) from 5
Fragment 5
- Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 12 }
+ Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 11 }
├── Upstream
└── BatchPlanNode
Table 0
- ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction) ]
+ ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction) ]
├── primary key: [ $2 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2 ]
├── distribution key: [ 0 ]
@@ -2115,28 +2138,36 @@
Table 1 { columns: [ $expr1 ], primary key: [], value indices: [ 0 ], distribution key: [], read pk prefix len hint: 0 }
Table 2
- ├── columns: [ auction_id, auction_item_name, bid__row_id ]
- ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
- ├── value indices: [ 0, 1, 2 ]
+ ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), count ]
+ ├── primary key: [ $0 ASC ]
+ ├── value indices: [ 1, 2, 3 ]
├── distribution key: [ 0 ]
└── read pk prefix len hint: 1
Table 3
- ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), count ]
+ ├── columns: [ auction_id, auction_item_name ]
├── primary key: [ $0 ASC ]
- ├── value indices: [ 1, 2, 3 ]
+ ├── value indices: [ 0, 1 ]
├── distribution key: [ 0 ]
└── read pk prefix len hint: 1
- Table 4 { columns: [ auction_id, auction_item_name ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
-
- Table 5 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+ Table 4 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 6 { columns: [ bid_auction, bid__row_id ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+ Table 5
+ ├── columns: [ bid_auction, bid__row_id ]
+ ├── primary key: [ $0 ASC, $1 ASC ]
+ ├── value indices: [ 0, 1 ]
+ ├── distribution key: [ 0 ]
+ └── read pk prefix len hint: 1
- Table 7 { columns: [ bid_auction, bid__row_id, _degree ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+ Table 6
+ ├── columns: [ bid_auction, bid__row_id, _degree ]
+ ├── primary key: [ $0 ASC, $1 ASC ]
+ ├── value indices: [ 2 ]
+ ├── distribution key: [ 0 ]
+ └── read pk prefix len hint: 1
- Table 8
+ Table 7
├── columns: [ vnode, id, auction_backfill_finished, auction_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
@@ -2144,7 +2175,7 @@
├── read pk prefix len hint: 1
└── vnode column idx: 0
- Table 9
+ Table 8
├── columns: [ vnode, _row_id, bid_backfill_finished, bid_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
@@ -2152,11 +2183,16 @@
├── read pk prefix len hint: 1
└── vnode column idx: 0
- Table 10 { columns: [ sum0(sum0(count)), sum0(count(bid_auction)), count ], primary key: [], value indices: [ 0, 1, 2 ], distribution key: [], read pk prefix len hint: 0 }
+ Table 9
+ ├── columns: [ sum0(sum0(count)), sum0(count(bid_auction)), count ]
+ ├── primary key: []
+ ├── value indices: [ 0, 1, 2 ]
+ ├── distribution key: []
+ └── read pk prefix len hint: 0
- Table 11 { columns: [ bid_auction, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+ Table 10 { columns: [ bid_auction, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 12
+ Table 11
├── columns: [ vnode, _row_id, bid_backfill_finished, bid_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
@@ -2199,39 +2235,43 @@
└─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
- ├─StreamExchange { dist: HashShard(auction.id) }
- │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
- └─StreamProject { exprs: [bid.auction] }
- └─StreamFilter { predicate: (count >= 20:Int32) }
- └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
- └─StreamExchange { dist: HashShard(bid.auction) }
- └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+ └─StreamExchange { dist: HashShard(auction.id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
+ ├─StreamExchange { dist: HashShard(auction.id) }
+ │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+ └─StreamProject { exprs: [bid.auction] }
+ └─StreamFilter { predicate: (count >= 20:Int32) }
+ └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+ └─StreamExchange { dist: HashShard(bid.auction) }
+ └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [bid.auction] }
- └── StreamFilter { predicate: (count >= 20:Int32) }
- └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
- ├── intermediate state table: 5
- ├── state tables: []
- ├── distinct tables: []
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 1
Fragment 1
+ StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [bid.auction] }
+ └── StreamFilter { predicate: (count >= 20:Int32) }
+ └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+ ├── intermediate state table: 5
+ ├── state tables: []
+ ├── distinct tables: []
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
├── state table: 4
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 }
├── Upstream
└── BatchPlanNode
@@ -2317,39 +2357,43 @@
└─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
- ├─StreamExchange { dist: HashShard(auction.id) }
- │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
- └─StreamProject { exprs: [bid.auction] }
- └─StreamFilter { predicate: (count < 20:Int32) }
- └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
- └─StreamExchange { dist: HashShard(bid.auction) }
- └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+ └─StreamExchange { dist: HashShard(auction.id) }
+ └─StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
+ ├─StreamExchange { dist: HashShard(auction.id) }
+ │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+ └─StreamProject { exprs: [bid.auction] }
+ └─StreamFilter { predicate: (count < 20:Int32) }
+ └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+ └─StreamExchange { dist: HashShard(bid.auction) }
+ └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [bid.auction] }
- └── StreamFilter { predicate: (count < 20:Int32) }
- └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
- ├── intermediate state table: 5
- ├── state tables: []
- ├── distinct tables: []
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 1
Fragment 1
+ StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [bid.auction] }
+ └── StreamFilter { predicate: (count < 20:Int32) }
+ └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+ ├── intermediate state table: 5
+ ├── state tables: []
+ ├── distinct tables: []
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
├── state table: 4
├── Upstream
└── BatchPlanNode
- Fragment 2
+ Fragment 3
Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 }
├── Upstream
└── BatchPlanNode
@@ -2427,7 +2471,7 @@
BatchTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 }
└─BatchExchange { order: [], dist: Single }
└─BatchTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 }
- └─BatchHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+ └─BatchHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction)] }
└─BatchHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
├─BatchExchange { order: [], dist: HashShard(auction.id) }
│ └─BatchScan { table: auction, columns: [auction.id, auction.item_name], distribution: UpstreamHashShard(auction.id) }
@@ -2435,12 +2479,12 @@
└─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [bid_count, auction_id], pk_conflict: NoCheck }
- └─StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+ └─StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
└─StreamTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 }
└─StreamExchange { dist: Single }
└─StreamGroupTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0, group_key: [$expr1] }
- └─StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), Vnode(auction.id) as $expr1] }
- └─StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
+ └─StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction), Vnode(auction.id) as $expr1] }
+ └─StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
└─StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
├─StreamExchange { dist: HashShard(auction.id) }
│ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
@@ -2450,44 +2494,44 @@
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [bid_count, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+ └── StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
└── StreamTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 } { state table: 0 }
└── StreamExchange Single from 1
Fragment 1
StreamGroupTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0, group_key: [$expr1] } { state table: 1 }
- └── StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), Vnode(auction.id) as $expr1] }
- └── StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
- ├── intermediate state table: 3
- ├── state tables: [ 2 ]
+ └── StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction), Vnode(auction.id) as $expr1] }
+ └── StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
+ ├── intermediate state table: 2
+ ├── state tables: []
├── distinct tables: []
└── StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
- ├── left table: 4
- ├── right table: 6
- ├── left degree table: 5
- ├── right degree table: 7
+ ├── left table: 3
+ ├── right table: 5
+ ├── left degree table: 4
+ ├── right degree table: 6
├── StreamExchange Hash([0]) from 2
└── StreamExchange Hash([0]) from 3
Fragment 2
- Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 8 }
+ Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 7 }
├── Upstream
└── BatchPlanNode
Fragment 3
- Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 9 }
+ Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 8 }
├── Upstream
└── BatchPlanNode
Table 0
- ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), $expr1 ]
+ ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), $expr1 ]
├── primary key: [ $2 DESC, $0 ASC ]
├── value indices: [ 0, 1, 2, 3 ]
├── distribution key: []
└── read pk prefix len hint: 0
Table 1
- ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), $expr1 ]
+ ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), $expr1 ]
├── primary key: [ $3 ASC, $2 DESC, $0 ASC ]
├── value indices: [ 0, 1, 2, 3 ]
├── distribution key: [ 0 ]
@@ -2495,38 +2539,31 @@
└── vnode column idx: 3
Table 2
- ├── columns: [ auction_id, auction_item_name, bid__row_id ]
- ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
- ├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
- └── read pk prefix len hint: 1
-
- Table 3
- ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), count ]
+ ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
├── distribution key: [ 0 ]
└── read pk prefix len hint: 1
- Table 4 { columns: [ auction_id, auction_item_name ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+ Table 3 { columns: [ auction_id, auction_item_name ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 5 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+ Table 4 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 6
+ Table 5
├── columns: [ bid_auction, bid__row_id ]
├── primary key: [ $0 ASC, $1 ASC ]
├── value indices: [ 0, 1 ]
├── distribution key: [ 0 ]
└── read pk prefix len hint: 1
- Table 7
+ Table 6
├── columns: [ bid_auction, bid__row_id, _degree ]
├── primary key: [ $0 ASC, $1 ASC ]
├── value indices: [ 2 ]
├── distribution key: [ 0 ]
└── read pk prefix len hint: 1
- Table 8
+ Table 7
├── columns: [ vnode, id, auction_backfill_finished, auction_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
@@ -2534,7 +2571,7 @@
├── read pk prefix len hint: 1
└── vnode column idx: 0
- Table 9
+ Table 8
├── columns: [ vnode, _row_id, bid_backfill_finished, bid_row_count ]
├── primary key: [ $0 ASC ]
├── value indices: [ 1, 2, 3 ]
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml
index 7c694fad1fa67..31be64b2c480a 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml
@@ -158,29 +158,33 @@
└─BatchSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, seller], pk_columns: [_row_id, _row_id#1, seller], pk_conflict: NoCheck }
- └─StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] }
- ├─StreamExchange { dist: HashShard(seller) }
- │ └─StreamFilter { predicate: (category = 10:Int32) }
- │ └─StreamRowIdGen { row_id_index: 10 }
- │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
- └─StreamExchange { dist: HashShard(id) }
- └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) }
- └─StreamRowIdGen { row_id_index: 8 }
- └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(_row_id, seller, _row_id) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] }
+ ├─StreamExchange { dist: HashShard(seller) }
+ │ └─StreamFilter { predicate: (category = 10:Int32) }
+ │ └─StreamRowIdGen { row_id_index: 10 }
+ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(id) }
+ └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) }
+ └─StreamRowIdGen { row_id_index: 8 }
+ └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, seller], pk_columns: [_row_id, _row_id#1, seller], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([7]) from 1
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([4, 5, 6]) from 1
Fragment 1
+ StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([7]) from 2
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
StreamFilter { predicate: (category = 10:Int32) }
└── StreamRowIdGen { row_id_index: 10 }
└── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 4 }
- Fragment 2
+ Fragment 3
StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) }
└── StreamRowIdGen { row_id_index: 8 }
└── StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } { source state table: 5 }
@@ -211,7 +215,7 @@
├── columns: [ name, city, state, id, _row_id, seller, _row_id#1 ]
├── primary key: [ $4 ASC, $6 ASC, $5 ASC ]
├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
- ├── distribution key: [ 5 ]
+ ├── distribution key: [ 4, 5, 6 ]
└── read pk prefix len hint: 3
- id: nexmark_q4
@@ -737,40 +741,44 @@
└─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, name, starttime, $expr2], pk_columns: [id, name, starttime, $expr2], pk_conflict: NoCheck }
- └─StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all }
- ├─StreamExchange { dist: HashShard(id, $expr1, $expr2) }
- │ └─StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] }
- │ └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) }
- │ └─StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
- │ └─StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] }
- │ └─StreamRowIdGen { row_id_index: 8 }
- │ └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
- └─StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] }
- └─StreamExchange { dist: HashShard(seller, $expr3, $expr4) }
- └─StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] }
- └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] }
- └─StreamRowIdGen { row_id_index: 10 }
- └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all }
+ ├─StreamExchange { dist: HashShard(id, $expr1, $expr2) }
+ │ └─StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] }
+ │ └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) }
+ │ └─StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
+ │ └─StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] }
+ │ └─StreamRowIdGen { row_id_index: 8 }
+ │ └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
+ └─StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] }
+ └─StreamExchange { dist: HashShard(seller, $expr3, $expr4) }
+ └─StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] }
+ └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] }
+ └─StreamRowIdGen { row_id_index: 10 }
+ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, name, starttime, $expr2], pk_columns: [id, name, starttime, $expr2], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0, 2, 3]) from 1
- └── StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } { state table: 6 }
- └── StreamExchange Hash([0, 1, 2]) from 3
+ └── StreamExchange Hash([0, 1, 2, 3]) from 1
Fragment 1
- StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } { state table: 4 }
- └── StreamExchange Hash([0, 1, 2, 3]) from 2
+ StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0, 2, 3]) from 2
+ └── StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } { state table: 6 }
+ └── StreamExchange Hash([0, 1, 2]) from 4
Fragment 2
+ StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } { state table: 4 }
+ └── StreamExchange Hash([0, 1, 2, 3]) from 3
+
+ Fragment 3
StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
└── StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] }
└── StreamRowIdGen { row_id_index: 8 }
└── StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } { source state table: 5 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] }
└── StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] }
└── StreamRowIdGen { row_id_index: 10 }
@@ -796,7 +804,7 @@
├── columns: [ id, name, starttime, $expr2, seller, $expr3, $expr4 ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ]
├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
- ├── distribution key: [ 0, 2, 3 ]
+ ├── distribution key: [ 0, 1, 2, 3 ]
└── read pk prefix len hint: 4
- id: nexmark_q9
@@ -1629,31 +1637,31 @@
└─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
- └─StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] }
- ├─StreamExchange { dist: HashShard(auction) }
- │ └─StreamRowIdGen { row_id_index: 7 }
- │ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
- └─StreamExchange { dist: HashShard(id) }
- └─StreamFilter { predicate: (category = 10:Int32) }
- └─StreamRowIdGen { row_id_index: 10 }
- └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(auction, _row_id, _row_id) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] }
+ ├─StreamExchange { dist: HashShard(auction) }
+ │ └─StreamRowIdGen { row_id_index: 7 }
+ │ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(id) }
+ └─StreamFilter { predicate: (category = 10:Int32) }
+ └─StreamRowIdGen { row_id_index: 10 }
+ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0, 14, 15]) from 1
Fragment 1
+ StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
StreamRowIdGen { row_id_index: 7 }
└── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 4 }
- Fragment 2
+ Fragment 3
StreamFilter { predicate: (category = 10:Int32) }
└── StreamRowIdGen { row_id_index: 10 }
└── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 5 }
@@ -1674,7 +1682,7 @@
├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ]
├── primary key: [ $14 ASC, $15 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 14, 15 ]
└── read pk prefix len hint: 3
- id: nexmark_q21
@@ -1775,30 +1783,34 @@
└─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] }
- ├─StreamExchange { dist: HashShard(id) }
- │ └─StreamRowIdGen { row_id_index: 10 }
- │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
- └─StreamProject { exprs: [auction, max(price)] }
- └─StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] }
- └─StreamExchange { dist: HashShard(auction) }
- └─StreamRowIdGen { row_id_index: 7 }
- └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(id, _row_id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] }
+ ├─StreamExchange { dist: HashShard(id) }
+ │ └─StreamRowIdGen { row_id_index: 10 }
+ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ └─StreamProject { exprs: [auction, max(price)] }
+ └─StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] }
+ └─StreamExchange { dist: HashShard(auction) }
+ └─StreamRowIdGen { row_id_index: 7 }
+ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [auction, max(price)] }
- └── StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0, 3]) from 1
Fragment 1
+ StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [auction, max(price)] }
+ └── StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
StreamRowIdGen { row_id_index: 10 }
└── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 4 }
- Fragment 2
+ Fragment 3
StreamRowIdGen { row_id_index: 7 }
└── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 }
@@ -1825,7 +1837,7 @@
├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, auction ]
├── primary key: [ $3 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2, 3, 4 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 3 ]
└── read pk prefix len hint: 2
- id: nexmark_q102
@@ -1992,37 +2004,41 @@
└─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
- ├─StreamExchange { dist: HashShard(id) }
- │ └─StreamRowIdGen { row_id_index: 10 }
- │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
- └─StreamProject { exprs: [auction] }
- └─StreamFilter { predicate: (count >= 20:Int32) }
- └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
- └─StreamExchange { dist: HashShard(auction) }
- └─StreamRowIdGen { row_id_index: 7 }
- └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(id, _row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
+ ├─StreamExchange { dist: HashShard(id) }
+ │ └─StreamRowIdGen { row_id_index: 10 }
+ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ └─StreamProject { exprs: [auction] }
+ └─StreamFilter { predicate: (count >= 20:Int32) }
+ └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
+ └─StreamExchange { dist: HashShard(auction) }
+ └─StreamRowIdGen { row_id_index: 7 }
+ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [auction] }
- └── StreamFilter { predicate: (count >= 20:Int32) }
- └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0, 2]) from 1
Fragment 1
+ StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [auction] }
+ └── StreamFilter { predicate: (count >= 20:Int32) }
+ └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
StreamRowIdGen { row_id_index: 10 }
└── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
└── source state table: 4
- Fragment 2
+ Fragment 3
StreamRowIdGen { row_id_index: 7 }
└── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 }
@@ -2049,7 +2065,7 @@
├── columns: [ auction_id, auction_item_name, _row_id ]
├── primary key: [ $2 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 2 ]
└── read pk prefix len hint: 2
- id: nexmark_q104
@@ -2080,37 +2096,41 @@
└─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
- ├─StreamExchange { dist: HashShard(id) }
- │ └─StreamRowIdGen { row_id_index: 10 }
- │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
- └─StreamProject { exprs: [auction] }
- └─StreamFilter { predicate: (count < 20:Int32) }
- └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
- └─StreamExchange { dist: HashShard(auction) }
- └─StreamRowIdGen { row_id_index: 7 }
- └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(id, _row_id) }
+ └─StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
+ ├─StreamExchange { dist: HashShard(id) }
+ │ └─StreamRowIdGen { row_id_index: 10 }
+ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ └─StreamProject { exprs: [auction] }
+ └─StreamFilter { predicate: (count < 20:Int32) }
+ └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
+ └─StreamExchange { dist: HashShard(auction) }
+ └─StreamRowIdGen { row_id_index: 7 }
+ └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [auction] }
- └── StreamFilter { predicate: (count < 20:Int32) }
- └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0, 2]) from 1
Fragment 1
+ StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [auction] }
+ └── StreamFilter { predicate: (count < 20:Int32) }
+ └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 3
+
+ Fragment 2
StreamRowIdGen { row_id_index: 10 }
└── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
└── source state table: 4
- Fragment 2
+ Fragment 3
StreamRowIdGen { row_id_index: 7 }
└── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 }
@@ -2137,7 +2157,7 @@
├── columns: [ auction_id, auction_item_name, _row_id ]
├── primary key: [ $2 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 2 ]
└── read pk prefix len hint: 2
- id: nexmark_q105
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml
index ccdde39e76764..c6c3ffd4f5ad6 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml
@@ -717,65 +717,69 @@
AND P.endtime = A.endtime;
stream_plan: |-
StreamMaterialize { columns: [id, name, starttime, $expr6(hidden), $expr8(hidden), $expr9(hidden), $expr10(hidden)], stream_key: [id, name, starttime, $expr6], pk_columns: [id, name, starttime, $expr6], pk_conflict: NoCheck }
- └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all }
- ├─StreamExchange { dist: HashShard($expr2, $expr5, $expr6) }
- │ └─StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] }
- │ └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) }
- │ └─StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] }
- │ └─StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] }
- │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] }
- │ └─StreamFilter { predicate: (event_type = 0:Int32) }
- │ └─StreamShare { id: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, _row_id] }
- │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] }
- └─StreamExchange { dist: HashShard($expr8, $expr9, $expr10) }
- └─StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] }
- └─StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] }
- └─StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] }
- └─StreamFilter { predicate: (event_type = 1:Int32) }
- └─StreamShare { id: 5 }
- └─StreamProject { exprs: [event_type, person, auction, _row_id] }
- └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all }
+ ├─StreamExchange { dist: HashShard($expr2, $expr5, $expr6) }
+ │ └─StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] }
+ │ └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) }
+ │ └─StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] }
+ │ └─StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] }
+ │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] }
+ │ └─StreamFilter { predicate: (event_type = 0:Int32) }
+ │ └─StreamShare { id: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+ │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] }
+ └─StreamExchange { dist: HashShard($expr8, $expr9, $expr10) }
+ └─StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] }
+ └─StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] }
+ └─StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] }
+ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ └─StreamShare { id: 5 }
+ └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [id, name, starttime, $expr6(hidden), $expr8(hidden), $expr9(hidden), $expr10(hidden)], stream_key: [id, name, starttime, $expr6], pk_columns: [id, name, starttime, $expr6], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0, 2, 3]) from 1
- └── StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } { state table: 6 }
- └── StreamExchange Hash([0, 1, 2]) from 4
+ └── StreamExchange Hash([0, 1, 2, 3]) from 1
Fragment 1
- StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } { state table: 4 }
- └── StreamExchange Hash([0, 1, 2, 3]) from 2
+ StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0, 2, 3]) from 2
+ └── StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } { state table: 6 }
+ └── StreamExchange Hash([0, 1, 2]) from 5
Fragment 2
+ StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } { state table: 4 }
+ └── StreamExchange Hash([0, 1, 2, 3]) from 3
+
+ Fragment 3
StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] }
└── StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] }
└── StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] }
└── StreamFilter { predicate: (event_type = 0:Int32) }
- └── StreamExchange NoShuffle from 3
+ └── StreamExchange NoShuffle from 4
- Fragment 3
+ Fragment 4
StreamProject { exprs: [event_type, person, auction, _row_id] }
└── StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
└── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
- Fragment 4
+ Fragment 5
StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] }
└── StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] }
└── StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 3
+ └── StreamExchange NoShuffle from 4
Table 0 { columns: [ $expr2, $expr3, $expr5, $expr6 ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 }
@@ -795,7 +799,7 @@
├── columns: [ id, name, starttime, $expr6, $expr8, $expr9, $expr10 ]
├── primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ]
├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
- ├── distribution key: [ 0, 2, 3 ]
+ ├── distribution key: [ 0, 1, 2, 3 ]
└── read pk prefix len hint: 4
- id: nexmark_q9
@@ -1180,59 +1184,63 @@
WHERE A.category = 10;
stream_plan: |-
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] }
- ├─StreamExchange { dist: HashShard($expr3) }
- │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] }
- │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
- │ ├─StreamFilter { predicate: (event_type = 2:Int32) }
- │ │ └─StreamShare { id: 5 }
- │ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
- │ │ └─StreamRowIdGen { row_id_index: 5 }
- │ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- │ └─StreamExchange { dist: Broadcast }
- │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
- │ └─StreamNow { output: [now] }
- └─StreamExchange { dist: HashShard($expr9) }
- └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] }
- └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
- └─StreamShare { id: 5 }
- └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr3, _row_id, _row_id) }
+ └─StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] }
+ ├─StreamExchange { dist: HashShard($expr3) }
+ │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] }
+ │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+ │ ├─StreamFilter { predicate: (event_type = 2:Int32) }
+ │ │ └─StreamShare { id: 5 }
+ │ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+ │ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ │ └─StreamExchange { dist: Broadcast }
+ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+ │ └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard($expr9) }
+ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] }
+ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
+ └─StreamShare { id: 5 }
+ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 4
+ └── StreamExchange Hash([0, 14, 15]) from 1
Fragment 1
+ StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 5
+
+ Fragment 2
StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] }
└── StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { left table: 4, right table: 5 }
├── StreamFilter { predicate: (event_type = 2:Int32) }
- │ └── StreamExchange NoShuffle from 2
- └── StreamExchange Broadcast from 3
+ │ └── StreamExchange NoShuffle from 3
+ └── StreamExchange Broadcast from 4
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
└── StreamRowIdGen { row_id_index: 5 }
└── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 6 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
└── StreamNow { output: [now] } { state table: 7 }
- Fragment 4
+ Fragment 5
StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] }
└── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
Table 0 { columns: [ $expr3, $expr4, $expr5, $expr6, $expr7, $expr8, _row_id ], primary key: [ $0 ASC, $6 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
@@ -1250,7 +1258,12 @@
Table 7 { columns: [ now ], primary key: [], value indices: [ 0 ], distribution key: [], read pk prefix len hint: 0 }
- Table 4294967294 { columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ], primary key: [ $14 ASC, $15 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
+ Table 4294967294
+ ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ]
+ ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ]
+ ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
+ ├── distribution key: [ 0, 14, 15 ]
+ └── read pk prefix len hint: 3
- id: nexmark_q21
before:
@@ -1375,61 +1388,65 @@
) b ON a.id = b.auction;
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
- │ └─StreamFilter { predicate: (event_type = 1:Int32) }
- │ └─StreamShare { id: 5 }
- │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamProject { exprs: [$expr5, max($expr6)] }
- └─StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] }
- └─StreamExchange { dist: HashShard($expr5) }
- └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] }
- └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
- ├─StreamFilter { predicate: (event_type = 2:Int32) }
- │ └─StreamShare { id: 5 }
- │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamExchange { dist: Broadcast }
- └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
- └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard($expr2, _row_id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+ │ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ │ └─StreamShare { id: 5 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamProject { exprs: [$expr5, max($expr6)] }
+ └─StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] }
+ └─StreamExchange { dist: HashShard($expr5) }
+ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] }
+ └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+ ├─StreamFilter { predicate: (event_type = 2:Int32) }
+ │ └─StreamShare { id: 5 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: Broadcast }
+ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
+ └─StreamNow { output: [now] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [$expr5, max($expr6)] }
- └── StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } { intermediate state table: 6, state tables: [ 5 ], distinct tables: [] }
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([0, 3]) from 1
Fragment 1
+ StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [$expr5, max($expr6)] }
+ └── StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } { intermediate state table: 6, state tables: [ 5 ], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
└── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] }
└── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { left table: 7, right table: 8 }
├── StreamFilter { predicate: (event_type = 2:Int32) }
- │ └── StreamExchange NoShuffle from 2
- └── StreamExchange Broadcast from 4
+ │ └── StreamExchange NoShuffle from 3
+ └── StreamExchange Broadcast from 5
- Fragment 4
+ Fragment 5
StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
└── StreamNow { output: [now] } { state table: 9 }
@@ -1457,7 +1474,7 @@
├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, $expr5 ]
├── primary key: [ $3 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2, 3, 4 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 3 ]
└── read pk prefix len hint: 2
- id: nexmark_q102
@@ -1642,65 +1659,69 @@
);
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
- │ └─StreamFilter { predicate: (event_type = 1:Int32) }
- │ └─StreamShare { id: 5 }
- │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamProject { exprs: [$expr5] }
- └─StreamFilter { predicate: (count >= 20:Int32) }
- └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
- └─StreamExchange { dist: HashShard($expr5) }
- └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
- └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
- ├─StreamFilter { predicate: (event_type = 2:Int32) }
- │ └─StreamShare { id: 5 }
- │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamExchange { dist: Broadcast }
- └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
- └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard($expr2, _row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+ │ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ │ └─StreamShare { id: 5 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamProject { exprs: [$expr5] }
+ └─StreamFilter { predicate: (count >= 20:Int32) }
+ └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
+ └─StreamExchange { dist: HashShard($expr5) }
+ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
+ └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+ ├─StreamFilter { predicate: (event_type = 2:Int32) }
+ │ └─StreamShare { id: 5 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: Broadcast }
+ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
+ └─StreamNow { output: [now] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [$expr5] }
- └── StreamFilter { predicate: (count >= 20:Int32) }
- └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([0, 2]) from 1
Fragment 1
+ StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [$expr5] }
+ └── StreamFilter { predicate: (count >= 20:Int32) }
+ └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
└── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
└── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
├── left table: 6
├── right table: 7
├── StreamFilter { predicate: (event_type = 2:Int32) }
- │ └── StreamExchange NoShuffle from 2
- └── StreamExchange Broadcast from 4
+ │ └── StreamExchange NoShuffle from 3
+ └── StreamExchange Broadcast from 5
- Fragment 4
+ Fragment 5
StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
└── StreamNow { output: [now] } { state table: 8 }
@@ -1731,7 +1752,7 @@
├── columns: [ auction_id, auction_item_name, _row_id ]
├── primary key: [ $2 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 2 ]
└── read pk prefix len hint: 2
- id: nexmark_q104
@@ -1752,65 +1773,69 @@
);
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
- │ └─StreamFilter { predicate: (event_type = 1:Int32) }
- │ └─StreamShare { id: 5 }
- │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamProject { exprs: [$expr5] }
- └─StreamFilter { predicate: (count < 20:Int32) }
- └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
- └─StreamExchange { dist: HashShard($expr5) }
- └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
- └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
- ├─StreamFilter { predicate: (event_type = 2:Int32) }
- │ └─StreamShare { id: 5 }
- │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamExchange { dist: Broadcast }
- └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
- └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard($expr2, _row_id) }
+ └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+ │ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ │ └─StreamShare { id: 5 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamProject { exprs: [$expr5] }
+ └─StreamFilter { predicate: (count < 20:Int32) }
+ └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
+ └─StreamExchange { dist: HashShard($expr5) }
+ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
+ └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+ ├─StreamFilter { predicate: (event_type = 2:Int32) }
+ │ └─StreamShare { id: 5 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: Broadcast }
+ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
+ └─StreamNow { output: [now] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [$expr5] }
- └── StreamFilter { predicate: (count < 20:Int32) }
- └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([0, 2]) from 1
Fragment 1
+ StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [$expr5] }
+ └── StreamFilter { predicate: (count < 20:Int32) }
+ └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
└── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
└── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
├── left table: 6
├── right table: 7
├── StreamFilter { predicate: (event_type = 2:Int32) }
- │ └── StreamExchange NoShuffle from 2
- └── StreamExchange Broadcast from 4
+ │ └── StreamExchange NoShuffle from 3
+ └── StreamExchange Broadcast from 5
- Fragment 4
+ Fragment 5
StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
└── StreamNow { output: [now] } { state table: 8 }
@@ -1841,7 +1866,7 @@
├── columns: [ auction_id, auction_item_name, _row_id ]
├── primary key: [ $2 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 2 ]
└── read pk prefix len hint: 2
- id: nexmark_q105
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml
index 3554e31d281ec..39adc39a16653 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml
@@ -131,45 +131,49 @@
└─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), $expr3(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, $expr3], pk_columns: [_row_id, _row_id#1, $expr3], pk_conflict: NoCheck }
- └─StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
- ├─StreamExchange { dist: HashShard($expr3) }
- │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] }
- │ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
- │ └─StreamShare { id: 6 }
- │ └─StreamProject { exprs: [event_type, person, auction, _row_id] }
- │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamExchange { dist: HashShard($expr4) }
- └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] }
- └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) }
- └─StreamShare { id: 6 }
- └─StreamProject { exprs: [event_type, person, auction, _row_id] }
- └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard(_row_id, $expr3, _row_id) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
+ ├─StreamExchange { dist: HashShard($expr3) }
+ │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] }
+ │ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
+ │ └─StreamShare { id: 6 }
+ │ └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+ │ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr4) }
+ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] }
+ └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) }
+ └─StreamShare { id: 6 }
+ └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+ └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), $expr3(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, $expr3], pk_columns: [_row_id, _row_id#1, $expr3], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([1]) from 1
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([4, 5, 6]) from 1
Fragment 1
+ StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([1]) from 2
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] }
└── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, person, auction, _row_id] }
└── StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
└── StreamRowIdGen { row_id_index: 5 }
@@ -177,10 +181,10 @@
└── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] }
└── StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 1 ], read pk prefix len hint: 1 }
@@ -194,7 +198,7 @@
Table 5 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 }
- Table 4294967294 { columns: [ name, city, state, id, _row_id, $expr3, _row_id#1 ], primary key: [ $4 ASC, $6 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 5 ], read pk prefix len hint: 3 }
+ Table 4294967294 { columns: [ name, city, state, id, _row_id, $expr3, _row_id#1 ], primary key: [ $4 ASC, $6 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 4, 5, 6 ], read pk prefix len hint: 3 }
eowc_stream_error: |-
Not supported: The query cannot be executed in Emit-On-Window-Close mode.
@@ -696,43 +700,48 @@
└─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] }
- └─StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
- ├─StreamExchange { dist: HashShard($expr4) }
- │ └─StreamShare { id: 6 }
- │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: (event_type = 2:Int32) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamExchange { dist: HashShard(max($expr4)) }
- └─StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] }
- └─StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] }
- └─StreamExchange { dist: HashShard($expr5) }
- └─StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] }
- └─StreamShare { id: 6 }
- └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
- └─StreamFilter { predicate: (event_type = 2:Int32) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr4, _row_id, $expr5) }
+ └─StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
+ ├─StreamExchange { dist: HashShard($expr4) }
+ │ └─StreamShare { id: 6 }
+ │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: (event_type = 2:Int32) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard(max($expr4)) }
+ └─StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] }
+ └─StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] }
+ └─StreamExchange { dist: HashShard($expr5) }
+ └─StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] }
+ └─StreamShare { id: 6 }
+ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
+ └─StreamFilter { predicate: (event_type = 2:Int32) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
- StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] } { materialized table: 4294967294 }
- └── StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
- ├── left table: 0
- ├── right table: 2
- ├── left degree table: 1
- ├── right degree table: 3
- ├── StreamExchange Hash([2]) from 1
- └── StreamExchange Hash([1]) from 3
+ StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] }
+ ├── materialized table: 4294967294
+ └── StreamExchange Hash([1, 4, 5]) from 1
Fragment 1
- StreamNoOp
- └── StreamExchange NoShuffle from 2
+ StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
+ ├── left table: 0
+ ├── right table: 2
+ ├── left degree table: 1
+ ├── right degree table: 3
+ ├── StreamExchange Hash([2]) from 2
+ └── StreamExchange Hash([1]) from 4
Fragment 2
+ StreamNoOp
+ └── StreamExchange NoShuffle from 3
+
+ Fragment 3
StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: (event_type = 2:Int32) }
└── StreamRowIdGen { row_id_index: 5 }
@@ -740,14 +749,14 @@
└── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] }
└── StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] } { intermediate state table: 6, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 4
+ └── StreamExchange Hash([0]) from 5
- Fragment 4
+ Fragment 5
StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
Table 0 { columns: [ $expr2, $expr3, $expr4, $expr1, _row_id ], primary key: [ $2 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4 ], distribution key: [ 2 ], read pk prefix len hint: 1 }
@@ -763,7 +772,7 @@
Table 6 { columns: [ $expr5, max($expr4), count ], primary key: [ $0 ASC ], value indices: [ 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 4294967294 { columns: [ auction, price, bidder, date_time, _row_id, $expr5 ], primary key: [ $4 ASC, $5 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 1 ], read pk prefix len hint: 3 }
+ Table 4294967294 { columns: [ auction, price, bidder, date_time, _row_id, $expr5 ], primary key: [ $4 ASC, $5 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 1, 4, 5 ], read pk prefix len hint: 3 }
eowc_stream_plan: |-
StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time] }
@@ -845,52 +854,56 @@
└─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime, $expr5(hidden), $expr6(hidden), $expr8(hidden)] }
- └─StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all }
- ├─StreamExchange { dist: HashShard($expr3, $expr2, $expr5) }
- │ └─StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] }
- │ └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) }
- │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] }
- │ └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] }
- │ └─StreamFilter { predicate: (event_type = 0:Int32) }
- │ └─StreamShare { id: 6 }
- │ └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] }
- └─StreamExchange { dist: HashShard($expr7, $expr6, $expr8) }
- └─StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] }
- └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] }
- └─StreamFilter { predicate: (event_type = 1:Int32) }
- └─StreamShare { id: 6 }
- └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
- └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) }
+ └─StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all }
+ ├─StreamExchange { dist: HashShard($expr3, $expr2, $expr5) }
+ │ └─StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] }
+ │ └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) }
+ │ └─StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] }
+ │ └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] }
+ │ └─StreamFilter { predicate: (event_type = 0:Int32) }
+ │ └─StreamShare { id: 6 }
+ │ └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] }
+ └─StreamExchange { dist: HashShard($expr7, $expr6, $expr8) }
+ └─StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] }
+ └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] }
+ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ └─StreamShare { id: 6 }
+ └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
+ └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime, $expr5(hidden), $expr6(hidden), $expr8(hidden)] }
├── materialized table: 4294967294
- └── StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0, 2, 3]) from 1
- └── StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } { state table: 7 }
- └── StreamExchange Hash([0, 1, 2]) from 4
+ └── StreamExchange Hash([0, 1, 2, 3]) from 1
Fragment 1
- StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } { state table: 4 }
- └── StreamExchange Hash([0, 1, 2, 3]) from 2
+ StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0, 2, 3]) from 2
+ └── StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } { state table: 7 }
+ └── StreamExchange Hash([0, 1, 2]) from 5
Fragment 2
+ StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } { state table: 4 }
+ └── StreamExchange Hash([0, 1, 2, 3]) from 3
+
+ Fragment 3
StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] }
└── StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] }
└── StreamFilter { predicate: (event_type = 0:Int32) }
- └── StreamExchange NoShuffle from 3
+ └── StreamExchange NoShuffle from 4
- Fragment 3
+ Fragment 4
StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
@@ -898,11 +911,11 @@
└── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 6 }
- Fragment 4
+ Fragment 5
StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] }
└── StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 3
+ └── StreamExchange NoShuffle from 4
Table 0 { columns: [ $expr3, $expr4, $expr2, $expr5 ], primary key: [ $2 ASC, $3 ASC, $0 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 }
@@ -920,7 +933,7 @@
Table 7 { columns: [ $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
- Table 4294967294 { columns: [ id, name, starttime, $expr5, $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 4 }
+ Table 4294967294 { columns: [ id, name, starttime, $expr5, $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 1, 2, 3 ], read pk prefix len hint: 4 }
eowc_stream_plan: |-
StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime] }
@@ -1715,41 +1728,45 @@
└─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
- └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: (event_type = 2:Int32) }
- │ └─StreamShare { id: 6 }
- │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamExchange { dist: HashShard($expr7) }
- └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] }
- └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
- └─StreamShare { id: 6 }
- └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
- └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr2, _row_id, _row_id) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: (event_type = 2:Int32) }
+ │ └─StreamShare { id: 6 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ │ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr7) }
+ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] }
+ └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
+ └─StreamShare { id: 6 }
+ └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+ └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([0, 14, 15]) from 1
Fragment 1
+ StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: (event_type = 2:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
└── StreamRowIdGen { row_id_index: 5 }
@@ -1757,10 +1774,10 @@
└── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] }
└── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
Table 0 { columns: [ $expr2, $expr3, $expr4, $expr5, $expr6, $expr1, _row_id ], primary key: [ $0 ASC, $6 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
@@ -1774,7 +1791,12 @@
Table 5 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 }
- Table 4294967294 { columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ], primary key: [ $14 ASC, $15 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
+ Table 4294967294
+ ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ]
+ ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ]
+ ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
+ ├── distribution key: [ 0, 14, 15 ]
+ └── read pk prefix len hint: 3
eowc_stream_error: |-
Not supported: The query cannot be executed in Emit-On-Window-Close mode.
@@ -1909,45 +1931,49 @@
└─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr4(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
- │ └─StreamFilter { predicate: (event_type = 1:Int32) }
- │ └─StreamShare { id: 6 }
- │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamProject { exprs: [$expr4, max($expr5)] }
- └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] }
- └─StreamExchange { dist: HashShard($expr4) }
- └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] }
- └─StreamFilter { predicate: (event_type = 2:Int32) }
- └─StreamShare { id: 6 }
- └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
- └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr2, _row_id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+ │ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ │ └─StreamShare { id: 6 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamProject { exprs: [$expr4, max($expr5)] }
+ └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] }
+ └─StreamExchange { dist: HashShard($expr4) }
+ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] }
+ └─StreamFilter { predicate: (event_type = 2:Int32) }
+ └─StreamShare { id: 6 }
+ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr4(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [$expr4, max($expr5)] }
- └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([0, 3]) from 1
Fragment 1
+ StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [$expr4, max($expr5)] }
+ └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, _row_id] }
└── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
@@ -1955,10 +1981,10 @@
└── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] }
└── StreamFilter { predicate: (event_type = 2:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
@@ -1978,7 +2004,7 @@
├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, $expr4 ]
├── primary key: [ $3 ASC, $0 ASC ]
├── value indices: [ 0, 1, 2, 3, 4 ]
- ├── distribution key: [ 0 ]
+ ├── distribution key: [ 0, 3 ]
└── read pk prefix len hint: 2
eowc_stream_error: |-
@@ -2184,47 +2210,51 @@
└─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
- │ └─StreamFilter { predicate: (event_type = 1:Int32) }
- │ └─StreamShare { id: 6 }
- │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamProject { exprs: [$expr4] }
- └─StreamFilter { predicate: (count >= 20:Int32) }
- └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
- └─StreamExchange { dist: HashShard($expr4) }
- └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
- └─StreamFilter { predicate: (event_type = 2:Int32) }
- └─StreamShare { id: 6 }
- └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
- └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr2, _row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+ │ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ │ └─StreamShare { id: 6 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamProject { exprs: [$expr4] }
+ └─StreamFilter { predicate: (count >= 20:Int32) }
+ └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
+ └─StreamExchange { dist: HashShard($expr4) }
+ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
+ └─StreamFilter { predicate: (event_type = 2:Int32) }
+ └─StreamShare { id: 6 }
+ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [$expr4] }
- └── StreamFilter { predicate: (count >= 20:Int32) }
- └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([0, 2]) from 1
Fragment 1
+ StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [$expr4] }
+ └── StreamFilter { predicate: (count >= 20:Int32) }
+ └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, _row_id] }
└── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
@@ -2232,10 +2262,10 @@
└── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
└── StreamFilter { predicate: (event_type = 2:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
@@ -2251,7 +2281,7 @@
Table 6 { columns: [ $expr4, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 2 }
+ Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 2 ], read pk prefix len hint: 2 }
eowc_stream_error: |-
Not supported: The query cannot be executed in Emit-On-Window-Close mode.
@@ -2290,47 +2320,51 @@
└─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all }
- ├─StreamExchange { dist: HashShard($expr2) }
- │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
- │ └─StreamFilter { predicate: (event_type = 1:Int32) }
- │ └─StreamShare { id: 6 }
- │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
- │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- │ └─StreamRowIdGen { row_id_index: 5 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
- └─StreamProject { exprs: [$expr4] }
- └─StreamFilter { predicate: (count < 20:Int32) }
- └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
- └─StreamExchange { dist: HashShard($expr4) }
- └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
- └─StreamFilter { predicate: (event_type = 2:Int32) }
- └─StreamShare { id: 6 }
- └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
- └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
- └─StreamRowIdGen { row_id_index: 5 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
- └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
- └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamExchange { dist: HashShard($expr2, _row_id) }
+ └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all }
+ ├─StreamExchange { dist: HashShard($expr2) }
+ │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+ │ └─StreamFilter { predicate: (event_type = 1:Int32) }
+ │ └─StreamShare { id: 6 }
+ │ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+ │ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ │ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ │ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+ └─StreamProject { exprs: [$expr4] }
+ └─StreamFilter { predicate: (count < 20:Int32) }
+ └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
+ └─StreamExchange { dist: HashShard($expr4) }
+ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
+ └─StreamFilter { predicate: (event_type = 2:Int32) }
+ └─StreamShare { id: 6 }
+ └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+ └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+ └─StreamRowIdGen { row_id_index: 5 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+ └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+ └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [$expr4] }
- └── StreamFilter { predicate: (count < 20:Int32) }
- └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
- └── StreamExchange Hash([0]) from 3
+ └── StreamExchange Hash([0, 2]) from 1
Fragment 1
+ StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [$expr4] }
+ └── StreamFilter { predicate: (count < 20:Int32) }
+ └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 2
StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
└── StreamFilter { predicate: (event_type = 1:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 2
+ Fragment 3
StreamProject { exprs: [event_type, auction, bid, _row_id] }
└── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
└── StreamRowIdGen { row_id_index: 5 }
@@ -2338,10 +2372,10 @@
└── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
└── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
- Fragment 3
+ Fragment 4
StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
└── StreamFilter { predicate: (event_type = 2:Int32) }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
@@ -2357,7 +2391,7 @@
Table 6 { columns: [ $expr4, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 2 }
+ Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 2 ], read pk prefix len hint: 2 }
eowc_stream_error: |-
Not supported: The query cannot be executed in Emit-On-Window-Close mode.
diff --git a/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml b/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml
index 5cc81578f829c..733a19f4ba05c 100644
--- a/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml
@@ -185,15 +185,16 @@
└─BatchScan { table: t, columns: [t.x, t.y, t.w], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [x, y, sum, max, min, t._row_id(hidden), t.y(hidden)], stream_key: [t._row_id, y], pk_columns: [t._row_id, y], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.y = t.y, output: [t.x, t.y, sum(t.x), max(t.x), min(t.w), t._row_id, t.y] }
- ├─StreamExchange { dist: HashShard(t.y) }
- │ └─StreamShare { id: 1 }
- │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamProject { exprs: [t.y, sum(t.x), max(t.x), min(t.w)] }
- └─StreamHashAgg { group_key: [t.y], aggs: [sum(t.x), max(t.x), min(t.w), count] }
- └─StreamExchange { dist: HashShard(t.y) }
- └─StreamShare { id: 1 }
- └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.y, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.y = t.y, output: [t.x, t.y, sum(t.x), max(t.x), min(t.w), t._row_id, t.y] }
+ ├─StreamExchange { dist: HashShard(t.y) }
+ │ └─StreamShare { id: 1 }
+ │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamProject { exprs: [t.y, sum(t.x), max(t.x), min(t.w)] }
+ └─StreamHashAgg { group_key: [t.y], aggs: [sum(t.x), max(t.x), min(t.w), count] }
+ └─StreamExchange { dist: HashShard(t.y) }
+ └─StreamShare { id: 1 }
+ └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- id: aggregate with over clause, rows frame definition with implicit current row, without ORDER BY
sql: |
create table t(x int, y int);
@@ -913,12 +914,13 @@
└─BatchScan { table: t, columns: [t.x, t.y, t.z], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [t1x, t2x, t1z, t2y, t2z, t._row_id(hidden)], stream_key: [t1x, t._row_id], pk_columns: [t1x, t._row_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.x = t.x, output: [t.x, t.x, t.z, t.y, t.z, t._row_id] }
- ├─StreamGroupTopN { order: [t.y ASC], limit: 1, offset: 0, group_key: [t.x] }
- │ └─StreamExchange { dist: HashShard(t.x) }
- │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(t.x) }
- └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.x, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.x = t.x, output: [t.x, t.x, t.z, t.y, t.z, t._row_id] }
+ ├─StreamGroupTopN { order: [t.y ASC], limit: 1, offset: 0, group_key: [t.x] }
+ │ └─StreamExchange { dist: HashShard(t.x) }
+ │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.x) }
+ └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- id: split calls with different ORDER BY or PARTITION BY
sql: |
create table t(x int, y int, z int);
diff --git a/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml b/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml
index 55131ed1614cd..65469e7754e6b 100644
--- a/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml
@@ -21,15 +21,16 @@
Tone.id = Ttwo.id;
stream_plan: |-
StreamMaterialize { columns: [max_v1, max_v2, t1.id(hidden), t2.id(hidden)], stream_key: [t1.id], pk_columns: [t1.id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.id = t2.id, output: [max(t1.v1), max(t2.v2), t1.id, t2.id] }
- ├─StreamProject { exprs: [t1.id, max(t1.v1)] }
- │ └─StreamHashAgg { group_key: [t1.id], aggs: [max(t1.v1), count] }
- │ └─StreamExchange { dist: HashShard(t1.id) }
- │ └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamProject { exprs: [t2.id, max(t2.v2)] }
- └─StreamHashAgg { group_key: [t2.id], aggs: [max(t2.v2), count] }
- └─StreamExchange { dist: HashShard(t2.id) }
- └─StreamTableScan { table: t2, columns: [t2.id, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.id = t2.id, output: [max(t1.v1), max(t2.v2), t1.id, t2.id] }
+ ├─StreamProject { exprs: [t1.id, max(t1.v1)] }
+ │ └─StreamHashAgg { group_key: [t1.id], aggs: [max(t1.v1), count] }
+ │ └─StreamExchange { dist: HashShard(t1.id) }
+ │ └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamProject { exprs: [t2.id, max(t2.v2)] }
+ └─StreamHashAgg { group_key: [t2.id], aggs: [max(t2.v2), count] }
+ └─StreamExchange { dist: HashShard(t2.id) }
+ └─StreamTableScan { table: t2, columns: [t2.id, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- sql: |
create table t (id int, v int);
SELECT Tone.max_v, Ttwo.min_v
@@ -51,15 +52,16 @@
Tone.id = Ttwo.id;
stream_plan: |-
StreamMaterialize { columns: [max_v, min_v, t.id(hidden), t.id#1(hidden)], stream_key: [t.id], pk_columns: [t.id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.id = t.id, output: [max(t.v), min(t.v), t.id, t.id] }
- ├─StreamProject { exprs: [t.id, max(t.v)] }
- │ └─StreamHashAgg { group_key: [t.id], aggs: [max(t.v), count] }
- │ └─StreamExchange { dist: HashShard(t.id) }
- │ └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamProject { exprs: [t.id, min(t.v)] }
- └─StreamHashAgg { group_key: [t.id], aggs: [min(t.v), count] }
- └─StreamExchange { dist: HashShard(t.id) }
- └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.id) }
+ └─StreamHashJoin { type: Inner, predicate: t.id = t.id, output: [max(t.v), min(t.v), t.id, t.id] }
+ ├─StreamProject { exprs: [t.id, max(t.v)] }
+ │ └─StreamHashAgg { group_key: [t.id], aggs: [max(t.v), count] }
+ │ └─StreamExchange { dist: HashShard(t.id) }
+ │ └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamProject { exprs: [t.id, min(t.v)] }
+ └─StreamHashAgg { group_key: [t.id], aggs: [min(t.v), count] }
+ └─StreamExchange { dist: HashShard(t.id) }
+ └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- sql: |
create table t (v1 varchar, v2 varchar, v3 varchar);
select
diff --git a/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml b/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml
index ae37459ef7bed..91dff73df0e6a 100644
--- a/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml
@@ -260,15 +260,16 @@
└─LogicalScan { table: t2, columns: [t2.v2], predicate: (t2.v2 > ('2021-04-01 00:00:00+00:00':Timestamptz + '01:00:00':Interval)) }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1], pk_columns: [t1._row_id, t2._row_id, v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamDynamicFilter { predicate: (t1.v1 > $expr1), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
- │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ └─StreamExchange { dist: Broadcast }
- │ └─StreamProject { exprs: [AddWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
- │ └─StreamNow { output: [now] }
- └─StreamExchange { dist: HashShard(t2.v2) }
- └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamDynamicFilter { predicate: (t1.v1 > $expr1), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
+ │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ └─StreamExchange { dist: Broadcast }
+ │ └─StreamProject { exprs: [AddWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+ │ └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard(t2.v2) }
+ └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: now() in a complex cmp expr does not get pushed down
sql: |
create table t1(v1 timestamp with time zone);
@@ -343,14 +344,15 @@
└─LogicalScan { table: t2, columns: [t2.v2], predicate: (t2.v2 > '2021-04-01 00:00:00+00:00':Timestamptz) }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1], pk_columns: [t1._row_id, t2._row_id, v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamDynamicFilter { predicate: (t1.v1 > now), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
- │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ └─StreamExchange { dist: Broadcast }
- │ └─StreamNow { output: [now] }
- └─StreamExchange { dist: HashShard(t2.v2) }
- └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamDynamicFilter { predicate: (t1.v1 > now), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
+ │ ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ └─StreamExchange { dist: Broadcast }
+ │ └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard(t2.v2) }
+ └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: eq-predicate derived condition is banned for mismatching types
sql: |
create table t1(v1 int, v2 int);
diff --git a/src/frontend/planner_test/tests/testdata/output/project_set.yaml b/src/frontend/planner_test/tests/testdata/output/project_set.yaml
index 23db668a070df..676772d99d72e 100644
--- a/src/frontend/planner_test/tests/testdata/output/project_set.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/project_set.yaml
@@ -155,17 +155,18 @@
└─BatchScan { table: t, columns: [t.x], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [unnest, t._row_id(hidden), projected_row_id(hidden), t._row_id#1(hidden), projected_row_id#1(hidden)], stream_key: [t._row_id, projected_row_id, t._row_id#1, projected_row_id#1, unnest], pk_columns: [t._row_id, projected_row_id, t._row_id#1, projected_row_id#1, unnest], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: Unnest($0) = Unnest($0), output: [Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id] }
- ├─StreamExchange { dist: HashShard(Unnest($0)) }
- │ └─StreamShare { id: 3 }
- │ └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
- │ └─StreamProjectSet { select_list: [Unnest($0), $1] }
- │ └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(Unnest($0)) }
- └─StreamShare { id: 3 }
- └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
- └─StreamProjectSet { select_list: [Unnest($0), $1] }
- └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id) }
+ └─StreamHashJoin { type: Inner, predicate: Unnest($0) = Unnest($0), output: [Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id] }
+ ├─StreamExchange { dist: HashShard(Unnest($0)) }
+ │ └─StreamShare { id: 3 }
+ │ └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
+ │ └─StreamProjectSet { select_list: [Unnest($0), $1] }
+ │ └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(Unnest($0)) }
+ └─StreamShare { id: 3 }
+ └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
+ └─StreamProjectSet { select_list: [Unnest($0), $1] }
+ └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- name: issue-10080
sql: |
with cte as (SELECT 1 as v1, unnest(array[1,2,3,4,5]) AS v2) select v1 from cte;
diff --git a/src/frontend/planner_test/tests/testdata/output/select_except.yaml b/src/frontend/planner_test/tests/testdata/output/select_except.yaml
index 2193524b7076f..ffd6da30b90bc 100644
--- a/src/frontend/planner_test/tests/testdata/output/select_except.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/select_except.yaml
@@ -34,11 +34,12 @@
└─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, v3, v2, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v1], pk_columns: [t._row_id, t._row_id#1, v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v3, t.v2, t._row_id, t._row_id] }
- ├─StreamExchange { dist: HashShard(t.v1) }
- │ └─StreamTableScan { table: t, columns: [t.v1, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(t.v1) }
- └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v3, t.v2, t._row_id, t._row_id] }
+ ├─StreamExchange { dist: HashShard(t.v1) }
+ │ └─StreamTableScan { table: t, columns: [t.v1, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v1) }
+ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- name: qualified wildcard
sql: |
create table t (v1 int, v2 int, v3 int);
@@ -52,11 +53,12 @@
└─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, v2, v3, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v1], pk_columns: [t._row_id, t._row_id#1, v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v2, t.v3, t._row_id, t._row_id] }
- ├─StreamExchange { dist: HashShard(t.v1) }
- │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- └─StreamExchange { dist: HashShard(t.v1) }
- └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v2, t.v3, t._row_id, t._row_id] }
+ ├─StreamExchange { dist: HashShard(t.v1) }
+ │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+ └─StreamExchange { dist: HashShard(t.v1) }
+ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
- name: except with unknown column
sql: |
create table t (v1 int, v2 int, v3 int);
diff --git a/src/frontend/planner_test/tests/testdata/output/share.yaml b/src/frontend/planner_test/tests/testdata/output/share.yaml
index 2815b00784b1d..15404d6d863ab 100644
--- a/src/frontend/planner_test/tests/testdata/output/share.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/share.yaml
@@ -343,24 +343,25 @@
└─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [a_id, b_id, a_ts, b_ts, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, a_id], pk_columns: [_row_id, _row_id#1, a_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: id = id, output: [id, id, date_time, date_time, _row_id, _row_id] }
- ├─StreamExchange { dist: HashShard(id) }
- │ └─StreamProject { exprs: [id, date_time, _row_id] }
- │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [id, date_time, $expr1, _row_id], cleaned_by_watermark: true }
- │ ├─StreamProject { exprs: [id, date_time, AtTimeZone(date_time, 'UTC':Varchar) as $expr1, _row_id] }
- │ │ └─StreamFilter { predicate: (initial_bid = 1:Int32) }
- │ │ └─StreamShare { id: 4 }
- │ │ └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
- │ │ └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
- │ │ └─StreamRowIdGen { row_id_index: 10 }
- │ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
- │ └─StreamExchange { dist: Broadcast }
- │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:00:01':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
- │ └─StreamNow { output: [now] }
- └─StreamExchange { dist: HashShard(id) }
- └─StreamFilter { predicate: (initial_bid = 2:Int32) }
- └─StreamShare { id: 4 }
- └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
- └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
- └─StreamRowIdGen { row_id_index: 10 }
- └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ └─StreamExchange { dist: HashShard(id, _row_id, _row_id) }
+ └─StreamHashJoin { type: Inner, predicate: id = id, output: [id, id, date_time, date_time, _row_id, _row_id] }
+ ├─StreamExchange { dist: HashShard(id) }
+ │ └─StreamProject { exprs: [id, date_time, _row_id] }
+ │ └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [id, date_time, $expr1, _row_id], cleaned_by_watermark: true }
+ │ ├─StreamProject { exprs: [id, date_time, AtTimeZone(date_time, 'UTC':Varchar) as $expr1, _row_id] }
+ │ │ └─StreamFilter { predicate: (initial_bid = 1:Int32) }
+ │ │ └─StreamShare { id: 4 }
+ │ │ └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
+ │ │ └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
+ │ │ └─StreamRowIdGen { row_id_index: 10 }
+ │ │ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+ │ └─StreamExchange { dist: Broadcast }
+ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:00:01':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+ │ └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard(id) }
+ └─StreamFilter { predicate: (initial_bid = 2:Int32) }
+ └─StreamShare { id: 4 }
+ └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
+ └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
+ └─StreamRowIdGen { row_id_index: 10 }
+ └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
diff --git a/src/frontend/planner_test/tests/testdata/output/shared_views.yaml b/src/frontend/planner_test/tests/testdata/output/shared_views.yaml
index 775812f77b59c..3777705c97ced 100644
--- a/src/frontend/planner_test/tests/testdata/output/shared_views.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/shared_views.yaml
@@ -23,19 +23,20 @@
└─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] }
stream_plan: |-
StreamMaterialize { columns: [z, a, b, t1._row_id(hidden), t1._row_id#1(hidden), t1._row_id#2(hidden), t1.x(hidden)], stream_key: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, z], pk_columns: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, z], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: $expr1 = $expr2, output: [$expr1, $expr2, $expr3, t1._row_id, t1._row_id, t1._row_id, t1.x] }
- ├─StreamExchange { dist: HashShard($expr1) }
- │ └─StreamShare { id: 3 }
- │ └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
- │ └─StreamFilter { predicate: (t1.y > 0:Int32) }
- │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard($expr2) }
- └─StreamProject { exprs: [(t1.x * $expr1) as $expr2, (t1.y * $expr1) as $expr3, t1._row_id, t1._row_id, t1.x] }
- └─StreamHashJoin { type: Inner, predicate: t1.x = $expr1, output: [t1.x, t1.y, $expr1, t1._row_id, t1._row_id] }
- ├─StreamExchange { dist: HashShard(t1.x) }
- │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard($expr1) }
- └─StreamShare { id: 3 }
- └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
- └─StreamFilter { predicate: (t1.y > 0:Int32) }
- └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard($expr1, t1._row_id, t1._row_id, t1._row_id, t1.x) }
+ └─StreamHashJoin { type: Inner, predicate: $expr1 = $expr2, output: [$expr1, $expr2, $expr3, t1._row_id, t1._row_id, t1._row_id, t1.x] }
+ ├─StreamExchange { dist: HashShard($expr1) }
+ │ └─StreamShare { id: 3 }
+ │ └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
+ │ └─StreamFilter { predicate: (t1.y > 0:Int32) }
+ │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard($expr2) }
+ └─StreamProject { exprs: [(t1.x * $expr1) as $expr2, (t1.y * $expr1) as $expr3, t1._row_id, t1._row_id, t1.x] }
+ └─StreamHashJoin { type: Inner, predicate: t1.x = $expr1, output: [t1.x, t1.y, $expr1, t1._row_id, t1._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.x) }
+ │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard($expr1) }
+ └─StreamShare { id: 3 }
+ └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
+ └─StreamFilter { predicate: (t1.y > 0:Int32) }
+ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/subquery.yaml b/src/frontend/planner_test/tests/testdata/output/subquery.yaml
index 1ad1d9f92c418..e07e84e040929 100644
--- a/src/frontend/planner_test/tests/testdata/output/subquery.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/subquery.yaml
@@ -346,21 +346,22 @@
└─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [date_time, window_start, window_end, auction._row_id(hidden)], stream_key: [auction._row_id, window_start, window_end, date_time], pk_columns: [auction._row_id, window_start, window_end, date_time], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: auction.date_time IS NOT DISTINCT FROM auction.date_time, output: all }
- ├─StreamExchange { dist: HashShard(auction.date_time) }
- │ └─StreamShare { id: 3 }
- │ └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
- │ └─StreamFilter { predicate: IsNotNull(auction.date_time) }
- │ └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
- └─StreamProject { exprs: [auction.date_time] }
- └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
- └─StreamProject { exprs: [auction.date_time] }
- └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
- └─StreamExchange { dist: HashShard(auction.date_time) }
- └─StreamShare { id: 3 }
- └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
- └─StreamFilter { predicate: IsNotNull(auction.date_time) }
- └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
+ └─StreamExchange { dist: HashShard(auction.date_time, window_start, window_end, auction._row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: auction.date_time IS NOT DISTINCT FROM auction.date_time, output: all }
+ ├─StreamExchange { dist: HashShard(auction.date_time) }
+ │ └─StreamShare { id: 3 }
+ │ └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
+ │ └─StreamFilter { predicate: IsNotNull(auction.date_time) }
+ │ └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
+ └─StreamProject { exprs: [auction.date_time] }
+ └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
+ └─StreamProject { exprs: [auction.date_time] }
+ └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
+ └─StreamExchange { dist: HashShard(auction.date_time) }
+ └─StreamShare { id: 3 }
+ └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
+ └─StreamFilter { predicate: IsNotNull(auction.date_time) }
+ └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
- sql: |
CREATE TABLE t (v int);
SELECT 1 FROM t AS t_inner WHERE EXISTS ( SELECT 1 HAVING t_inner.v > 1);
@@ -535,22 +536,23 @@
└─BatchScan { table: t, columns: [t.x], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [x, y, k, sum_x, t.x(hidden)], stream_key: [k, x], pk_columns: [k, x], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, t.y, t.k, sum(Unnest($0)), t.x] }
- ├─StreamExchange { dist: HashShard(t.x) }
- │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
- └─StreamProject { exprs: [t.x, sum(Unnest($0))] }
- └─StreamHashAgg { group_key: [t.x], aggs: [sum(Unnest($0)), count] }
- └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, Unnest($0), t.x, projected_row_id] }
- ├─StreamProject { exprs: [t.x] }
- │ └─StreamHashAgg { group_key: [t.x], aggs: [count] }
- │ └─StreamExchange { dist: HashShard(t.x) }
- │ └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
- └─StreamProject { exprs: [t.x, Unnest($0), projected_row_id] }
- └─StreamProjectSet { select_list: [$0, Unnest($0)] }
- └─StreamProject { exprs: [t.x] }
- └─StreamHashAgg { group_key: [t.x], aggs: [count] }
- └─StreamExchange { dist: HashShard(t.x) }
- └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
+ └─StreamExchange { dist: HashShard(t.x, t.k) }
+ └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, t.y, t.k, sum(Unnest($0)), t.x] }
+ ├─StreamExchange { dist: HashShard(t.x) }
+ │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
+ └─StreamProject { exprs: [t.x, sum(Unnest($0))] }
+ └─StreamHashAgg { group_key: [t.x], aggs: [sum(Unnest($0)), count] }
+ └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, Unnest($0), t.x, projected_row_id] }
+ ├─StreamProject { exprs: [t.x] }
+ │ └─StreamHashAgg { group_key: [t.x], aggs: [count] }
+ │ └─StreamExchange { dist: HashShard(t.x) }
+ │ └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
+ └─StreamProject { exprs: [t.x, Unnest($0), projected_row_id] }
+ └─StreamProjectSet { select_list: [$0, Unnest($0)] }
+ └─StreamProject { exprs: [t.x] }
+ └─StreamHashAgg { group_key: [t.x], aggs: [count] }
+ └─StreamExchange { dist: HashShard(t.x) }
+ └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
- name: CorrelatedInputRef in ProjectSet and apply on condition is true.
sql: |
create table t(x int[], y int[], k int primary key);
@@ -582,29 +584,29 @@
create table t(x int[], y int[], k int primary key);
select *, (select sum(i) from (select unnest(x) i, 1 c) Q where k = c ) as sum_x from t;
optimized_logical_plan_for_batch: |-
- LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
+ LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
├─LogicalScan { table: t, columns: [t.x, t.y, t.k] }
- └─LogicalAgg { group_key: [first_value(t.x order_by(t.x ASC)), t.k], aggs: [sum(Unnest($0))] }
- └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(first_value(t.x order_by(t.x ASC)), first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
- ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+ └─LogicalAgg { group_key: [internal_last_seen_value(t.x), t.k], aggs: [sum(Unnest($0))] }
+ └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(internal_last_seen_value(t.x), internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
+ ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
│ └─LogicalScan { table: t, columns: [t.x, t.k] }
- └─LogicalProject { exprs: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
+ └─LogicalProject { exprs: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
└─LogicalProjectSet { select_list: [$0, $1, Unnest($0)] }
- └─LogicalJoin { type: Inner, on: true, output: [first_value(t.x order_by(t.x ASC)), t.k] }
- ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+ └─LogicalJoin { type: Inner, on: true, output: [internal_last_seen_value(t.x), t.k] }
+ ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
│ └─LogicalScan { table: t, columns: [t.x, t.k], predicate: (t.k = 1:Int32) }
└─LogicalValues { rows: [[]], schema: Schema { fields: [] } }
optimized_logical_plan_for_stream: |-
- LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
+ LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
├─LogicalScan { table: t, columns: [t.x, t.y, t.k] }
- └─LogicalAgg { group_key: [first_value(t.x order_by(t.x ASC)), t.k], aggs: [sum(Unnest($0))] }
- └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(first_value(t.x order_by(t.x ASC)), first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
- ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+ └─LogicalAgg { group_key: [internal_last_seen_value(t.x), t.k], aggs: [sum(Unnest($0))] }
+ └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(internal_last_seen_value(t.x), internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
+ ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
│ └─LogicalScan { table: t, columns: [t.x, t.k] }
- └─LogicalProject { exprs: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
+ └─LogicalProject { exprs: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
└─LogicalProjectSet { select_list: [$0, $1, Unnest($0)] }
- └─LogicalJoin { type: Inner, on: true, output: [first_value(t.x order_by(t.x ASC)), t.k] }
- ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+ └─LogicalJoin { type: Inner, on: true, output: [internal_last_seen_value(t.x), t.k] }
+ ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
│ └─LogicalScan { table: t, columns: [t.x, t.k], predicate: (t.k = 1:Int32) }
└─LogicalValues { rows: [[]], schema: Schema { fields: [] } }
- name: CorrelatedInputRef in ProjectSet and apply on condition refers to table function.
@@ -632,16 +634,17 @@
└─BatchScan { table: integers, columns: [integers.i], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [i, col, integers._row_id(hidden), integers.i(hidden)], stream_key: [integers._row_id, i], pk_columns: [i, integers._row_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, row_number, integers._row_id, integers.i] }
- ├─StreamExchange { dist: HashShard(integers.i) }
- │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
- └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
- └─StreamProject { exprs: [integers.i, row_number, integers._row_id] }
- └─StreamOverWindow { window_functions: [row_number() OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
- └─StreamExchange { dist: HashShard(integers.i) }
- └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
- └─StreamFilter { predicate: IsNotNull(integers.i) }
- └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+ └─StreamExchange { dist: HashShard(integers.i, integers._row_id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, row_number, integers._row_id, integers.i] }
+ ├─StreamExchange { dist: HashShard(integers.i) }
+ │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+ └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
+ └─StreamProject { exprs: [integers.i, row_number, integers._row_id] }
+ └─StreamOverWindow { window_functions: [row_number() OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
+ └─StreamExchange { dist: HashShard(integers.i) }
+ └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
+ └─StreamFilter { predicate: IsNotNull(integers.i) }
+ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
- name: test over window subquery 2 (with nested loop join so cannot be transformed into a stream plan)
sql: |
CREATE TABLE integers(i INTEGER);
@@ -690,16 +693,17 @@
└─BatchScan { table: integers, columns: [integers.i], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [i, col, integers._row_id(hidden), integers.i(hidden)], stream_key: [integers._row_id, i], pk_columns: [i, integers._row_id], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, sum, integers._row_id, integers.i] }
- ├─StreamExchange { dist: HashShard(integers.i) }
- │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
- └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
- └─StreamProject { exprs: [integers.i, sum, integers._row_id] }
- └─StreamOverWindow { window_functions: [sum(integers.i) OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
- └─StreamExchange { dist: HashShard(integers.i) }
- └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
- └─StreamFilter { predicate: IsNotNull(integers.i) }
- └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+ └─StreamExchange { dist: HashShard(integers.i, integers._row_id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, sum, integers._row_id, integers.i] }
+ ├─StreamExchange { dist: HashShard(integers.i) }
+ │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+ └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
+ └─StreamProject { exprs: [integers.i, sum, integers._row_id] }
+ └─StreamOverWindow { window_functions: [sum(integers.i) OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
+ └─StreamExchange { dist: HashShard(integers.i) }
+ └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
+ └─StreamFilter { predicate: IsNotNull(integers.i) }
+ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
- name: test over window subquery 4 (with nested loop join so cannot be transformed into a stream plan)
sql: |
CREATE TABLE integers(i INTEGER);
@@ -747,17 +751,18 @@
└─BatchScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [i, integers._row_id(hidden), $expr1(hidden), integers.correlated_col(hidden)], stream_key: [integers._row_id, $expr1, integers.correlated_col], pk_columns: [integers._row_id, $expr1, integers.correlated_col], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: $expr1 = sum AND integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.i, integers._row_id, $expr1, integers.correlated_col] }
- ├─StreamExchange { dist: HashShard(integers.correlated_col, $expr1) }
- │ └─StreamProject { exprs: [integers.i, integers.correlated_col, integers.i::Int64 as $expr1, integers._row_id] }
- │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
- └─StreamExchange { dist: HashShard(rows.correlated_col, sum) }
- └─StreamProject { exprs: [rows.correlated_col, sum, rows._row_id, rows.k] }
- └─StreamOverWindow { window_functions: [sum(rows.v) OVER(PARTITION BY rows.correlated_col, rows.k ORDER BY rows.v ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
- └─StreamExchange { dist: HashShard(rows.correlated_col, rows.k) }
- └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
- └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
- └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
+ └─StreamExchange { dist: HashShard(integers._row_id, $expr1, integers.correlated_col) }
+ └─StreamHashJoin { type: LeftSemi, predicate: $expr1 = sum AND integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.i, integers._row_id, $expr1, integers.correlated_col] }
+ ├─StreamExchange { dist: HashShard(integers.correlated_col, $expr1) }
+ │ └─StreamProject { exprs: [integers.i, integers.correlated_col, integers.i::Int64 as $expr1, integers._row_id] }
+ │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+ └─StreamExchange { dist: HashShard(rows.correlated_col, sum) }
+ └─StreamProject { exprs: [rows.correlated_col, sum, rows._row_id, rows.k] }
+ └─StreamOverWindow { window_functions: [sum(rows.v) OVER(PARTITION BY rows.correlated_col, rows.k ORDER BY rows.v ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
+ └─StreamExchange { dist: HashShard(rows.correlated_col, rows.k) }
+ └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
+ └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
+ └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
- name: test cardinality visitor with correlated filter
sql: |
CREATE TABLE t1(i INT);
@@ -818,21 +823,22 @@
└─BatchScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [i, correlated_col, integers._row_id(hidden), 2:Int64(hidden)], stream_key: [integers._row_id, correlated_col, 2:Int64], pk_columns: [integers._row_id, correlated_col, 2:Int64], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: integers.correlated_col IS NOT DISTINCT FROM integers.correlated_col AND 2:Int64 = $expr1, output: [integers.i, integers.correlated_col, integers._row_id, 2:Int64] }
- ├─StreamExchange { dist: HashShard(integers.correlated_col) }
- │ └─StreamProject { exprs: [integers.i, integers.correlated_col, 2:Int64, integers._row_id] }
- │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
- └─StreamProject { exprs: [integers.correlated_col, (count(distinct rows.k) + count(distinct rows.v)) as $expr1] }
- └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count(distinct rows.k), count(distinct rows.v), count] }
- └─StreamHashJoin { type: LeftOuter, predicate: integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.correlated_col, rows.k, rows.v, rows._row_id] }
- ├─StreamProject { exprs: [integers.correlated_col] }
- │ └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count] }
- │ └─StreamExchange { dist: HashShard(integers.correlated_col) }
- │ └─StreamTableScan { table: integers, columns: [integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
- └─StreamExchange { dist: HashShard(rows.correlated_col) }
- └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
- └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
- └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
+ └─StreamExchange { dist: HashShard(integers.correlated_col, integers._row_id, 2:Int64) }
+ └─StreamHashJoin { type: LeftSemi, predicate: integers.correlated_col IS NOT DISTINCT FROM integers.correlated_col AND 2:Int64 = $expr1, output: [integers.i, integers.correlated_col, integers._row_id, 2:Int64] }
+ ├─StreamExchange { dist: HashShard(integers.correlated_col) }
+ │ └─StreamProject { exprs: [integers.i, integers.correlated_col, 2:Int64, integers._row_id] }
+ │ └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+ └─StreamProject { exprs: [integers.correlated_col, (count(distinct rows.k) + count(distinct rows.v)) as $expr1] }
+ └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count(distinct rows.k), count(distinct rows.v), count] }
+ └─StreamHashJoin { type: LeftOuter, predicate: integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.correlated_col, rows.k, rows.v, rows._row_id] }
+ ├─StreamProject { exprs: [integers.correlated_col] }
+ │ └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count] }
+ │ └─StreamExchange { dist: HashShard(integers.correlated_col) }
+ │ └─StreamTableScan { table: integers, columns: [integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+ └─StreamExchange { dist: HashShard(rows.correlated_col) }
+ └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
+ └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
+ └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
- name: test hop window subquery 1
sql: |
create table t1 (k int primary key, ts timestamp);
@@ -848,12 +854,13 @@
└─BatchValues { rows: [[1:Int32], [2:Int32]] }
stream_plan: |-
StreamMaterialize { columns: [col, k, ts, window_start, window_end], stream_key: [col, window_start, window_end], pk_columns: [col, window_start, window_end], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: 1:Int32 = t1.k, output: all }
- ├─StreamAppendOnlyDedup { dedup_cols: [1:Int32] }
- │ └─StreamExchange { dist: HashShard(1:Int32) }
- │ └─StreamProject { exprs: [1:Int32] }
- │ └─StreamValues { rows: [[1:Int32, 0:Int64], [2:Int32, 1:Int64]] }
- └─StreamExchange { dist: HashShard(t1.k) }
- └─StreamHopWindow { time_col: t1.ts, slide: 00:10:00, size: 00:30:00, output: all }
- └─StreamFilter { predicate: IsNotNull(t1.ts) }
- └─StreamTableScan { table: t1, columns: [t1.k, t1.ts], pk: [t1.k], dist: UpstreamHashShard(t1.k) }
+ └─StreamExchange { dist: HashShard(1:Int32, window_start, window_end) }
+ └─StreamHashJoin { type: Inner, predicate: 1:Int32 = t1.k, output: all }
+ ├─StreamAppendOnlyDedup { dedup_cols: [1:Int32] }
+ │ └─StreamExchange { dist: HashShard(1:Int32) }
+ │ └─StreamProject { exprs: [1:Int32] }
+ │ └─StreamValues { rows: [[1:Int32, 0:Int64], [2:Int32, 1:Int64]] }
+ └─StreamExchange { dist: HashShard(t1.k) }
+ └─StreamHopWindow { time_col: t1.ts, slide: 00:10:00, size: 00:30:00, output: all }
+ └─StreamFilter { predicate: IsNotNull(t1.ts) }
+ └─StreamTableScan { table: t1, columns: [t1.k, t1.ts], pk: [t1.k], dist: UpstreamHashShard(t1.k) }
diff --git a/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml b/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml
index 6d216ad9c81c4..0d393c378ff85 100644
--- a/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml
@@ -466,14 +466,14 @@
└─LogicalScan { table: c, columns: [c.c1, c.c2, c.c3, c._row_id] }
optimized_logical_plan_for_batch: |-
LogicalAgg { aggs: [count] }
- └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.a3, first_value(a.a3 order_by(a.a3 ASC))) AND IsNotDistinctFrom(b.b2, b.b2), output: [] }
+ └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.a3, internal_last_seen_value(a.a3)) AND IsNotDistinctFrom(b.b2, b.b2), output: [] }
├─LogicalJoin { type: Inner, on: (a.a3 = b.b2), output: all }
│ ├─LogicalScan { table: a, columns: [a.a3] }
│ └─LogicalScan { table: b, columns: [b.b2] }
└─LogicalFilter { predicate: (3:Int32 = count(1:Int32)) }
- └─LogicalAgg { group_key: [first_value(a.a3 order_by(a.a3 ASC)), b.b2], aggs: [count(1:Int32)] }
- └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(first_value(a.a3 order_by(a.a3 ASC)), c.c3) AND IsNotDistinctFrom(b.b2, c.c2), output: [first_value(a.a3 order_by(a.a3 ASC)), b.b2, 1:Int32] }
- ├─LogicalAgg { group_key: [b.b2], aggs: [first_value(a.a3 order_by(a.a3 ASC))] }
+ └─LogicalAgg { group_key: [internal_last_seen_value(a.a3), b.b2], aggs: [count(1:Int32)] }
+ └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(internal_last_seen_value(a.a3), c.c3) AND IsNotDistinctFrom(b.b2, c.c2), output: [internal_last_seen_value(a.a3), b.b2, 1:Int32] }
+ ├─LogicalAgg { group_key: [b.b2], aggs: [internal_last_seen_value(a.a3)] }
│ └─LogicalJoin { type: Inner, on: (a.a3 = b.b2), output: all }
│ ├─LogicalScan { table: a, columns: [a.a3] }
│ └─LogicalScan { table: b, columns: [b.b2] }
@@ -717,15 +717,16 @@
└─BatchScan { table: t2, columns: [t2.x], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [x, y, t1._row_id(hidden)], stream_key: [t1._row_id, x], pk_columns: [t1._row_id, x], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: t1.x IS NOT DISTINCT FROM t2.x, output: all }
- ├─StreamExchange { dist: HashShard(t1.x) }
- │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamProject { exprs: [t2.x] }
- └─StreamGroupTopN { order: [t2.x ASC], limit: 1, offset: 0, group_key: [t2.x] }
- └─StreamExchange { dist: HashShard(t2.x) }
- └─StreamProject { exprs: [t2.x, t2.x, t2._row_id] }
- └─StreamFilter { predicate: IsNotNull(t2.x) }
- └─StreamTableScan { table: t2, columns: [t2.x, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.x, t1._row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: t1.x IS NOT DISTINCT FROM t2.x, output: all }
+ ├─StreamExchange { dist: HashShard(t1.x) }
+ │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamProject { exprs: [t2.x] }
+ └─StreamGroupTopN { order: [t2.x ASC], limit: 1, offset: 0, group_key: [t2.x] }
+ └─StreamExchange { dist: HashShard(t2.x) }
+ └─StreamProject { exprs: [t2.x, t2.x, t2._row_id] }
+ └─StreamFilter { predicate: IsNotNull(t2.x) }
+ └─StreamTableScan { table: t2, columns: [t2.x, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- sql: |
create table t1(x int, y int);
create table t2(x int, y int);
@@ -883,14 +884,15 @@
└─BatchScan { table: t2, columns: [t2.v2, t2.k2], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1, k1], pk_columns: [t1._row_id, v1, k1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
- ├─StreamExchange { dist: HashShard(t1.k1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
- └─StreamExchange { dist: HashShard(t2.k2) }
- └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
- └─StreamFilter { predicate: IsNotNull(t2.k2) }
- └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1.k1, t1._row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
+ ├─StreamExchange { dist: HashShard(t1.k1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
+ └─StreamExchange { dist: HashShard(t2.k2) }
+ └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
+ └─StreamFilter { predicate: IsNotNull(t2.k2) }
+ └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: test ApplyTopNTransposeRule case 2
sql: |
create table t1 (v1 int, k1 int);
@@ -908,16 +910,17 @@
└─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1], pk_columns: [t1._row_id, v1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2, output: all }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.v2) }
- └─StreamProject { exprs: [t2.v2, t2._row_id] }
- └─StreamTopN { order: [t2.v2 ASC], limit: 1, offset: 0 }
- └─StreamExchange { dist: Single }
- └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [$expr1] }
- └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
- └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2, output: all }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v2) }
+ └─StreamProject { exprs: [t2.v2, t2._row_id] }
+ └─StreamTopN { order: [t2.v2 ASC], limit: 1, offset: 0 }
+ └─StreamExchange { dist: Single }
+ └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [$expr1] }
+ └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
+ └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: test ApplyLimitTransposeRule case 1
sql: |
create table t1 (v1 int, k1 int);
@@ -935,11 +938,12 @@
└─BatchScan { table: t2, columns: [t2.v2, t2.k2], distribution: SomeShard }
stream_plan: |-
StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1, k1], pk_columns: [t1._row_id, v1, k1], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
- ├─StreamExchange { dist: HashShard(t1.k1) }
- │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamGroupTopN { order: [t2.k2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
- └─StreamExchange { dist: HashShard(t2.k2) }
- └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
- └─StreamFilter { predicate: IsNotNull(t2.k2) }
- └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1.k1, t1._row_id) }
+ └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
+ ├─StreamExchange { dist: HashShard(t1.k1) }
+ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamGroupTopN { order: [t2.k2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
+ └─StreamExchange { dist: HashShard(t2.k2) }
+ └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
+ └─StreamFilter { predicate: IsNotNull(t2.k2) }
+ └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml
index 6673d86fd9745..29e391853cf8a 100644
--- a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml
@@ -122,19 +122,20 @@
select * from t1 join t2 on a = b AND ta < now() - interval '1 hour' and ta >= now() - interval '2 hour';
stream_plan: |-
StreamMaterialize { columns: [a, ta, b, tb, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, a], pk_columns: [t1._row_id, t2._row_id, a], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.a) }
- │ └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
- │ ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
- │ │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ │ └─StreamExchange { dist: Broadcast }
- │ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
- │ │ └─StreamNow { output: [now] }
- │ └─StreamExchange { dist: Broadcast }
- │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
- │ └─StreamNow { output: [now] }
- └─StreamExchange { dist: HashShard(t2.b) }
- └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.a, t1._row_id, t2._row_id) }
+ └─StreamHashJoin { type: Inner, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.a) }
+ │ └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
+ │ ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
+ │ │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ │ └─StreamExchange { dist: Broadcast }
+ │ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+ │ │ └─StreamNow { output: [now] }
+ │ └─StreamExchange { dist: Broadcast }
+ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+ │ └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard(t2.b) }
+ └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: Temporal filter in on clause for left join's left side
sql: |
create table t1 (a int, ta timestamp with time zone);
@@ -150,19 +151,20 @@
select * from t1 right join t2 on a = b AND ta < now() - interval '1 hour' and ta >= now() - interval '2 hour';
stream_plan: |-
StreamMaterialize { columns: [a, ta, b, tb, t2._row_id(hidden), t1._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, b], pk_columns: [t2._row_id, t1._row_id, b], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: t2.b = t1.a, output: [t1.a, t1.ta, t2.b, t2.tb, t2._row_id, t1._row_id] }
- ├─StreamExchange { dist: HashShard(t2.b) }
- │ └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- └─StreamExchange { dist: HashShard(t1.a) }
- └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
- ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
- │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- │ └─StreamExchange { dist: Broadcast }
- │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
- │ └─StreamNow { output: [now] }
- └─StreamExchange { dist: Broadcast }
- └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
- └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard(t2.b, t2._row_id, t1._row_id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: t2.b = t1.a, output: [t1.a, t1.ta, t2.b, t2.tb, t2._row_id, t1._row_id] }
+ ├─StreamExchange { dist: HashShard(t2.b) }
+ │ └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.a) }
+ └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
+ ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
+ │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ │ └─StreamExchange { dist: Broadcast }
+ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+ │ └─StreamNow { output: [now] }
+ └─StreamExchange { dist: Broadcast }
+ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+ └─StreamNow { output: [now] }
- name: Temporal filter in on clause for full join's left side
sql: |
create table t1 (a int, ta timestamp with time zone);
@@ -178,19 +180,20 @@
select * from t1 left join t2 on a = b AND tb < now() - interval '1 hour' and tb >= now() - interval '2 hour';
stream_plan: |-
StreamMaterialize { columns: [a, ta, b, tb, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, a], pk_columns: [t1._row_id, t2._row_id, a], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftOuter, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.a) }
- │ └─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.b) }
- └─StreamDynamicFilter { predicate: (t2.tb < $expr2), output: [t2.b, t2.tb, t2._row_id] }
- ├─StreamDynamicFilter { predicate: (t2.tb >= $expr1), output_watermarks: [t2.tb], output: [t2.b, t2.tb, t2._row_id], cleaned_by_watermark: true }
- │ ├─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- │ └─StreamExchange { dist: Broadcast }
- │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
- │ └─StreamNow { output: [now] }
- └─StreamExchange { dist: Broadcast }
- └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
- └─StreamNow { output: [now] }
+ └─StreamExchange { dist: HashShard(t1.a, t1._row_id, t2._row_id) }
+ └─StreamHashJoin { type: LeftOuter, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.a) }
+ │ └─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.b) }
+ └─StreamDynamicFilter { predicate: (t2.tb < $expr2), output: [t2.b, t2.tb, t2._row_id] }
+ ├─StreamDynamicFilter { predicate: (t2.tb >= $expr1), output_watermarks: [t2.tb], output: [t2.b, t2.tb, t2._row_id], cleaned_by_watermark: true }
+ │ ├─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ │ └─StreamExchange { dist: Broadcast }
+ │ └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+ │ └─StreamNow { output: [now] }
+ └─StreamExchange { dist: Broadcast }
+ └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+ └─StreamNow { output: [now] }
- name: Temporal filter in on clause for right join's right side
sql: |
create table t1 (a int, ta timestamp with time zone);
diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml
index a1020b8d16ee5..f49a82be2dd78 100644
--- a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml
@@ -6,11 +6,12 @@
select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on id1= id2
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2, output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
- ├─StreamExchange { dist: HashShard(stream.id1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
- └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+ └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) }
+ └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2, output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+ ├─StreamExchange { dist: HashShard(stream.id1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+ └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
batch_error: |-
Not supported: do not support temporal join for batch queries
HINT: please use temporal join in streaming queries
@@ -21,11 +22,12 @@
select id1, a1, id2, a2 from stream join version FOR SYSTEM_TIME AS OF PROCTIME() on id1 = id2 where a2 < 10;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
- ├─StreamExchange { dist: HashShard(stream.id1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
- └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+ └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) }
+ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+ ├─StreamExchange { dist: HashShard(stream.id1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+ └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
- name: implicit join with temporal tables
sql: |
create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -33,11 +35,12 @@
select id1, a1, id2, a2 from stream, version FOR SYSTEM_TIME AS OF PROCTIME() where id1 = id2 AND a2 < 10;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
- ├─StreamExchange { dist: HashShard(stream.id1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
- └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+ └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) }
+ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+ ├─StreamExchange { dist: HashShard(stream.id1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+ └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
- name: Multi join key for temporal join
sql: |
create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -45,11 +48,12 @@
select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and id1 = id2 where b2 != a2;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1, a1], pk_columns: [stream._row_id, id1, a1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND stream.a1 = version.a2 AND (version.b2 <> version.a2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
- ├─StreamExchange { dist: HashShard(stream.id1, stream.a1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2, version.a2) }
- └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2], pk: [version.id2, version.a2], dist: UpstreamHashShard(version.id2, version.a2) }
+ └─StreamExchange { dist: HashShard(stream.id1, stream.a1, stream._row_id) }
+ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND stream.a1 = version.a2 AND (version.b2 <> version.a2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+ ├─StreamExchange { dist: HashShard(stream.id1, stream.a1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2, version.a2) }
+ └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2], pk: [version.id2, version.a2], dist: UpstreamHashShard(version.id2, version.a2) }
- name: Temporal join with Aggregation
sql: |
create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -101,15 +105,16 @@
join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.k = version2.k where a1 < 10;
stream_plan: |-
StreamMaterialize { columns: [k, x1, x2, a1, b1, stream._row_id(hidden), version2.k(hidden)], stream_key: [stream._row_id, k], pk_columns: [stream._row_id, k], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: Inner, predicate: stream.k = version2.k, output: [stream.k, version1.x1, version2.x2, stream.a1, stream.b1, stream._row_id, version2.k] }
- ├─StreamTemporalJoin { type: Inner, predicate: stream.k = version1.k, output: [stream.k, stream.a1, stream.b1, version1.x1, stream._row_id, version1.k] }
- │ ├─StreamExchange { dist: HashShard(stream.k) }
- │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
- │ │ └─StreamTableScan { table: stream, columns: [stream.k, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.k) }
- │ └─StreamTableScan { table: version1, columns: [version1.k, version1.x1], pk: [version1.k], dist: UpstreamHashShard(version1.k) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.k) }
- └─StreamTableScan { table: version2, columns: [version2.k, version2.x2], pk: [version2.k], dist: UpstreamHashShard(version2.k) }
+ └─StreamExchange { dist: HashShard(stream.k, stream._row_id) }
+ └─StreamTemporalJoin { type: Inner, predicate: stream.k = version2.k, output: [stream.k, version1.x1, version2.x2, stream.a1, stream.b1, stream._row_id, version2.k] }
+ ├─StreamTemporalJoin { type: Inner, predicate: stream.k = version1.k, output: [stream.k, stream.a1, stream.b1, version1.x1, stream._row_id, version1.k] }
+ │ ├─StreamExchange { dist: HashShard(stream.k) }
+ │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
+ │ │ └─StreamTableScan { table: stream, columns: [stream.k, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.k) }
+ │ └─StreamTableScan { table: version1, columns: [version1.k, version1.x1], pk: [version1.k], dist: UpstreamHashShard(version1.k) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.k) }
+ └─StreamTableScan { table: version2, columns: [version2.k, version2.x2], pk: [version2.k], dist: UpstreamHashShard(version2.k) }
- name: multi-way temporal join with different keys
sql: |
create table stream(id1 int, id2 int, a1 int, b1 int) APPEND ONLY;
@@ -121,16 +126,17 @@
join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.id2 = version2.id2 where a1 < 10;
stream_plan: |-
StreamMaterialize { columns: [id1, x1, id2, x2, a1, b1, stream._row_id(hidden), version2.id2(hidden)], stream_key: [stream._row_id, id1, id2], pk_columns: [stream._row_id, id1, id2], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
- ├─StreamExchange { dist: HashShard(stream.id2) }
- │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
- │ ├─StreamExchange { dist: HashShard(stream.id1) }
- │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
- │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
- │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
- └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
+ └─StreamExchange { dist: HashShard(stream.id1, stream.id2, stream._row_id) }
+ └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
+ ├─StreamExchange { dist: HashShard(stream.id2) }
+ │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
+ │ ├─StreamExchange { dist: HashShard(stream.id1) }
+ │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
+ │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
+ │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
+ └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
- name: multi-way temporal join with different keys
sql: |
create table stream(id1 int, id2 int, a1 int, b1 int) APPEND ONLY;
@@ -142,16 +148,17 @@
join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.id2 = version2.id2 where a1 < 10;
stream_plan: |-
StreamMaterialize { columns: [id1, x1, id2, x2, a1, b1, stream._row_id(hidden), version2.id2(hidden)], stream_key: [stream._row_id, id1, id2], pk_columns: [stream._row_id, id1, id2], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
- ├─StreamExchange { dist: HashShard(stream.id2) }
- │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
- │ ├─StreamExchange { dist: HashShard(stream.id1) }
- │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
- │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
- │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
- └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
+ └─StreamExchange { dist: HashShard(stream.id1, stream.id2, stream._row_id) }
+ └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
+ ├─StreamExchange { dist: HashShard(stream.id2) }
+ │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
+ │ ├─StreamExchange { dist: HashShard(stream.id1) }
+ │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
+ │ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
+ │ └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
+ └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
- name: temporal join with an index (distribution key size = 1)
sql: |
create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -160,11 +167,12 @@
select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
- ├─StreamExchange { dist: HashShard(stream.a1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
- └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
+ └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+ └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+ ├─StreamExchange { dist: HashShard(stream.a1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
+ └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
- name: temporal join with an index (distribution key size = 2)
sql: |
create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -173,11 +181,12 @@
select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
- ├─StreamExchange { dist: HashShard(stream.a1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
- └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
+ └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+ └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+ ├─StreamExchange { dist: HashShard(stream.a1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
+ └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
- name: temporal join with an index (index column size = 1)
sql: |
create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -186,11 +195,12 @@
select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, stream.b1, a1], pk_columns: [stream._row_id, id2, stream.b1, a1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: LeftOuter, predicate: stream.b1 = idx2.b2 AND (stream.a1 = idx2.a2), output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
- ├─StreamExchange { dist: HashShard(stream.b1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.b2) }
- └─StreamTableScan { table: idx2, columns: [idx2.b2, idx2.id2, idx2.a2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.b2) }
+ └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+ └─StreamTemporalJoin { type: LeftOuter, predicate: stream.b1 = idx2.b2 AND (stream.a1 = idx2.a2), output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+ ├─StreamExchange { dist: HashShard(stream.b1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.b2) }
+ └─StreamTableScan { table: idx2, columns: [idx2.b2, idx2.id2, idx2.a2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.b2) }
- name: temporal join with singleton table
sql: |
create table t (a int) append only;
@@ -212,11 +222,12 @@
select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx.a2 AND stream.b1 = idx.b2, output: [stream.id1, stream.a1, idx.id2, idx.a2, stream._row_id, stream.b1] }
- ├─StreamExchange { dist: HashShard(stream.a1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx.a2) }
- └─StreamTableScan { table: idx, columns: [idx.id2, idx.a2, idx.b2], pk: [idx.id2], dist: UpstreamHashShard(idx.a2) }
+ └─StreamExchange { dist: HashShard(stream.a1, idx.id2, stream._row_id, stream.b1) }
+ └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx.a2 AND stream.b1 = idx.b2, output: [stream.id1, stream.a1, idx.id2, idx.a2, stream._row_id, stream.b1] }
+ ├─StreamExchange { dist: HashShard(stream.a1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx.a2) }
+ └─StreamTableScan { table: idx, columns: [idx.id2, idx.a2, idx.b2], pk: [idx.id2], dist: UpstreamHashShard(idx.a2) }
- name: index selection for temporal join (with two indexes) and should choose the index with a longer prefix..
sql: |
create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -226,11 +237,12 @@
select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
- ├─StreamExchange { dist: HashShard(stream.a1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
- └─StreamTableScan { table: idx2, columns: [idx2.id2, idx2.a2, idx2.b2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
+ └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+ └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+ ├─StreamExchange { dist: HashShard(stream.a1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
+ └─StreamTableScan { table: idx2, columns: [idx2.id2, idx2.a2, idx2.b2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
- name: index selection for temporal join (with three indexes) and should choose primary table.
sql: |
create table stream(id1 int, a1 int, b1 int, c1 int) APPEND ONLY;
@@ -241,11 +253,12 @@
select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2 and c1 = c2 and id1 = id2;
stream_plan: |-
StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden), stream.c1(hidden)], stream_key: [stream._row_id, id1, a1, stream.b1, stream.c1], pk_columns: [stream._row_id, id1, a1, stream.b1, stream.c1], pk_conflict: NoCheck }
- └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2 AND (stream.a1 = version.a2) AND (stream.b1 = version.b2) AND (stream.c1 = version.c2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id, stream.b1, stream.c1] }
- ├─StreamExchange { dist: HashShard(stream.id1) }
- │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream.c1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
- └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
- └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2, version.c2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+ └─StreamExchange { dist: HashShard(stream.id1, stream.a1, stream._row_id, stream.b1, stream.c1) }
+ └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2 AND (stream.a1 = version.a2) AND (stream.b1 = version.b2) AND (stream.c1 = version.c2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id, stream.b1, stream.c1] }
+ ├─StreamExchange { dist: HashShard(stream.id1) }
+ │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream.c1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+ └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2, version.c2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
- name: index selection for temporal join (two index) and no one matches.
sql: |
create table stream(id1 int, a1 int, b1 int, c1 int) APPEND ONLY;
diff --git a/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml b/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml
index eaaa1f8e5c8d5..fdf928a0c9c84 100644
--- a/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml
@@ -243,214 +243,218 @@
└─BatchSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id(hidden), _row_id#1(hidden), r_regionkey(hidden), _row_id#2(hidden), _row_id#3(hidden), _row_id#4(hidden), ps_suppkey(hidden), n_nationkey(hidden), ps_supplycost(hidden), p_partkey#1(hidden)], stream_key: [_row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, p_partkey, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_columns: [s_acctbal, n_name, s_name, p_partkey, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_conflict: NoCheck }
- └─StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] }
- ├─StreamExchange { dist: HashShard(p_partkey) }
- │ └─StreamShare { id: 26 }
- │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
- │ ├─StreamExchange { dist: HashShard(n_nationkey) }
- │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
- │ │ ├─StreamExchange { dist: HashShard(r_regionkey) }
- │ │ │ └─StreamShare { id: 3 }
- │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] }
- │ │ │ └─StreamRowIdGen { row_id_index: 3 }
- │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
- │ │ └─StreamExchange { dist: HashShard(n_regionkey) }
- │ │ └─StreamShare { id: 7 }
- │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
- │ │ └─StreamRowIdGen { row_id_index: 4 }
- │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(s_nationkey) }
- │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
- │ ├─StreamExchange { dist: HashShard(ps_suppkey) }
- │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
- │ │ ├─StreamExchange { dist: HashShard(p_partkey) }
- │ │ │ └─StreamRowIdGen { row_id_index: 9 }
- │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
- │ │ └─StreamExchange { dist: HashShard(ps_partkey) }
- │ │ └─StreamShare { id: 15 }
- │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
- │ │ └─StreamRowIdGen { row_id_index: 5 }
- │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(s_suppkey) }
- │ └─StreamShare { id: 21 }
- │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
- │ └─StreamRowIdGen { row_id_index: 7 }
- │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
- └─StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
- └─StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] }
- └─StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] }
- ├─StreamAppendOnlyDedup { dedup_cols: [p_partkey] }
- │ └─StreamExchange { dist: HashShard(p_partkey) }
- │ └─StreamProject { exprs: [p_partkey] }
- │ └─StreamShare { id: 26 }
- │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
- │ ├─StreamExchange { dist: HashShard(n_nationkey) }
- │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
- │ │ ├─StreamExchange { dist: HashShard(r_regionkey) }
- │ │ │ └─StreamShare { id: 3 }
- │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] }
- │ │ │ └─StreamRowIdGen { row_id_index: 3 }
- │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
- │ │ └─StreamExchange { dist: HashShard(n_regionkey) }
- │ │ └─StreamShare { id: 7 }
- │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
- │ │ └─StreamRowIdGen { row_id_index: 4 }
- │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(s_nationkey) }
- │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
- │ ├─StreamExchange { dist: HashShard(ps_suppkey) }
- │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
- │ │ ├─StreamExchange { dist: HashShard(p_partkey) }
- │ │ │ └─StreamRowIdGen { row_id_index: 9 }
- │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
- │ │ └─StreamExchange { dist: HashShard(ps_partkey) }
- │ │ └─StreamShare { id: 15 }
- │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
- │ │ └─StreamRowIdGen { row_id_index: 5 }
- │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(s_suppkey) }
- │ └─StreamShare { id: 21 }
- │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
- │ └─StreamRowIdGen { row_id_index: 7 }
- │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
- └─StreamExchange { dist: HashShard(ps_partkey) }
- └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] }
- ├─StreamExchange { dist: HashShard(s_nationkey) }
- │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] }
- │ ├─StreamExchange { dist: HashShard(ps_suppkey) }
- │ │ └─StreamFilter { predicate: IsNotNull(ps_partkey) }
- │ │ └─StreamShare { id: 15 }
- │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
- │ │ └─StreamRowIdGen { row_id_index: 5 }
- │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(s_suppkey) }
- │ └─StreamShare { id: 21 }
- │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
- │ └─StreamRowIdGen { row_id_index: 7 }
- │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
- └─StreamExchange { dist: HashShard(n_nationkey) }
- └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] }
- ├─StreamExchange { dist: HashShard(r_regionkey) }
- │ └─StreamShare { id: 3 }
- │ └─StreamProject { exprs: [r_regionkey, _row_id] }
- │ └─StreamRowIdGen { row_id_index: 3 }
- │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
- └─StreamExchange { dist: HashShard(n_regionkey) }
- └─StreamShare { id: 7 }
- └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
- └─StreamRowIdGen { row_id_index: 4 }
- └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+ └─StreamExchange { dist: HashShard(p_partkey, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost) }
+ └─StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] }
+ ├─StreamExchange { dist: HashShard(p_partkey) }
+ │ └─StreamShare { id: 26 }
+ │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
+ │ ├─StreamExchange { dist: HashShard(n_nationkey) }
+ │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
+ │ │ ├─StreamExchange { dist: HashShard(r_regionkey) }
+ │ │ │ └─StreamShare { id: 3 }
+ │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] }
+ │ │ │ └─StreamRowIdGen { row_id_index: 3 }
+ │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
+ │ │ └─StreamExchange { dist: HashShard(n_regionkey) }
+ │ │ └─StreamShare { id: 7 }
+ │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
+ │ │ └─StreamRowIdGen { row_id_index: 4 }
+ │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(s_nationkey) }
+ │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
+ │ ├─StreamExchange { dist: HashShard(ps_suppkey) }
+ │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
+ │ │ ├─StreamExchange { dist: HashShard(p_partkey) }
+ │ │ │ └─StreamRowIdGen { row_id_index: 9 }
+ │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+ │ │ └─StreamExchange { dist: HashShard(ps_partkey) }
+ │ │ └─StreamShare { id: 15 }
+ │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
+ │ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(s_suppkey) }
+ │ └─StreamShare { id: 21 }
+ │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+ │ └─StreamRowIdGen { row_id_index: 7 }
+ │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+ └─StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
+ └─StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] }
+ └─StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] }
+ ├─StreamAppendOnlyDedup { dedup_cols: [p_partkey] }
+ │ └─StreamExchange { dist: HashShard(p_partkey) }
+ │ └─StreamProject { exprs: [p_partkey] }
+ │ └─StreamShare { id: 26 }
+ │ └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
+ │ ├─StreamExchange { dist: HashShard(n_nationkey) }
+ │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
+ │ │ ├─StreamExchange { dist: HashShard(r_regionkey) }
+ │ │ │ └─StreamShare { id: 3 }
+ │ │ │ └─StreamProject { exprs: [r_regionkey, _row_id] }
+ │ │ │ └─StreamRowIdGen { row_id_index: 3 }
+ │ │ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
+ │ │ └─StreamExchange { dist: HashShard(n_regionkey) }
+ │ │ └─StreamShare { id: 7 }
+ │ │ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
+ │ │ └─StreamRowIdGen { row_id_index: 4 }
+ │ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(s_nationkey) }
+ │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
+ │ ├─StreamExchange { dist: HashShard(ps_suppkey) }
+ │ │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
+ │ │ ├─StreamExchange { dist: HashShard(p_partkey) }
+ │ │ │ └─StreamRowIdGen { row_id_index: 9 }
+ │ │ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+ │ │ └─StreamExchange { dist: HashShard(ps_partkey) }
+ │ │ └─StreamShare { id: 15 }
+ │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
+ │ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(s_suppkey) }
+ │ └─StreamShare { id: 21 }
+ │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+ │ └─StreamRowIdGen { row_id_index: 7 }
+ │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+ └─StreamExchange { dist: HashShard(ps_partkey) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] }
+ ├─StreamExchange { dist: HashShard(s_nationkey) }
+ │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] }
+ │ ├─StreamExchange { dist: HashShard(ps_suppkey) }
+ │ │ └─StreamFilter { predicate: IsNotNull(ps_partkey) }
+ │ │ └─StreamShare { id: 15 }
+ │ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
+ │ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(s_suppkey) }
+ │ └─StreamShare { id: 21 }
+ │ └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+ │ └─StreamRowIdGen { row_id_index: 7 }
+ │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+ └─StreamExchange { dist: HashShard(n_nationkey) }
+ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] }
+ ├─StreamExchange { dist: HashShard(r_regionkey) }
+ │ └─StreamShare { id: 3 }
+ │ └─StreamProject { exprs: [r_regionkey, _row_id] }
+ │ └─StreamRowIdGen { row_id_index: 3 }
+ │ └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
+ └─StreamExchange { dist: HashShard(n_regionkey) }
+ └─StreamShare { id: 7 }
+ └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
+ └─StreamRowIdGen { row_id_index: 4 }
+ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id(hidden), _row_id#1(hidden), r_regionkey(hidden), _row_id#2(hidden), _row_id#3(hidden), _row_id#4(hidden), ps_suppkey(hidden), n_nationkey(hidden), ps_supplycost(hidden), p_partkey#1(hidden)], stream_key: [_row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, p_partkey, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_columns: [s_acctbal, n_name, s_name, p_partkey, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
- └── StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } { intermediate state table: 26, state tables: [ 25 ], distinct tables: [] }
- └── StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } { left table: 27, right table: 29, left degree table: 28, right degree table: 30 }
- ├── StreamAppendOnlyDedup { dedup_cols: [p_partkey] } { state table: 31 }
- │ └── StreamExchange Hash([0]) from 15
- └── StreamExchange Hash([0]) from 16
+ └── StreamExchange Hash([3, 8, 9, 10, 11, 12, 13, 14, 15, 16]) from 1
Fragment 1
- StreamNoOp
- └── StreamExchange NoShuffle from 2
+ StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
+ └── StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } { intermediate state table: 26, state tables: [ 25 ], distinct tables: [] }
+ └── StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } { left table: 27, right table: 29, left degree table: 28, right degree table: 30 }
+ ├── StreamAppendOnlyDedup { dedup_cols: [p_partkey] } { state table: 31 }
+ │ └── StreamExchange Hash([0]) from 16
+ └── StreamExchange Hash([0]) from 17
Fragment 2
- StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
- ├── StreamExchange Hash([0]) from 3
- └── StreamExchange Hash([5]) from 8
+ StreamNoOp
+ └── StreamExchange NoShuffle from 3
Fragment 3
- StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } { left table: 8, right table: 10, left degree table: 9, right degree table: 11 }
+ StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
├── StreamExchange Hash([0]) from 4
- └── StreamExchange Hash([2]) from 6
+ └── StreamExchange Hash([5]) from 9
Fragment 4
- StreamNoOp
- └── StreamExchange NoShuffle from 5
+ StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } { left table: 8, right table: 10, left degree table: 9, right degree table: 11 }
+ ├── StreamExchange Hash([0]) from 5
+ └── StreamExchange Hash([2]) from 7
Fragment 5
+ StreamNoOp
+ └── StreamExchange NoShuffle from 6
+
+ Fragment 6
StreamProject { exprs: [r_regionkey, _row_id] }
└── StreamRowIdGen { row_id_index: 3 }
└── StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } { source state table: 12 }
- Fragment 6
+ Fragment 7
StreamNoOp
- └── StreamExchange NoShuffle from 7
+ └── StreamExchange NoShuffle from 8
- Fragment 7
+ Fragment 8
StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
└── StreamRowIdGen { row_id_index: 4 }
└── StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } { source state table: 13 }
- Fragment 8
+ Fragment 9
StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
- ├── StreamExchange Hash([2]) from 9
- └── StreamExchange Hash([0]) from 13
+ ├── StreamExchange Hash([2]) from 10
+ └── StreamExchange Hash([0]) from 14
- Fragment 9
+ Fragment 10
StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] } { left table: 18, right table: 20, left degree table: 19, right degree table: 21 }
- ├── StreamExchange Hash([0]) from 10
- └── StreamExchange Hash([0]) from 11
+ ├── StreamExchange Hash([0]) from 11
+ └── StreamExchange Hash([0]) from 12
- Fragment 10
+ Fragment 11
StreamRowIdGen { row_id_index: 9 }
└── StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } { source state table: 22 }
- Fragment 11
+ Fragment 12
StreamNoOp
- └── StreamExchange NoShuffle from 12
+ └── StreamExchange NoShuffle from 13
- Fragment 12
+ Fragment 13
StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
└── StreamRowIdGen { row_id_index: 5 }
└── StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } { source state table: 23 }
- Fragment 13
+ Fragment 14
StreamNoOp
- └── StreamExchange NoShuffle from 14
+ └── StreamExchange NoShuffle from 15
- Fragment 14
+ Fragment 15
StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
└── StreamRowIdGen { row_id_index: 7 }
└── StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } { source state table: 24 }
- Fragment 15
+ Fragment 16
StreamProject { exprs: [p_partkey] }
- └── StreamExchange NoShuffle from 2
+ └── StreamExchange NoShuffle from 3
- Fragment 16
+ Fragment 17
StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] } { left table: 32, right table: 34, left degree table: 33, right degree table: 35 }
- ├── StreamExchange Hash([2]) from 17
- └── StreamExchange Hash([0]) from 20
+ ├── StreamExchange Hash([2]) from 18
+ └── StreamExchange Hash([0]) from 21
- Fragment 17
+ Fragment 18
StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] } { left table: 36, right table: 38, left degree table: 37, right degree table: 39 }
- ├── StreamExchange Hash([1]) from 18
- └── StreamExchange Hash([0]) from 19
+ ├── StreamExchange Hash([1]) from 19
+ └── StreamExchange Hash([0]) from 20
- Fragment 18
+ Fragment 19
StreamFilter { predicate: IsNotNull(ps_partkey) }
- └── StreamExchange NoShuffle from 12
+ └── StreamExchange NoShuffle from 13
- Fragment 19
+ Fragment 20
StreamNoOp
- └── StreamExchange NoShuffle from 14
+ └── StreamExchange NoShuffle from 15
- Fragment 20
+ Fragment 21
StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] } { left table: 40, right table: 42, left degree table: 41, right degree table: 43 }
- ├── StreamExchange Hash([0]) from 21
- └── StreamExchange Hash([2]) from 22
+ ├── StreamExchange Hash([0]) from 22
+ └── StreamExchange Hash([2]) from 23
- Fragment 21
+ Fragment 22
StreamNoOp
- └── StreamExchange NoShuffle from 5
+ └── StreamExchange NoShuffle from 6
- Fragment 22
+ Fragment 23
StreamNoOp
- └── StreamExchange NoShuffle from 7
+ └── StreamExchange NoShuffle from 8
Table 0 { columns: [ p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id_0, r_regionkey, n_nationkey, _row_id_1, _row_id_2, _row_id_3, ps_suppkey ], primary key: [ $0 ASC, $7 ASC, $9 ASC, $10 ASC, $11 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC, $12 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], distribution key: [ 0 ], read pk prefix len hint: 2 }
@@ -540,7 +544,7 @@
Table 43 { columns: [ n_regionkey, _row_id, _degree ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
- Table 4294967294 { columns: [ s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost, p_partkey#1 ], primary key: [ $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ], distribution key: [ 3 ], read pk prefix len hint: 13 }
+ Table 4294967294 { columns: [ s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost, p_partkey#1 ], primary key: [ $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ], distribution key: [ 3, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], read pk prefix len hint: 13 }
- id: tpch_q5
before:
@@ -1797,76 +1801,80 @@
└─BatchSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id], filter: (None, None) }
stream_plan: |-
StreamMaterialize { columns: [s_name, s_address, _row_id(hidden), _row_id#1(hidden), s_nationkey(hidden), s_suppkey(hidden)], stream_key: [_row_id, _row_id#1, s_nationkey, s_suppkey], pk_columns: [s_name, _row_id, _row_id#1, s_nationkey, s_suppkey], pk_conflict: NoCheck }
- └─StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] }
- ├─StreamExchange { dist: HashShard(s_suppkey) }
- │ └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] }
- │ ├─StreamExchange { dist: HashShard(s_nationkey) }
- │ │ └─StreamRowIdGen { row_id_index: 7 }
- │ │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(n_nationkey) }
- │ └─StreamRowIdGen { row_id_index: 4 }
- │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
- └─StreamExchange { dist: HashShard(ps_suppkey) }
- └─StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] }
- └─StreamFilter { predicate: ($expr1 > $expr2) }
- └─StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all }
- ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
- │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] }
- │ └─StreamShare { id: 13 }
- │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
- │ ├─StreamExchange { dist: HashShard(ps_partkey) }
- │ │ └─StreamRowIdGen { row_id_index: 5 }
- │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(p_partkey) }
- │ └─StreamProject { exprs: [p_partkey, _row_id] }
- │ └─StreamRowIdGen { row_id_index: 9 }
- │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
- └─StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] }
- └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] }
- └─StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] }
- ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
- │ └─StreamProject { exprs: [ps_partkey, ps_suppkey] }
- │ └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] }
- │ └─StreamShare { id: 13 }
- │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
- │ ├─StreamExchange { dist: HashShard(ps_partkey) }
- │ │ └─StreamRowIdGen { row_id_index: 5 }
- │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
- │ └─StreamExchange { dist: HashShard(p_partkey) }
- │ └─StreamProject { exprs: [p_partkey, _row_id] }
- │ └─StreamRowIdGen { row_id_index: 9 }
- │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
- └─StreamExchange { dist: HashShard(l_partkey, l_suppkey) }
- └─StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] }
- └─StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) }
- └─StreamRowIdGen { row_id_index: 16 }
- └─StreamSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id] }
+ └─StreamExchange { dist: HashShard(_row_id, _row_id, s_nationkey, s_suppkey) }
+ └─StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] }
+ ├─StreamExchange { dist: HashShard(s_suppkey) }
+ │ └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] }
+ │ ├─StreamExchange { dist: HashShard(s_nationkey) }
+ │ │ └─StreamRowIdGen { row_id_index: 7 }
+ │ │ └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(n_nationkey) }
+ │ └─StreamRowIdGen { row_id_index: 4 }
+ │ └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+ └─StreamExchange { dist: HashShard(ps_suppkey) }
+ └─StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] }
+ └─StreamFilter { predicate: ($expr1 > $expr2) }
+ └─StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all }
+ ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
+ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] }
+ │ └─StreamShare { id: 13 }
+ │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
+ │ ├─StreamExchange { dist: HashShard(ps_partkey) }
+ │ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(p_partkey) }
+ │ └─StreamProject { exprs: [p_partkey, _row_id] }
+ │ └─StreamRowIdGen { row_id_index: 9 }
+ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+ └─StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] }
+ └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] }
+ └─StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] }
+ ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
+ │ └─StreamProject { exprs: [ps_partkey, ps_suppkey] }
+ │ └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] }
+ │ └─StreamShare { id: 13 }
+ │ └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
+ │ ├─StreamExchange { dist: HashShard(ps_partkey) }
+ │ │ └─StreamRowIdGen { row_id_index: 5 }
+ │ │ └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+ │ └─StreamExchange { dist: HashShard(p_partkey) }
+ │ └─StreamProject { exprs: [p_partkey, _row_id] }
+ │ └─StreamRowIdGen { row_id_index: 9 }
+ │ └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+ └─StreamExchange { dist: HashShard(l_partkey, l_suppkey) }
+ └─StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] }
+ └─StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) }
+ └─StreamRowIdGen { row_id_index: 16 }
+ └─StreamSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id] }
stream_dist_plan: |+
Fragment 0
StreamMaterialize { columns: [s_name, s_address, _row_id(hidden), _row_id#1(hidden), s_nationkey(hidden), s_suppkey(hidden)], stream_key: [_row_id, _row_id#1, s_nationkey, s_suppkey], pk_columns: [s_name, _row_id, _row_id#1, s_nationkey, s_suppkey], pk_conflict: NoCheck }
├── materialized table: 4294967294
- └── StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
- ├── StreamExchange Hash([0]) from 1
- └── StreamExchange Hash([0]) from 4
+ └── StreamExchange Hash([2, 3, 4, 5]) from 1
Fragment 1
- StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
- ├── StreamExchange Hash([3]) from 2
- └── StreamExchange Hash([0]) from 3
+ StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+ ├── StreamExchange Hash([0]) from 2
+ └── StreamExchange Hash([0]) from 5
Fragment 2
+ StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
+ ├── StreamExchange Hash([3]) from 3
+ └── StreamExchange Hash([0]) from 4
+
+ Fragment 3
StreamRowIdGen { row_id_index: 7 }
└── StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } { source state table: 8 }
- Fragment 3
+ Fragment 4
StreamRowIdGen { row_id_index: 4 }
└── StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } { source state table: 9 }
- Fragment 4
+ Fragment 5
StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] }
└── StreamFilter { predicate: ($expr1 > $expr2) }
└── StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 }
- ├── StreamExchange Hash([0, 1]) from 5
+ ├── StreamExchange Hash([0, 1]) from 6
└── StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] }
└── StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] } { intermediate state table: 20, state tables: [], distinct tables: [] }
└── StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] }
@@ -1874,33 +1882,33 @@
├── right table: 23
├── left degree table: 22
├── right degree table: 24
- ├── StreamExchange Hash([0, 1]) from 9
- └── StreamExchange Hash([0, 1]) from 10
+ ├── StreamExchange Hash([0, 1]) from 10
+ └── StreamExchange Hash([0, 1]) from 11
- Fragment 5
+ Fragment 6
StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] }
- └── StreamExchange NoShuffle from 6
+ └── StreamExchange NoShuffle from 7
- Fragment 6
+ Fragment 7
StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
- ├── StreamExchange Hash([0]) from 7
- └── StreamExchange Hash([0]) from 8
+ ├── StreamExchange Hash([0]) from 8
+ └── StreamExchange Hash([0]) from 9
- Fragment 7
+ Fragment 8
StreamRowIdGen { row_id_index: 5 }
└── StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } { source state table: 18 }
- Fragment 8
+ Fragment 9
StreamProject { exprs: [p_partkey, _row_id] }
└── StreamRowIdGen { row_id_index: 9 }
└── StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } { source state table: 19 }
- Fragment 9
+ Fragment 10
StreamProject { exprs: [ps_partkey, ps_suppkey] }
└── StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] } { intermediate state table: 25, state tables: [], distinct tables: [] }
- └── StreamExchange NoShuffle from 6
+ └── StreamExchange NoShuffle from 7
- Fragment 10
+ Fragment 11
StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] }
└── StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) }
└── StreamRowIdGen { row_id_index: 16 }
@@ -1961,7 +1969,7 @@
Table 26 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 }
- Table 4294967294 { columns: [ s_name, s_address, _row_id, _row_id#1, s_nationkey, s_suppkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 5 ], read pk prefix len hint: 5 }
+ Table 4294967294 { columns: [ s_name, s_address, _row_id, _row_id#1, s_nationkey, s_suppkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 2, 3, 4, 5 ], read pk prefix len hint: 5 }
- id: tpch_q21
before:
diff --git a/src/frontend/planner_test/tests/testdata/output/update.yaml b/src/frontend/planner_test/tests/testdata/output/update.yaml
index f3a27a3d2e514..3db7ac3501018 100644
--- a/src/frontend/planner_test/tests/testdata/output/update.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/update.yaml
@@ -116,3 +116,41 @@
create table t(v1 int as v2-1, v2 int, v3 int as v2+1, primary key (v3));
update t set v2 = 3;
binder_error: 'Bind error: update modifying the column referenced by generated columns that are part of the primary key is not allowed'
+- name: update subquery
+ sql: |
+ create table t (a int, b int);
+ update t set a = 777 where b not in (select a from t);
+ logical_plan: |-
+ LogicalUpdate { table: t, exprs: [777:Int32, $1, $2] }
+ └─LogicalApply { type: LeftAnti, on: (t.b = t.a), correlated_id: 1 }
+ ├─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] }
+ └─LogicalProject { exprs: [t.a] }
+ └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] }
+ batch_plan: |-
+ BatchExchange { order: [], dist: Single }
+ └─BatchUpdate { table: t, exprs: [777:Int32, $1, $2] }
+ └─BatchExchange { order: [], dist: Single }
+ └─BatchHashJoin { type: LeftAnti, predicate: t.b = t.a, output: all }
+ ├─BatchExchange { order: [], dist: HashShard(t.b) }
+ │ └─BatchScan { table: t, columns: [t.a, t.b, t._row_id], distribution: UpstreamHashShard(t._row_id) }
+ └─BatchExchange { order: [], dist: HashShard(t.a) }
+ └─BatchScan { table: t, columns: [t.a], distribution: SomeShard }
+- name: delete subquery
+ sql: |
+ create table t (a int, b int);
+ delete from t where a not in (select b from t);
+ logical_plan: |-
+ LogicalDelete { table: t }
+ └─LogicalApply { type: LeftAnti, on: (t.a = t.b), correlated_id: 1 }
+ ├─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] }
+ └─LogicalProject { exprs: [t.b] }
+ └─LogicalScan { table: t, columns: [t.a, t.b, t._row_id] }
+ batch_plan: |-
+ BatchExchange { order: [], dist: Single }
+ └─BatchDelete { table: t }
+ └─BatchExchange { order: [], dist: Single }
+ └─BatchHashJoin { type: LeftAnti, predicate: t.a = t.b, output: all }
+ ├─BatchExchange { order: [], dist: HashShard(t.a) }
+ │ └─BatchScan { table: t, columns: [t.a, t.b, t._row_id], distribution: UpstreamHashShard(t._row_id) }
+ └─BatchExchange { order: [], dist: HashShard(t.b) }
+ └─BatchScan { table: t, columns: [t.b], distribution: SomeShard }
diff --git a/src/frontend/planner_test/tests/testdata/output/watermark.yaml b/src/frontend/planner_test/tests/testdata/output/watermark.yaml
index e4ef42b121528..d57d41fa76bc3 100644
--- a/src/frontend/planner_test/tests/testdata/output/watermark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/watermark.yaml
@@ -79,11 +79,12 @@
select t1.ts as t1_ts, t2.ts as ts2, t1.v1 as t1_v1, t1.v2 as t1_v2, t2.v1 as t2_v1, t2.v2 as t2_v2 from t1, t2 where t1.ts = t2.ts;
stream_plan: |-
StreamMaterialize { columns: [t1_ts, ts2, t1_v1, t1_v2, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_ts], pk_columns: [t1._row_id, t2._row_id, t1_ts], pk_conflict: NoCheck, watermark_columns: [t1_ts, ts2] }
- └─StreamHashJoin [window, append_only] { type: Inner, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t2.ts, t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.ts) }
- │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.ts) }
- └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.ts, t1._row_id, t2._row_id) }
+ └─StreamHashJoin [window, append_only] { type: Inner, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t2.ts, t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.ts) }
+ │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.ts) }
+ └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: left semi window join
sql: |
create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
@@ -91,11 +92,12 @@
select t1.ts as t1_ts, t1.v1 as t1_v1, t1.v2 as t1_v2 from t1 where exists (select * from t2 where t1.ts = t2.ts);
stream_plan: |-
StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t1._row_id(hidden)], stream_key: [t1._row_id, t1_ts], pk_columns: [t1._row_id, t1_ts], pk_conflict: NoCheck, watermark_columns: [t1_ts] }
- └─StreamHashJoin [window] { type: LeftSemi, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts], output: all }
- ├─StreamExchange { dist: HashShard(t1.ts) }
- │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.ts) }
- └─StreamTableScan { table: t2, columns: [t2.ts, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.ts, t1._row_id) }
+ └─StreamHashJoin [window] { type: LeftSemi, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts], output: all }
+ ├─StreamExchange { dist: HashShard(t1.ts) }
+ │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.ts) }
+ └─StreamTableScan { table: t2, columns: [t2.ts, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: interval join(left outer join)
sql: |
create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
@@ -108,13 +110,14 @@
└─LogicalScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id] }
stream_plan: |-
StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t2_ts, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1], pk_columns: [t1._row_id, t2._row_id, t1_v1], pk_conflict: NoCheck, watermark_columns: [t1_ts, t2_ts] }
- └─StreamHashJoin [interval] { type: LeftOuter, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
- │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.v1) }
- └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
- └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+ └─StreamHashJoin [interval] { type: LeftOuter, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
+ │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v1) }
+ └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
+ └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: interval join (inner join)
sql: |
create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
@@ -127,13 +130,14 @@
└─LogicalScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id] }
stream_plan: |-
StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t2_ts, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1], pk_columns: [t1._row_id, t2._row_id, t1_v1], pk_conflict: NoCheck, watermark_columns: [t1_ts, t2_ts] }
- └─StreamHashJoin [interval, append_only] { type: Inner, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
- ├─StreamExchange { dist: HashShard(t1.v1) }
- │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
- │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
- └─StreamExchange { dist: HashShard(t2.v1) }
- └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
- └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+ └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+ └─StreamHashJoin [interval, append_only] { type: Inner, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+ ├─StreamExchange { dist: HashShard(t1.v1) }
+ │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
+ │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+ └─StreamExchange { dist: HashShard(t2.v1) }
+ └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
+ └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
- name: union all
sql: |
create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
diff --git a/src/frontend/planner_test/tests/testdata/output/window_join.yaml b/src/frontend/planner_test/tests/testdata/output/window_join.yaml
index 4113a6021e866..17c5e76f6e806 100644
--- a/src/frontend/planner_test/tests/testdata/output/window_join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/window_join.yaml
@@ -12,15 +12,16 @@
select * from t1, t2 where ts1 = ts2 and a1 = a2;
stream_plan: |-
StreamMaterialize { columns: [ts1, a1, b1, ts2, a2, b2, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, ts1, a1], pk_columns: [_row_id, _row_id#1, ts1, a1], pk_conflict: NoCheck, watermark_columns: [ts1, ts2] }
- └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
- ├─StreamExchange { dist: HashShard(ts1, a1) }
- │ └─StreamRowIdGen { row_id_index: 3 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
- │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
- └─StreamExchange { dist: HashShard(ts2, a2) }
- └─StreamRowIdGen { row_id_index: 3 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
- └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
+ └─StreamExchange { dist: HashShard(ts1, a1, _row_id, _row_id) }
+ └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
+ ├─StreamExchange { dist: HashShard(ts1, a1) }
+ │ └─StreamRowIdGen { row_id_index: 3 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
+ │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
+ └─StreamExchange { dist: HashShard(ts2, a2) }
+ └─StreamRowIdGen { row_id_index: 3 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
+ └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
- name: Window join expression reorder
sql: |
create source t1 (ts1 timestamp with time zone, a1 int, b1 int, watermark for ts1 as ts1 - INTERVAL '1' SECOND) with (
@@ -34,12 +35,13 @@
select * from t1, t2 where a1 = a2 and ts1 = ts2;
stream_plan: |-
StreamMaterialize { columns: [ts1, a1, b1, ts2, a2, b2, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, a1, ts1], pk_columns: [_row_id, _row_id#1, a1, ts1], pk_conflict: NoCheck, watermark_columns: [ts1, ts2] }
- └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
- ├─StreamExchange { dist: HashShard(ts1, a1) }
- │ └─StreamRowIdGen { row_id_index: 3 }
- │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
- │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
- └─StreamExchange { dist: HashShard(ts2, a2) }
- └─StreamRowIdGen { row_id_index: 3 }
- └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
- └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
+ └─StreamExchange { dist: HashShard(ts1, a1, _row_id, _row_id) }
+ └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
+ ├─StreamExchange { dist: HashShard(ts1, a1) }
+ │ └─StreamRowIdGen { row_id_index: 3 }
+ │ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
+ │ └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
+ └─StreamExchange { dist: HashShard(ts2, a2) }
+ └─StreamRowIdGen { row_id_index: 3 }
+ └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
+ └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
diff --git a/src/frontend/src/binder/mod.rs b/src/frontend/src/binder/mod.rs
index 974730cd16237..f1038f9bf5943 100644
--- a/src/frontend/src/binder/mod.rs
+++ b/src/frontend/src/binder/mod.rs
@@ -363,6 +363,13 @@ impl Binder {
}
}
+/// The column name stored in [`BindContext`] for a column without an alias.
+pub const UNNAMED_COLUMN: &str = "?column?";
+/// The table name stored in [`BindContext`] for a subquery without an alias.
+const UNNAMED_SUBQUERY: &str = "?subquery?";
+/// The table name stored in [`BindContext`] for a column group.
+const COLUMN_GROUP_PREFIX: &str = "?column_group_id?";
+
#[cfg(test)]
pub mod test_utils {
use risingwave_common::types::DataType;
@@ -380,10 +387,3 @@ pub mod test_utils {
Binder::new_with_param_types(&SessionImpl::mock(), param_types)
}
}
-
-/// The column name stored in [`BindContext`] for a column without an alias.
-pub const UNNAMED_COLUMN: &str = "?column?";
-/// The table name stored in [`BindContext`] for a subquery without an alias.
-const UNNAMED_SUBQUERY: &str = "?subquery?";
-/// The table name stored in [`BindContext`] for a column group.
-const COLUMN_GROUP_PREFIX: &str = "?column_group_id?";
diff --git a/src/frontend/src/handler/create_mv.rs b/src/frontend/src/handler/create_mv.rs
index 3fa9129f39743..053ba5aa30f19 100644
--- a/src/frontend/src/handler/create_mv.rs
+++ b/src/frontend/src/handler/create_mv.rs
@@ -26,6 +26,7 @@ use crate::binder::{Binder, BoundQuery, BoundSetExpr};
use crate::catalog::{check_valid_column_name, CatalogError};
use crate::handler::privilege::resolve_query_privileges;
use crate::handler::HandlerArgs;
+use crate::optimizer::plan_node::generic::GenericPlanRef;
use crate::optimizer::plan_node::Explain;
use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef, RelationCollectorVisitor};
use crate::planner::Planner;
@@ -175,7 +176,7 @@ It only indicates the physical clustering of the data, which may improve the per
let (plan, table) =
gen_create_mv_plan(&session, context.into(), query, name, columns, emit_mode)?;
- let context = plan.plan_base().ctx.clone();
+ let context = plan.plan_base().ctx().clone();
let mut graph = build_graph(plan);
graph.parallelism = session
.config()
diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs
index 32279dd4e70eb..ddb1d697b856d 100644
--- a/src/frontend/src/handler/create_sink.rs
+++ b/src/frontend/src/handler/create_sink.rs
@@ -244,6 +244,7 @@ fn bind_sink_format_desc(value: SinkSchema) -> Result<SinkFormatDesc> {
E::Json => SinkEncode::Json,
E::Protobuf => SinkEncode::Protobuf,
E::Avro => SinkEncode::Avro,
+ E::Template => SinkEncode::Template,
e @ (E::Native | E::Csv | E::Bytes) => {
return Err(ErrorCode::BindError(format!("sink encode unsupported: {e}")).into())
}
@@ -262,6 +263,7 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock<HashMap<String, HashMap<Format, Vec<Encode>>>> =
Format::Upsert => vec![Encode::Json],
Format::Debezium => vec![Encode::Json],
),
+ RedisSink::SINK_NAME => hashmap!(
+ Format::Plain => vec![Encode::Json,Encode::Template],
+ Format::Upsert => vec![Encode::Json,Encode::Template],
+ ),
))
});
pub fn validate_compatibility(connector: &str, format_desc: &SinkSchema) -> Result<()> {
diff --git a/src/frontend/src/lib.rs b/src/frontend/src/lib.rs
index 0a036b8e96233..450f49b6394cf 100644
--- a/src/frontend/src/lib.rs
+++ b/src/frontend/src/lib.rs
@@ -15,7 +15,7 @@
#![allow(clippy::derive_partial_eq_without_eq)]
#![feature(map_try_insert)]
#![feature(negative_impls)]
-#![feature(generators)]
+#![feature(coroutines)]
#![feature(proc_macro_hygiene, stmt_expr_attributes)]
#![feature(trait_alias)]
#![feature(extract_if)]
@@ -32,7 +32,6 @@
#![feature(extend_one)]
#![feature(type_alias_impl_trait)]
#![feature(impl_trait_in_assoc_type)]
-#![feature(async_fn_in_trait)]
#![feature(result_flattening)]
#![recursion_limit = "256"]
diff --git a/src/frontend/src/optimizer/mod.rs b/src/frontend/src/optimizer/mod.rs
index b4238f57b1f52..0df387b0a53d5 100644
--- a/src/frontend/src/optimizer/mod.rs
+++ b/src/frontend/src/optimizer/mod.rs
@@ -616,7 +616,7 @@ fn exist_and_no_exchange_before(plan: &PlanRef, is_candidate: fn(&PlanRef) -> bo
fn require_additional_exchange_on_root_in_distributed_mode(plan: PlanRef) -> bool {
fn is_user_table(plan: &PlanRef) -> bool {
plan.as_batch_seq_scan()
- .map(|node| !node.logical().is_sys_table)
+ .map(|node| !node.core().is_sys_table)
.unwrap_or(false)
}
@@ -649,7 +649,7 @@ fn require_additional_exchange_on_root_in_distributed_mode(plan: PlanRef) -> boo
fn require_additional_exchange_on_root_in_local_mode(plan: PlanRef) -> bool {
fn is_user_table(plan: &PlanRef) -> bool {
plan.as_batch_seq_scan()
- .map(|node| !node.logical().is_sys_table)
+ .map(|node| !node.core().is_sys_table)
.unwrap_or(false)
}
diff --git a/src/frontend/src/optimizer/plan_node/batch.rs b/src/frontend/src/optimizer/plan_node/batch.rs
index d62a85095d21c..2cb2360b3e51d 100644
--- a/src/frontend/src/optimizer/plan_node/batch.rs
+++ b/src/frontend/src/optimizer/plan_node/batch.rs
@@ -12,14 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use super::generic::GenericPlanRef;
+use super::generic::PhysicalPlanRef;
use crate::optimizer::property::Order;
-/// A subtrait of [`GenericPlanRef`] for batch plans.
+/// A subtrait of [`PhysicalPlanRef`] for batch plans.
///
/// Due to the lack of refactoring, all plan nodes currently implement this trait
/// through [`super::PlanBase`]. One may still use this trait as a bound for
-/// expecting a batch plan, in contrast to [`GenericPlanRef`].
-pub trait BatchPlanRef: GenericPlanRef {
+/// accessing a batch plan, in contrast to [`GenericPlanRef`] or
+/// [`PhysicalPlanRef`].
+///
+/// [`GenericPlanRef`]: super::generic::GenericPlanRef
+pub trait BatchPlanRef: PhysicalPlanRef {
fn order(&self) -> &Order;
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_delete.rs b/src/frontend/src/optimizer/plan_node/batch_delete.rs
index 600ec6827e3eb..85d22a46b450e 100644
--- a/src/frontend/src/optimizer/plan_node/batch_delete.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_delete.rs
@@ -27,35 +27,32 @@ use crate::optimizer::property::{Distribution, Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchDelete {
pub base: PlanBase,
- pub logical: generic::Delete<PlanRef>,
+ pub core: generic::Delete<PlanRef>,
}
impl BatchDelete {
- pub fn new(logical: generic::Delete<PlanRef>) -> Self {
- assert_eq!(logical.input.distribution(), &Distribution::Single);
- let base: PlanBase = PlanBase::new_batch_from_logical(
- &logical,
- logical.input.distribution().clone(),
- Order::any(),
- );
- Self { base, logical }
+ pub fn new(core: generic::Delete<PlanRef>) -> Self {
+ assert_eq!(core.input.distribution(), &Distribution::Single);
+ let base: PlanBase =
+ PlanBase::new_batch_with_core(&core, core.input.distribution().clone(), Order::any());
+ Self { base, core }
}
}
impl PlanTreeNodeUnary for BatchDelete {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut core = self.logical.clone();
+ let mut core = self.core.clone();
core.input = input;
Self::new(core)
}
}
impl_plan_tree_node_for_unary! { BatchDelete }
-impl_distill_by_unit!(BatchDelete, logical, "BatchDelete");
+impl_distill_by_unit!(BatchDelete, core, "BatchDelete");
impl ToDistributedBatch for BatchDelete {
fn to_distributed(&self) -> Result<PlanRef> {
@@ -68,9 +65,9 @@ impl ToDistributedBatch for BatchDelete {
impl ToBatchPb for BatchDelete {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::Delete(DeleteNode {
- table_id: self.logical.table_id.table_id(),
- table_version_id: self.logical.table_version_id,
- returning: self.logical.returning,
+ table_id: self.core.table_id.table_id(),
+ table_version_id: self.core.table_version_id,
+ returning: self.core.returning,
})
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_exchange.rs b/src/frontend/src/optimizer/plan_node/batch_exchange.rs
index 583838e877c5e..6477c7ec213e2 100644
--- a/src/frontend/src/optimizer/plan_node/batch_exchange.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_exchange.rs
@@ -17,6 +17,8 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::{ExchangeNode, MergeSortExchangeNode};
+use super::batch::BatchPlanRef;
+use super::generic::{GenericPlanRef, PhysicalPlanRef};
use super::utils::{childless_record, Distill};
use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
use crate::optimizer::plan_node::ToLocalBatch;
@@ -43,12 +45,12 @@ impl Distill for BatchExchange {
fn distill<'a>(&self) -> XmlNode<'a> {
let input_schema = self.input.schema();
let order = OrderDisplay {
- order: &self.base.order,
+ order: self.base.order(),
input_schema,
}
.distill();
let dist = Pretty::display(&DistributionDisplay {
- distribution: &self.base.dist,
+ distribution: self.base.distribution(),
input_schema,
});
childless_record("BatchExchange", vec![("order", order), ("dist", dist)])
@@ -75,18 +77,18 @@ impl ToDistributedBatch for BatchExchange {
/// The serialization of Batch Exchange is default cuz it will be rewritten in scheduler.
impl ToBatchPb for BatchExchange {
fn to_batch_prost_body(&self) -> NodeBody {
- if self.base.order.is_any() {
+ if self.base.order().is_any() {
NodeBody::Exchange(ExchangeNode {
sources: vec![],
- input_schema: self.base.schema.to_prost(),
+ input_schema: self.base.schema().to_prost(),
})
} else {
NodeBody::MergeSortExchange(MergeSortExchangeNode {
exchange: Some(ExchangeNode {
sources: vec![],
- input_schema: self.base.schema.to_prost(),
+ input_schema: self.base.schema().to_prost(),
}),
- column_orders: self.base.order.to_protobuf(),
+ column_orders: self.base.order().to_protobuf(),
})
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_expand.rs b/src/frontend/src/optimizer/plan_node/batch_expand.rs
index 72caa27858be1..af4413b9e5152 100644
--- a/src/frontend/src/optimizer/plan_node/batch_expand.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_expand.rs
@@ -29,38 +29,38 @@ use crate::optimizer::PlanRef;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchExpand {
pub base: PlanBase,
- logical: generic::Expand<PlanRef>,
+ core: generic::Expand<PlanRef>,
}
impl BatchExpand {
- pub fn new(logical: generic::Expand<PlanRef>) -> Self {
- let dist = match logical.input.distribution() {
+ pub fn new(core: generic::Expand<PlanRef>) -> Self {
+ let dist = match core.input.distribution() {
Distribution::Single => Distribution::Single,
Distribution::SomeShard
| Distribution::HashShard(_)
| Distribution::UpstreamHashShard(_, _) => Distribution::SomeShard,
Distribution::Broadcast => unreachable!(),
};
- let base = PlanBase::new_batch_from_logical(&logical, dist, Order::any());
- BatchExpand { base, logical }
+ let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
+ BatchExpand { base, core }
}
pub fn column_subsets(&self) -> &[Vec<usize>] {
- &self.logical.column_subsets
+ &self.core.column_subsets
}
}
-impl_distill_by_unit!(BatchExpand, logical, "BatchExpand");
+impl_distill_by_unit!(BatchExpand, core, "BatchExpand");
impl PlanTreeNodeUnary for BatchExpand {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_filter.rs b/src/frontend/src/optimizer/plan_node/batch_filter.rs
index aadbda9800b16..4bff7cbfee3c0 100644
--- a/src/frontend/src/optimizer/plan_node/batch_filter.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_filter.rs
@@ -26,35 +26,35 @@ use crate::utils::Condition;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchFilter {
pub base: PlanBase,
- logical: generic::Filter<PlanRef>,
+ core: generic::Filter<PlanRef>,
}
impl BatchFilter {
- pub fn new(logical: generic::Filter<PlanRef>) -> Self {
+ pub fn new(core: generic::Filter<PlanRef>) -> Self {
// TODO: derive from input
- let base = PlanBase::new_batch_from_logical(
- &logical,
- logical.input.distribution().clone(),
- logical.input.order().clone(),
+ let base = PlanBase::new_batch_with_core(
+ &core,
+ core.input.distribution().clone(),
+ core.input.order().clone(),
);
- BatchFilter { base, logical }
+ BatchFilter { base, core }
}
pub fn predicate(&self) -> &Condition {
- &self.logical.predicate
+ &self.core.predicate
}
}
-impl_distill_by_unit!(BatchFilter, logical, "BatchFilter");
+impl_distill_by_unit!(BatchFilter, core, "BatchFilter");
impl PlanTreeNodeUnary for BatchFilter {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
@@ -70,7 +70,7 @@ impl ToDistributedBatch for BatchFilter {
impl ToBatchPb for BatchFilter {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::Filter(FilterNode {
- search_condition: Some(ExprImpl::from(self.logical.predicate.clone()).to_expr_proto()),
+ search_condition: Some(ExprImpl::from(self.core.predicate.clone()).to_expr_proto()),
})
}
}
@@ -88,8 +88,8 @@ impl ExprRewritable for BatchFilter {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core).into()
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_group_topn.rs b/src/frontend/src/optimizer/plan_node/batch_group_topn.rs
index 1d61b4e9eb379..70ee8328623f5 100644
--- a/src/frontend/src/optimizer/plan_node/batch_group_topn.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_group_topn.rs
@@ -27,36 +27,33 @@ use crate::optimizer::property::{Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchGroupTopN {
pub base: PlanBase,
- logical: generic::TopN,
+ core: generic::TopN,
}
impl BatchGroupTopN {
- pub fn new(logical: generic::TopN) -> Self {
- assert!(!logical.group_key.is_empty());
- let base = PlanBase::new_batch_from_logical(
- &logical,
- logical.input.distribution().clone(),
- Order::any(),
- );
- BatchGroupTopN { base, logical }
+ pub fn new(core: generic::TopN) -> Self {
+ assert!(!core.group_key.is_empty());
+ let base =
+ PlanBase::new_batch_with_core(&core, core.input.distribution().clone(), Order::any());
+ BatchGroupTopN { base, core }
}
fn group_key(&self) -> &[usize] {
- &self.logical.group_key
+ &self.core.group_key
}
}
-impl_distill_by_unit!(BatchGroupTopN, logical, "BatchGroupTopN");
+impl_distill_by_unit!(BatchGroupTopN, core, "BatchGroupTopN");
impl PlanTreeNodeUnary for BatchGroupTopN {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
@@ -73,13 +70,13 @@ impl ToDistributedBatch for BatchGroupTopN {
impl ToBatchPb for BatchGroupTopN {
fn to_batch_prost_body(&self) -> NodeBody {
- let column_orders = self.logical.order.to_protobuf();
+ let column_orders = self.core.order.to_protobuf();
NodeBody::GroupTopN(GroupTopNNode {
- limit: self.logical.limit_attr.limit(),
- offset: self.logical.offset,
+ limit: self.core.limit_attr.limit(),
+ offset: self.core.offset,
column_orders,
group_key: self.group_key().iter().map(|c| *c as u32).collect(),
- with_ties: self.logical.limit_attr.with_ties(),
+ with_ties: self.core.limit_attr.with_ties(),
})
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs b/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs
index 7100125dcee99..b4ab3341ace29 100644
--- a/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs
@@ -31,27 +31,27 @@ use crate::utils::{ColIndexMappingRewriteExt, IndexSet};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchHashAgg {
pub base: PlanBase,
- logical: generic::Agg,
+ core: generic::Agg,
}
impl BatchHashAgg {
- pub fn new(logical: generic::Agg) -> Self {
- assert!(!logical.group_key.is_empty());
- let input = logical.input.clone();
+ pub fn new(core: generic::Agg) -> Self {
+ assert!(!core.group_key.is_empty());
+ let input = core.input.clone();
let input_dist = input.distribution();
- let dist = logical
+ let dist = core
.i2o_col_mapping()
.rewrite_provided_distribution(input_dist);
- let base = PlanBase::new_batch_from_logical(&logical, dist, Order::any());
- BatchHashAgg { base, logical }
+ let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
+ BatchHashAgg { base, core }
}
pub fn agg_calls(&self) -> &[PlanAggCall] {
- &self.logical.agg_calls
+ &self.core.agg_calls
}
pub fn group_key(&self) -> &IndexSet {
- &self.logical.group_key
+ &self.core.group_key
}
fn to_two_phase_agg(&self, dist_input: PlanRef) -> Result<PlanRef> {
@@ -68,7 +68,7 @@ impl BatchHashAgg {
// insert total agg
let total_agg_types = self
- .logical
+ .core
.agg_calls
.iter()
.enumerate()
@@ -95,29 +95,27 @@ impl BatchHashAgg {
}
}
-impl_distill_by_unit!(BatchHashAgg, logical, "BatchHashAgg");
+impl_distill_by_unit!(BatchHashAgg, core, "BatchHashAgg");
impl PlanTreeNodeUnary for BatchHashAgg {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
impl_plan_tree_node_for_unary! { BatchHashAgg }
impl ToDistributedBatch for BatchHashAgg {
fn to_distributed(&self) -> Result<PlanRef> {
- if self.logical.must_try_two_phase_agg() {
+ if self.core.must_try_two_phase_agg() {
let input = self.input().to_distributed()?;
let input_dist = input.distribution();
- if !self
- .logical
- .hash_agg_dist_satisfied_by_input_dist(input_dist)
+ if !self.core.hash_agg_dist_satisfied_by_input_dist(input_dist)
&& matches!(
input_dist,
Distribution::HashShard(_)
@@ -162,8 +160,8 @@ impl ExprRewritable for BatchHashAgg {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core).into()
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_hash_join.rs b/src/frontend/src/optimizer/plan_node/batch_hash_join.rs
index a4ecf8311a479..bad586d4af1e4 100644
--- a/src/frontend/src/optimizer/plan_node/batch_hash_join.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_hash_join.rs
@@ -36,7 +36,7 @@ use crate::utils::ColIndexMappingRewriteExt;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchHashJoin {
pub base: PlanBase,
- logical: generic::Join,
+ core: generic::Join,
/// The join condition must be equivalent to `logical.on`, but separated into equal and
/// non-equal parts to facilitate execution later
@@ -44,17 +44,13 @@ pub struct BatchHashJoin {
}
impl BatchHashJoin {
- pub fn new(logical: generic::Join, eq_join_predicate: EqJoinPredicate) -> Self {
- let dist = Self::derive_dist(
- logical.left.distribution(),
- logical.right.distribution(),
- &logical,
- );
- let base = PlanBase::new_batch_from_logical(&logical, dist, Order::any());
+ pub fn new(core: generic::Join, eq_join_predicate: EqJoinPredicate) -> Self {
+ let dist = Self::derive_dist(core.left.distribution(), core.right.distribution(), &core);
+ let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
Self {
base,
- logical,
+ core,
eq_join_predicate,
}
}
@@ -62,25 +58,21 @@ impl BatchHashJoin {
pub(super) fn derive_dist(
left: &Distribution,
right: &Distribution,
- logical: &generic::Join,
+ join: &generic::Join,
) -> Distribution {
match (left, right) {
(Distribution::Single, Distribution::Single) => Distribution::Single,
// we can not derive the hash distribution from the side where outer join can generate a
// NULL row
- (Distribution::HashShard(_), Distribution::HashShard(_)) => match logical.join_type {
+ (Distribution::HashShard(_), Distribution::HashShard(_)) => match join.join_type {
JoinType::Unspecified => unreachable!(),
JoinType::FullOuter => Distribution::SomeShard,
JoinType::Inner | JoinType::LeftOuter | JoinType::LeftSemi | JoinType::LeftAnti => {
- let l2o = logical
- .l2i_col_mapping()
- .composite(&logical.i2o_col_mapping());
+ let l2o = join.l2i_col_mapping().composite(&join.i2o_col_mapping());
l2o.rewrite_provided_distribution(left)
}
JoinType::RightSemi | JoinType::RightAnti | JoinType::RightOuter => {
- let r2o = logical
- .r2i_col_mapping()
- .composite(&logical.i2o_col_mapping());
+ let r2o = join.r2i_col_mapping().composite(&join.i2o_col_mapping());
r2o.rewrite_provided_distribution(right)
}
},
@@ -99,11 +91,11 @@ impl BatchHashJoin {
impl Distill for BatchHashJoin {
fn distill<'a>(&self) -> XmlNode<'a> {
- let verbose = self.base.ctx.is_explain_verbose();
+ let verbose = self.base.ctx().is_explain_verbose();
let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
- vec.push(("type", Pretty::debug(&self.logical.join_type)));
+ vec.push(("type", Pretty::debug(&self.core.join_type)));
- let concat_schema = self.logical.concat_schema();
+ let concat_schema = self.core.concat_schema();
vec.push((
"predicate",
Pretty::debug(&EqJoinPredicateDisplay {
@@ -112,7 +104,7 @@ impl Distill for BatchHashJoin {
}),
));
if verbose {
- let data = IndicesDisplay::from_join(&self.logical, &concat_schema);
+ let data = IndicesDisplay::from_join(&self.core, &concat_schema);
vec.push(("output", data));
}
childless_record("BatchHashJoin", vec)
@@ -121,18 +113,18 @@ impl Distill for BatchHashJoin {
impl PlanTreeNodeBinary for BatchHashJoin {
fn left(&self) -> PlanRef {
- self.logical.left.clone()
+ self.core.left.clone()
}
fn right(&self) -> PlanRef {
- self.logical.right.clone()
+ self.core.right.clone()
}
fn clone_with_left_right(&self, left: PlanRef, right: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.left = left;
- logical.right = right;
- Self::new(logical, self.eq_join_predicate.clone())
+ let mut core = self.core.clone();
+ core.left = left;
+ core.right = right;
+ Self::new(core, self.eq_join_predicate.clone())
}
}
@@ -200,7 +192,7 @@ impl ToDistributedBatch for BatchHashJoin {
impl ToBatchPb for BatchHashJoin {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::HashJoin(HashJoinNode {
- join_type: self.logical.join_type as i32,
+ join_type: self.core.join_type as i32,
left_key: self
.eq_join_predicate
.left_eq_indexes()
@@ -219,12 +211,7 @@ impl ToBatchPb for BatchHashJoin {
.other_cond()
.as_expr_unless_true()
.map(|x| x.to_expr_proto()),
- output_indices: self
- .logical
- .output_indices
- .iter()
- .map(|&x| x as u32)
- .collect(),
+ output_indices: self.core.output_indices.iter().map(|&x| x as u32).collect(),
})
}
}
@@ -246,8 +233,8 @@ impl ExprRewritable for BatchHashJoin {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical, self.eq_join_predicate.rewrite_exprs(r)).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core, self.eq_join_predicate.rewrite_exprs(r)).into()
}
}
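
The `derive_dist` rewrite above keeps the underlying rule intact: a hash-shard distribution can only be propagated from a join side that cannot produce NULL-padded rows, and its key columns must be translated from that side's indices into the join's output indices by composing the side-to-internal and internal-to-output column mappings (`l2i_col_mapping` composed with `i2o_col_mapping`). The sketch below imitates that translation with plain vectors; `compose` and `remap_key` are illustrative helpers, not the real `ColIndexMapping` API:

```rust
// Sketch: remap a hash-distribution key from the left input's column space
// into the join's output column space. Illustrative helpers only.

/// mapping[i] = Some(j) means input column i appears as output column j.
type Mapping = Vec<Option<usize>>;

fn compose(first: &Mapping, second: &Mapping) -> Mapping {
    first
        .iter()
        .map(|m| m.and_then(|i| second.get(i).copied().flatten()))
        .collect()
}

fn remap_key(key: &[usize], mapping: &Mapping) -> Option<Vec<usize>> {
    key.iter().map(|&c| mapping.get(c).copied().flatten()).collect()
}

fn main() {
    // Left input has 3 columns; they land at internal positions 0..3.
    let l2i: Mapping = vec![Some(0), Some(1), Some(2)];
    // The join outputs internal columns [1, 3]:
    // internal column 1 -> output 0, internal column 3 -> output 1.
    let i2o: Mapping = vec![None, Some(0), None, Some(1)];

    let l2o = compose(&l2i, &i2o);

    // Left side is hash-distributed on its column 1; in the join's output
    // that same data is column 0, so the join is hash-distributed on [0].
    assert_eq!(remap_key(&[1], &l2o), Some(vec![0]));
    // A key column that is not visible in the output cannot be propagated.
    assert_eq!(remap_key(&[2], &l2o), None);
}
```
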
diff --git a/src/frontend/src/optimizer/plan_node/batch_hop_window.rs b/src/frontend/src/optimizer/plan_node/batch_hop_window.rs
index c4b84b7232d1a..2a4a27f9a0583 100644
--- a/src/frontend/src/optimizer/plan_node/batch_hop_window.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_hop_window.rs
@@ -30,45 +30,42 @@ use crate::utils::ColIndexMappingRewriteExt;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchHopWindow {
pub base: PlanBase,
- logical: generic::HopWindow,
+ core: generic::HopWindow,
window_start_exprs: Vec<ExprImpl>,
window_end_exprs: Vec<ExprImpl>,
}
impl BatchHopWindow {
pub fn new(
- logical: generic::HopWindow,
+ core: generic::HopWindow,
window_start_exprs: Vec<ExprImpl>,
window_end_exprs: Vec<ExprImpl>,
) -> Self {
- let distribution = logical
+ let distribution = core
.i2o_col_mapping()
- .rewrite_provided_distribution(logical.input.distribution());
- let base = PlanBase::new_batch_from_logical(
- &logical,
- distribution,
- logical.get_out_column_index_order(),
- );
+ .rewrite_provided_distribution(core.input.distribution());
+ let base =
+ PlanBase::new_batch_with_core(&core, distribution, core.get_out_column_index_order());
BatchHopWindow {
base,
- logical,
+ core,
window_start_exprs,
window_end_exprs,
}
}
}
-impl_distill_by_unit!(BatchHopWindow, logical, "BatchHopWindow");
+impl_distill_by_unit!(BatchHopWindow, core, "BatchHopWindow");
impl PlanTreeNodeUnary for BatchHopWindow {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
+ let mut core = self.core.clone();
+ core.input = input;
Self::new(
- logical,
+ core,
self.window_start_exprs.clone(),
self.window_end_exprs.clone(),
)
@@ -92,13 +89,13 @@ impl ToDistributedBatch for BatchHopWindow {
// communication.
// We pass the required dist to its input.
let input_required = self
- .logical
+ .core
.o2i_col_mapping()
.rewrite_required_distribution(required_dist);
let new_input = self
.input()
.to_distributed_with_required(required_order, &input_required)?;
- let mut new_logical = self.logical.clone();
+ let mut new_logical = self.core.clone();
new_logical.input = new_input;
let batch_plan = BatchHopWindow::new(
new_logical,
@@ -113,15 +110,10 @@ impl ToDistributedBatch for BatchHopWindow {
impl ToBatchPb for BatchHopWindow {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::HopWindow(HopWindowNode {
- time_col: self.logical.time_col.index() as _,
- window_slide: Some(self.logical.window_slide.into()),
- window_size: Some(self.logical.window_size.into()),
- output_indices: self
- .logical
- .output_indices
- .iter()
- .map(|&x| x as u32)
- .collect(),
+ time_col: self.core.time_col.index() as _,
+ window_slide: Some(self.core.window_slide.into()),
+ window_size: Some(self.core.window_size.into()),
+ output_indices: self.core.output_indices.iter().map(|&x| x as u32).collect(),
window_start_exprs: self
.window_start_exprs
.clone()
@@ -152,7 +144,7 @@ impl ExprRewritable for BatchHopWindow {
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
Self::new(
- self.logical.clone(),
+ self.core.clone(),
self.window_start_exprs
.clone()
.into_iter()
diff --git a/src/frontend/src/optimizer/plan_node/batch_insert.rs b/src/frontend/src/optimizer/plan_node/batch_insert.rs
index 305de0e2f6eaa..aec05eee145b8 100644
--- a/src/frontend/src/optimizer/plan_node/batch_insert.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_insert.rs
@@ -18,6 +18,7 @@ use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::InsertNode;
use risingwave_pb::plan_common::{DefaultColumns, IndexAndExpr};
+use super::generic::GenericPlanRef;
use super::utils::{childless_record, Distill};
use super::{generic, ExprRewritable, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
use crate::expr::Expr;
@@ -28,40 +29,37 @@ use crate::optimizer::property::{Distribution, Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchInsert {
pub base: PlanBase,
- pub logical: generic::Insert,
+ pub core: generic::Insert,
}
impl BatchInsert {
- pub fn new(logical: generic::Insert) -> Self {
- assert_eq!(logical.input.distribution(), &Distribution::Single);
- let base: PlanBase = PlanBase::new_batch_from_logical(
- &logical,
- logical.input.distribution().clone(),
- Order::any(),
- );
+ pub fn new(core: generic::Insert) -> Self {
+ assert_eq!(core.input.distribution(), &Distribution::Single);
+ let base: PlanBase =
+ PlanBase::new_batch_with_core(&core, core.input.distribution().clone(), Order::any());
- BatchInsert { base, logical }
+ BatchInsert { base, core }
}
}
impl Distill for BatchInsert {
fn distill<'a>(&self) -> XmlNode<'a> {
let vec = self
- .logical
- .fields_pretty(self.base.ctx.is_explain_verbose());
+ .core
+ .fields_pretty(self.base.ctx().is_explain_verbose());
childless_record("BatchInsert", vec)
}
}
impl PlanTreeNodeUnary for BatchInsert {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
@@ -77,14 +75,9 @@ impl ToDistributedBatch for BatchInsert {
impl ToBatchPb for BatchInsert {
fn to_batch_prost_body(&self) -> NodeBody {
- let column_indices = self
- .logical
- .column_indices
- .iter()
- .map(|&i| i as u32)
- .collect();
+ let column_indices = self.core.column_indices.iter().map(|&i| i as u32).collect();
- let default_columns = &self.logical.default_columns;
+ let default_columns = &self.core.default_columns;
let has_default_columns = !default_columns.is_empty();
let default_columns = DefaultColumns {
default_columns: default_columns
@@ -96,16 +89,16 @@ impl ToBatchPb for BatchInsert {
.collect(),
};
NodeBody::Insert(InsertNode {
- table_id: self.logical.table_id.table_id(),
- table_version_id: self.logical.table_version_id,
+ table_id: self.core.table_id.table_id(),
+ table_version_id: self.core.table_version_id,
column_indices,
default_columns: if has_default_columns {
Some(default_columns)
} else {
None
},
- row_id_index: self.logical.row_id_index.map(|index| index as _),
- returning: self.logical.returning,
+ row_id_index: self.core.row_id_index.map(|index| index as _),
+ returning: self.core.returning,
})
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_limit.rs b/src/frontend/src/optimizer/plan_node/batch_limit.rs
index e617f1c2cd544..93b14d0198979 100644
--- a/src/frontend/src/optimizer/plan_node/batch_limit.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_limit.rs
@@ -16,6 +16,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::LimitNode;
+use super::generic::PhysicalPlanRef;
use super::utils::impl_distill_by_unit;
use super::{
generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch,
@@ -27,21 +28,21 @@ use crate::optimizer::property::{Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchLimit {
pub base: PlanBase,
- logical: generic::Limit,
+ core: generic::Limit,
}
impl BatchLimit {
- pub fn new(logical: generic::Limit) -> Self {
- let base = PlanBase::new_batch_from_logical(
- &logical,
- logical.input.distribution().clone(),
- logical.input.order().clone(),
+ pub fn new(core: generic::Limit) -> Self {
+ let base = PlanBase::new_batch_with_core(
+ &core,
+ core.input.distribution().clone(),
+ core.input.order().clone(),
);
- BatchLimit { base, logical }
+ BatchLimit { base, core }
}
fn two_phase_limit(&self, new_input: PlanRef) -> Result<PlanRef> {
- let new_limit = self.logical.limit + self.logical.offset;
+ let new_limit = self.core.limit + self.core.offset;
let new_offset = 0;
let logical_partial_limit = generic::Limit::new(new_input.clone(), new_limit, new_offset);
let batch_partial_limit = Self::new(logical_partial_limit);
@@ -60,27 +61,27 @@ impl BatchLimit {
}
pub fn limit(&self) -> u64 {
- self.logical.limit
+ self.core.limit
}
pub fn offset(&self) -> u64 {
- self.logical.offset
+ self.core.offset
}
}
impl PlanTreeNodeUnary for BatchLimit {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut core = self.logical.clone();
+ let mut core = self.core.clone();
core.input = input;
Self::new(core)
}
}
impl_plan_tree_node_for_unary! {BatchLimit}
-impl_distill_by_unit!(BatchLimit, logical, "BatchLimit");
+impl_distill_by_unit!(BatchLimit, core, "BatchLimit");
impl ToDistributedBatch for BatchLimit {
fn to_distributed(&self) -> Result<PlanRef> {
@@ -91,8 +92,8 @@ impl ToDistributedBatch for BatchLimit {
impl ToBatchPb for BatchLimit {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::Limit(LimitNode {
- limit: self.logical.limit,
- offset: self.logical.offset,
+ limit: self.core.limit,
+ offset: self.core.offset,
})
}
}
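
`two_phase_limit` above relies on a simple identity: fetching `limit` rows after skipping `offset` rows is equivalent to letting each partial stage keep `limit + offset` rows with no offset and then applying the original `limit` and `offset` once at the final stage. A quick check of that arithmetic on a plain slice (a sketch of the identity only, not the distributed executor):

```rust
// Sketch: verify the limit/offset split used by two-phase limit on a sorted
// sequence. This models a single merged stream, not the distributed executor.

fn limit_offset(rows: &[i32], limit: usize, offset: usize) -> Vec<i32> {
    rows.iter().skip(offset).take(limit).copied().collect()
}

fn main() {
    let rows: Vec<i32> = (0..20).collect();
    let (limit, offset) = (3usize, 5usize);

    // Phase 1: the partial limit keeps `limit + offset` rows with offset 0.
    let partial = limit_offset(&rows, limit + offset, 0);
    // Phase 2: the final limit applies the original limit and offset.
    let two_phase = limit_offset(&partial, limit, offset);

    // Single-phase result for comparison.
    let single_phase = limit_offset(&rows, limit, offset);
    assert_eq!(two_phase, single_phase);
    println!("{:?}", two_phase); // [5, 6, 7]
}
```
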
diff --git a/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs b/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs
index 3098019499b76..48f99668c3af7 100644
--- a/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs
@@ -18,7 +18,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::{DistributedLookupJoinNode, LocalLookupJoinNode};
-use super::generic::{self};
+use super::generic::{self, GenericPlanRef};
use super::utils::{childless_record, Distill};
use super::ExprRewritable;
use crate::expr::{Expr, ExprRewriter};
@@ -34,7 +34,7 @@ use crate::utils::ColIndexMappingRewriteExt;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchLookupJoin {
pub base: PlanBase,
- logical: generic::Join,
+ core: generic::Join,
/// The join condition must be equivalent to `logical.on`, but separated into equal and
/// non-equal parts to facilitate execution later
@@ -56,7 +56,7 @@ pub struct BatchLookupJoin {
impl BatchLookupJoin {
pub fn new(
- logical: generic::Join,
+ core: generic::Join,
eq_join_predicate: EqJoinPredicate,
right_table_desc: TableDesc,
right_output_column_ids: Vec<ColumnId>,
@@ -67,11 +67,11 @@ impl BatchLookupJoin {
// lookup.
assert!(eq_join_predicate.has_eq());
assert!(eq_join_predicate.eq_keys_are_type_aligned());
- let dist = Self::derive_dist(logical.left.distribution(), &logical);
- let base = PlanBase::new_batch_from_logical(&logical, dist, Order::any());
+ let dist = Self::derive_dist(core.left.distribution(), &core);
+ let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
Self {
base,
- logical,
+ core,
eq_join_predicate,
right_table_desc,
right_output_column_ids,
@@ -80,13 +80,11 @@ impl BatchLookupJoin {
}
}
- fn derive_dist(left: &Distribution, logical: &generic::Join) -> Distribution {
+ fn derive_dist(left: &Distribution, core: &generic::Join) -> Distribution {
match left {
Distribution::Single => Distribution::Single,
Distribution::HashShard(_) | Distribution::UpstreamHashShard(_, _) => {
- let l2o = logical
- .l2i_col_mapping()
- .composite(&logical.i2o_col_mapping());
+ let l2o = core.l2i_col_mapping().composite(&core.i2o_col_mapping());
l2o.rewrite_provided_distribution(left)
}
_ => unreachable!(),
@@ -114,11 +112,11 @@ impl BatchLookupJoin {
impl Distill for BatchLookupJoin {
fn distill<'a>(&self) -> XmlNode<'a> {
- let verbose = self.base.ctx.is_explain_verbose();
+ let verbose = self.base.ctx().is_explain_verbose();
let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
- vec.push(("type", Pretty::debug(&self.logical.join_type)));
+ vec.push(("type", Pretty::debug(&self.core.join_type)));
- let concat_schema = self.logical.concat_schema();
+ let concat_schema = self.core.concat_schema();
vec.push((
"predicate",
Pretty::debug(&EqJoinPredicateDisplay {
@@ -128,7 +126,7 @@ impl Distill for BatchLookupJoin {
));
if verbose {
- let data = IndicesDisplay::from_join(&self.logical, &concat_schema);
+ let data = IndicesDisplay::from_join(&self.core, &concat_schema);
vec.push(("output", data));
}
@@ -138,15 +136,15 @@ impl Distill for BatchLookupJoin {
impl PlanTreeNodeUnary for BatchLookupJoin {
fn input(&self) -> PlanRef {
- self.logical.left.clone()
+ self.core.left.clone()
}
// Only change left side
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.left = input;
+ let mut core = self.core.clone();
+ core.left = input;
Self::new(
- logical,
+ core,
self.eq_join_predicate.clone(),
self.right_table_desc.clone(),
self.right_output_column_ids.clone(),
@@ -199,7 +197,7 @@ impl ToBatchPb for BatchLookupJoin {
fn to_batch_prost_body(&self) -> NodeBody {
if self.distributed_lookup {
NodeBody::DistributedLookupJoin(DistributedLookupJoinNode {
- join_type: self.logical.join_type as i32,
+ join_type: self.core.join_type as i32,
condition: self
.eq_join_predicate
.other_cond()
@@ -223,18 +221,13 @@ impl ToBatchPb for BatchLookupJoin {
.iter()
.map(ColumnId::get_id)
.collect(),
- output_indices: self
- .logical
- .output_indices
- .iter()
- .map(|&x| x as u32)
- .collect(),
+ output_indices: self.core.output_indices.iter().map(|&x| x as u32).collect(),
null_safe: self.eq_join_predicate.null_safes(),
lookup_prefix_len: self.lookup_prefix_len as u32,
})
} else {
NodeBody::LocalLookupJoin(LocalLookupJoinNode {
- join_type: self.logical.join_type as i32,
+ join_type: self.core.join_type as i32,
condition: self
.eq_join_predicate
.other_cond()
@@ -259,12 +252,7 @@ impl ToBatchPb for BatchLookupJoin {
.iter()
.map(ColumnId::get_id)
.collect(),
- output_indices: self
- .logical
- .output_indices
- .iter()
- .map(|&x| x as u32)
- .collect(),
+ output_indices: self.core.output_indices.iter().map(|&x| x as u32).collect(),
worker_nodes: vec![], // To be filled in at local.rs
null_safe: self.eq_join_predicate.null_safes(),
lookup_prefix_len: self.lookup_prefix_len as u32,
@@ -289,11 +277,11 @@ impl ExprRewritable for BatchLookupJoin {
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
let base = self.base.clone_with_new_plan_id();
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
Self {
base,
- logical,
+ core,
eq_join_predicate: self.eq_join_predicate.rewrite_exprs(r),
..Self::clone(self)
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs b/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs
index 79d9f07d8eadc..8980ad2f23f6d 100644
--- a/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs
@@ -17,7 +17,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::NestedLoopJoinNode;
-use super::generic::{self};
+use super::generic::{self, GenericPlanRef};
use super::utils::{childless_record, Distill};
use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeBinary, ToBatchPb, ToDistributedBatch};
use crate::expr::{Expr, ExprImpl, ExprRewriter};
@@ -31,14 +31,14 @@ use crate::utils::ConditionDisplay;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchNestedLoopJoin {
pub base: PlanBase,
- logical: generic::Join,
+ core: generic::Join,
}
impl BatchNestedLoopJoin {
- pub fn new(logical: generic::Join) -> Self {
- let dist = Self::derive_dist(logical.left.distribution(), logical.right.distribution());
- let base = PlanBase::new_batch_from_logical(&logical, dist, Order::any());
- Self { base, logical }
+ pub fn new(core: generic::Join) -> Self {
+ let dist = Self::derive_dist(core.left.distribution(), core.right.distribution());
+ let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
+ Self { base, core }
}
fn derive_dist(left: &Distribution, right: &Distribution) -> Distribution {
@@ -51,21 +51,21 @@ impl BatchNestedLoopJoin {
impl Distill for BatchNestedLoopJoin {
fn distill<'a>(&self) -> XmlNode<'a> {
- let verbose = self.base.ctx.is_explain_verbose();
+ let verbose = self.base.ctx().is_explain_verbose();
let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
- vec.push(("type", Pretty::debug(&self.logical.join_type)));
+ vec.push(("type", Pretty::debug(&self.core.join_type)));
- let concat_schema = self.logical.concat_schema();
+ let concat_schema = self.core.concat_schema();
vec.push((
"predicate",
Pretty::debug(&ConditionDisplay {
- condition: &self.logical.on,
+ condition: &self.core.on,
input_schema: &concat_schema,
}),
));
if verbose {
- let data = IndicesDisplay::from_join(&self.logical, &concat_schema);
+ let data = IndicesDisplay::from_join(&self.core, &concat_schema);
vec.push(("output", data));
}
@@ -75,18 +75,18 @@ impl Distill for BatchNestedLoopJoin {
impl PlanTreeNodeBinary for BatchNestedLoopJoin {
fn left(&self) -> PlanRef {
- self.logical.left.clone()
+ self.core.left.clone()
}
fn right(&self) -> PlanRef {
- self.logical.right.clone()
+ self.core.right.clone()
}
fn clone_with_left_right(&self, left: PlanRef, right: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.left = left;
- logical.right = right;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.left = left;
+ core.right = right;
+ Self::new(core)
}
}
@@ -108,14 +108,9 @@ impl ToDistributedBatch for BatchNestedLoopJoin {
impl ToBatchPb for BatchNestedLoopJoin {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::NestedLoopJoin(NestedLoopJoinNode {
- join_type: self.logical.join_type as i32,
- join_cond: Some(ExprImpl::from(self.logical.on.clone()).to_expr_proto()),
- output_indices: self
- .logical
- .output_indices
- .iter()
- .map(|&x| x as u32)
- .collect(),
+ join_type: self.core.join_type as i32,
+ join_cond: Some(ExprImpl::from(self.core.on.clone()).to_expr_proto()),
+ output_indices: self.core.output_indices.iter().map(|&x| x as u32).collect(),
})
}
}
@@ -138,8 +133,8 @@ impl ExprRewritable for BatchNestedLoopJoin {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core).into()
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_over_window.rs b/src/frontend/src/optimizer/plan_node/batch_over_window.rs
index aa6e53246697e..fb455758f331a 100644
--- a/src/frontend/src/optimizer/plan_node/batch_over_window.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_over_window.rs
@@ -17,6 +17,7 @@ use risingwave_common::util::sort_util::{ColumnOrder, OrderType};
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::SortOverWindowNode;
+use super::batch::BatchPlanRef;
use super::generic::PlanWindowFunction;
use super::utils::impl_distill_by_unit;
use super::{
@@ -28,27 +29,26 @@ use crate::optimizer::property::{Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchOverWindow {
pub base: PlanBase,
- logical: generic::OverWindow,
+ core: generic::OverWindow,
}
impl BatchOverWindow {
- pub fn new(logical: generic::OverWindow) -> Self {
- assert!(logical.funcs_have_same_partition_and_order());
+ pub fn new(core: generic::OverWindow) -> Self {
+ assert!(core.funcs_have_same_partition_and_order());
- let input = &logical.input;
+ let input = &core.input;
let input_dist = input.distribution().clone();
let order = Order::new(
- logical
- .partition_key_indices()
+ core.partition_key_indices()
.into_iter()
.map(|idx| ColumnOrder::new(idx, OrderType::default()))
- .chain(logical.order_key().iter().cloned())
+ .chain(core.order_key().iter().cloned())
.collect(),
);
- let base = PlanBase::new_batch_from_logical(&logical, input_dist, order);
- BatchOverWindow { base, logical }
+ let base = PlanBase::new_batch_with_core(&core, input_dist, order);
+ BatchOverWindow { base, core }
}
fn expected_input_order(&self) -> Order {
@@ -56,17 +56,17 @@ impl BatchOverWindow {
}
}
-impl_distill_by_unit!(BatchOverWindow, logical, "BatchOverWindow");
+impl_distill_by_unit!(BatchOverWindow, core, "BatchOverWindow");
impl PlanTreeNodeUnary for BatchOverWindow {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
@@ -78,7 +78,7 @@ impl ToDistributedBatch for BatchOverWindow {
&self.expected_input_order(),
&RequiredDist::shard_by_key(
self.input().schema().len(),
- &self.logical.partition_key_indices(),
+ &self.core.partition_key_indices(),
),
)?;
Ok(self.clone_with_input(new_input).into())
@@ -98,13 +98,13 @@ impl ToBatchPb for BatchOverWindow {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::SortOverWindow(SortOverWindowNode {
calls: self
- .logical
+ .core
.window_functions()
.iter()
.map(PlanWindowFunction::to_protobuf)
.collect(),
partition_by: self
- .logical
+ .core
.partition_key_indices()
.into_iter()
.map(|idx| idx as _)
diff --git a/src/frontend/src/optimizer/plan_node/batch_project.rs b/src/frontend/src/optimizer/plan_node/batch_project.rs
index d3979b8aebdee..642683967c5c3 100644
--- a/src/frontend/src/optimizer/plan_node/batch_project.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_project.rs
@@ -18,6 +18,7 @@ use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::ProjectNode;
use risingwave_pb::expr::ExprNode;
+use super::generic::GenericPlanRef;
use super::utils::{childless_record, Distill};
use super::{
generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch,
@@ -31,46 +32,46 @@ use crate::utils::ColIndexMappingRewriteExt;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchProject {
pub base: PlanBase,
- logical: generic::Project,
+ core: generic::Project,
}
impl BatchProject {
- pub fn new(logical: generic::Project) -> Self {
- let distribution = logical
+ pub fn new(core: generic::Project) -> Self {
+ let distribution = core
.i2o_col_mapping()
- .rewrite_provided_distribution(logical.input.distribution());
- let order = logical
+ .rewrite_provided_distribution(core.input.distribution());
+ let order = core
.i2o_col_mapping()
- .rewrite_provided_order(logical.input.order());
+ .rewrite_provided_order(core.input.order());
- let base = PlanBase::new_batch_from_logical(&logical, distribution, order);
- BatchProject { base, logical }
+ let base = PlanBase::new_batch_with_core(&core, distribution, order);
+ BatchProject { base, core }
}
pub fn as_logical(&self) -> &generic::Project {
- &self.logical
+ &self.core
}
pub fn exprs(&self) -> &Vec<ExprImpl> {
- &self.logical.exprs
+ &self.core.exprs
}
}
impl Distill for BatchProject {
fn distill<'a>(&self) -> XmlNode<'a> {
- childless_record("BatchProject", self.logical.fields_pretty(self.schema()))
+ childless_record("BatchProject", self.core.fields_pretty(self.schema()))
}
}
impl PlanTreeNodeUnary for BatchProject {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
@@ -86,7 +87,7 @@ impl ToDistributedBatch for BatchProject {
impl ToBatchPb for BatchProject {
fn to_batch_prost_body(&self) -> NodeBody {
let select_list = self
- .logical
+ .core
.exprs
.iter()
.map(|expr| expr.to_expr_proto())
@@ -108,8 +109,8 @@ impl ExprRewritable for BatchProject {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core).into()
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_project_set.rs b/src/frontend/src/optimizer/plan_node/batch_project_set.rs
index b86211aaaa211..5888df9d15889 100644
--- a/src/frontend/src/optimizer/plan_node/batch_project_set.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_project_set.rs
@@ -29,35 +29,32 @@ use crate::utils::ColIndexMappingRewriteExt;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchProjectSet {
pub base: PlanBase,
- logical: generic::ProjectSet,
+ core: generic::ProjectSet,
}
impl BatchProjectSet {
- pub fn new(logical: generic::ProjectSet) -> Self {
- let distribution = logical
+ pub fn new(core: generic::ProjectSet) -> Self {
+ let distribution = core
.i2o_col_mapping()
- .rewrite_provided_distribution(logical.input.distribution());
+ .rewrite_provided_distribution(core.input.distribution());
- let base = PlanBase::new_batch_from_logical(
- &logical,
- distribution,
- logical.get_out_column_index_order(),
- );
- BatchProjectSet { base, logical }
+ let base =
+ PlanBase::new_batch_with_core(&core, distribution, core.get_out_column_index_order());
+ BatchProjectSet { base, core }
}
}
-impl_distill_by_unit!(BatchProjectSet, logical, "BatchProjectSet");
+impl_distill_by_unit!(BatchProjectSet, core, "BatchProjectSet");
impl PlanTreeNodeUnary for BatchProjectSet {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
@@ -76,7 +73,7 @@ impl ToBatchPb for BatchProjectSet {
fn to_batch_prost_body(&self) -> NodeBody {
NodeBody::ProjectSet(ProjectSetNode {
select_list: self
- .logical
+ .core
.select_list
.iter()
.map(|select_item| select_item.to_project_set_select_item_proto())
@@ -98,8 +95,8 @@ impl ExprRewritable for BatchProjectSet {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core).into()
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs
index cfc557fe375c6..6834ed29353b9 100644
--- a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs
@@ -24,6 +24,8 @@ use risingwave_pb::batch_plan::row_seq_scan_node::ChunkSize;
use risingwave_pb::batch_plan::{RowSeqScanNode, SysRowSeqScanNode};
use risingwave_pb::plan_common::PbColumnDesc;
+use super::batch::BatchPlanRef;
+use super::generic::{GenericPlanRef, PhysicalPlanRef};
use super::utils::{childless_record, Distill};
use super::{generic, ExprRewritable, PlanBase, PlanRef, ToBatchPb, ToDistributedBatch};
use crate::catalog::ColumnId;
@@ -35,25 +37,25 @@ use crate::optimizer::property::{Distribution, DistributionDisplay, Order};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchSeqScan {
pub base: PlanBase,
- logical: generic::Scan,
+ core: generic::Scan,
scan_ranges: Vec<ScanRange>,
}
impl BatchSeqScan {
- fn new_inner(logical: generic::Scan, dist: Distribution, scan_ranges: Vec<ScanRange>) -> Self {
+ fn new_inner(core: generic::Scan, dist: Distribution, scan_ranges: Vec<ScanRange>) -> Self {
let order = if scan_ranges.len() > 1 {
Order::any()
} else {
- logical.get_out_column_index_order()
+ core.get_out_column_index_order()
};
- let base = PlanBase::new_batch_from_logical(&logical, dist, order);
+ let base = PlanBase::new_batch_with_core(&core, dist, order);
{
// validate scan_range
scan_ranges.iter().for_each(|scan_range| {
assert!(!scan_range.is_full_table_scan());
let scan_pk_prefix_len = scan_range.eq_conds.len();
- let order_len = logical.table_desc.order_column_indices().len();
+ let order_len = core.table_desc.order_column_indices().len();
assert!(
scan_pk_prefix_len < order_len
|| (scan_pk_prefix_len == order_len && is_full_range(&scan_range.range)),
@@ -64,23 +66,23 @@ impl BatchSeqScan {
Self {
base,
- logical,
+ core,
scan_ranges,
}
}
- pub fn new(logical: generic::Scan, scan_ranges: Vec<ScanRange>) -> Self {
+ pub fn new(core: generic::Scan, scan_ranges: Vec<ScanRange>) -> Self {
// Use `Single` by default, will be updated later with `clone_with_dist`.
- Self::new_inner(logical, Distribution::Single, scan_ranges)
+ Self::new_inner(core, Distribution::Single, scan_ranges)
}
fn clone_with_dist(&self) -> Self {
Self::new_inner(
- self.logical.clone(),
- if self.logical.is_sys_table {
+ self.core.clone(),
+ if self.core.is_sys_table {
Distribution::Single
} else {
- match self.logical.distribution_key() {
+ match self.core.distribution_key() {
None => Distribution::SomeShard,
Some(distribution_key) => {
if distribution_key.is_empty() {
@@ -97,7 +99,7 @@ impl BatchSeqScan {
// inserted.
Distribution::UpstreamHashShard(
distribution_key,
- self.logical.table_desc.table_id,
+ self.core.table_desc.table_id,
)
}
}
@@ -109,8 +111,8 @@ impl BatchSeqScan {
/// Get a reference to the batch seq scan's logical.
#[must_use]
- pub fn logical(&self) -> &generic::Scan {
- &self.logical
+ pub fn core(&self) -> &generic::Scan {
+ &self.core
}
pub fn scan_ranges(&self) -> &[ScanRange] {
@@ -119,8 +121,8 @@ impl BatchSeqScan {
fn scan_ranges_as_strs(&self, verbose: bool) -> Vec<String> {
let order_names = match verbose {
- true => self.logical.order_names_with_table_prefix(),
- false => self.logical.order_names(),
+ true => self.core.order_names_with_table_prefix(),
+ false => self.core.order_names(),
};
let mut range_strs = vec![];
@@ -180,10 +182,10 @@ fn range_to_string(name: &str, range: &(Bound, Bound)) -
impl Distill for BatchSeqScan {
fn distill<'a>(&self) -> XmlNode<'a> {
- let verbose = self.base.ctx.is_explain_verbose();
+ let verbose = self.base.ctx().is_explain_verbose();
let mut vec = Vec::with_capacity(4);
- vec.push(("table", Pretty::from(self.logical.table_name.clone())));
- vec.push(("columns", self.logical.columns_pretty(verbose)));
+ vec.push(("table", Pretty::from(self.core.table_name.clone())));
+ vec.push(("columns", self.core.columns_pretty(verbose)));
if !self.scan_ranges.is_empty() {
let range_strs = self.scan_ranges_as_strs(verbose);
@@ -196,7 +198,7 @@ impl Distill for BatchSeqScan {
if verbose {
let dist = Pretty::display(&DistributionDisplay {
distribution: self.distribution(),
- input_schema: &self.base.schema,
+ input_schema: self.base.schema(),
});
vec.push(("distribution", dist));
}
@@ -214,22 +216,22 @@ impl ToDistributedBatch for BatchSeqScan {
impl ToBatchPb for BatchSeqScan {
fn to_batch_prost_body(&self) -> NodeBody {
let column_descs = self
- .logical
+ .core
.column_descs()
.iter()
.map(PbColumnDesc::from)
.collect();
- if self.logical.is_sys_table {
+ if self.core.is_sys_table {
NodeBody::SysRowSeqScan(SysRowSeqScanNode {
- table_id: self.logical.table_desc.table_id.table_id,
+ table_id: self.core.table_desc.table_id.table_id,
column_descs,
})
} else {
NodeBody::RowSeqScan(RowSeqScanNode {
- table_desc: Some(self.logical.table_desc.to_protobuf()),
+ table_desc: Some(self.core.table_desc.to_protobuf()),
column_ids: self
- .logical
+ .core
.output_column_ids()
.iter()
.map(ColumnId::get_id)
@@ -239,7 +241,7 @@ impl ToBatchPb for BatchSeqScan {
vnode_bitmap: None,
ordered: !self.order().is_any(),
chunk_size: self
- .logical
+ .core
.chunk_size
.map(|chunk_size| ChunkSize { chunk_size }),
})
@@ -249,18 +251,18 @@ impl ToBatchPb for BatchSeqScan {
impl ToLocalBatch for BatchSeqScan {
fn to_local(&self) -> Result<PlanRef> {
- let dist = if self.logical.is_sys_table {
+ let dist = if self.core.is_sys_table {
Distribution::Single
- } else if let Some(distribution_key) = self.logical.distribution_key()
+ } else if let Some(distribution_key) = self.core.distribution_key()
&& !distribution_key.is_empty()
{
- Distribution::UpstreamHashShard(distribution_key, self.logical.table_desc.table_id)
+ Distribution::UpstreamHashShard(distribution_key, self.core.table_desc.table_id)
} else {
// NOTE(kwannoel): This is a hack to force an exchange to always be inserted before
// scan.
Distribution::SomeShard
};
- Ok(Self::new_inner(self.logical.clone(), dist, self.scan_ranges.clone()).into())
+ Ok(Self::new_inner(self.core.clone(), dist, self.scan_ranges.clone()).into())
}
}
@@ -270,8 +272,8 @@ impl ExprRewritable for BatchSeqScan {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical, self.scan_ranges.clone()).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core, self.scan_ranges.clone()).into()
}
}
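
The assertion in `new_inner` encodes the scan-range invariant: equality conditions may pin at most a prefix of the table's order key, and if they pin the entire order key there is no column left to bound, so the trailing range must be the full (unbounded) range. A small sketch of that check with simplified stand-in types (not the real `ScanRange`/`TableDesc`):

```rust
// Sketch: the scan-range invariant checked in BatchSeqScan::new_inner,
// with simplified stand-in types.
use std::ops::Bound;

struct ScanRangeSketch {
    eq_conds: Vec<i64>,              // values pinned on a prefix of the order key
    range: (Bound<i64>, Bound<i64>), // bound on the next order-key column
}

fn is_full_range(range: &(Bound<i64>, Bound<i64>)) -> bool {
    matches!(range, (Bound::Unbounded, Bound::Unbounded))
}

fn scan_range_is_valid(scan_range: &ScanRangeSketch, order_len: usize) -> bool {
    let scan_pk_prefix_len = scan_range.eq_conds.len();
    scan_pk_prefix_len < order_len
        || (scan_pk_prefix_len == order_len && is_full_range(&scan_range.range))
}

fn main() {
    let order_len = 2; // order key: (c1, c2)

    // c1 = 10 AND c2 > 5: pins a strict prefix, then ranges over c2. Valid.
    let r1 = ScanRangeSketch { eq_conds: vec![10], range: (Bound::Excluded(5), Bound::Unbounded) };
    assert!(scan_range_is_valid(&r1, order_len));

    // c1 = 10 AND c2 = 5: pins the whole order key, so no trailing range. Valid.
    let r2 = ScanRangeSketch { eq_conds: vec![10, 5], range: (Bound::Unbounded, Bound::Unbounded) };
    assert!(scan_range_is_valid(&r2, order_len));

    // Pinning the whole key *and* keeping a trailing bound is rejected.
    let r3 = ScanRangeSketch { eq_conds: vec![10, 5], range: (Bound::Excluded(0), Bound::Unbounded) };
    assert!(!scan_range_is_valid(&r3, order_len));
}
```
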
diff --git a/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs b/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs
index b414779385200..bae8d70c2eedf 100644
--- a/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs
@@ -16,7 +16,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::SortAggNode;
-use super::generic::{self, PlanAggCall};
+use super::generic::{self, GenericPlanRef, PlanAggCall};
use super::utils::impl_distill_by_unit;
use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
use crate::expr::ExprRewriter;
@@ -26,44 +26,47 @@ use crate::optimizer::property::{Distribution, Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchSimpleAgg {
pub base: PlanBase,
- logical: generic::Agg,
+ core: generic::Agg,
}
impl BatchSimpleAgg {
- pub fn new(logical: generic::Agg) -> Self {
- let input_dist = logical.input.distribution().clone();
- let base = PlanBase::new_batch_from_logical(&logical, input_dist, Order::any());
- BatchSimpleAgg { base, logical }
+ pub fn new(core: generic::Agg) -> Self {
+ let input_dist = core.input.distribution().clone();
+ let base = PlanBase::new_batch_with_core(&core, input_dist, Order::any());
+ BatchSimpleAgg { base, core }
}
pub fn agg_calls(&self) -> &[PlanAggCall] {
- &self.logical.agg_calls
+ &self.core.agg_calls
}
fn two_phase_agg_enabled(&self) -> bool {
- let session_ctx = self.base.ctx.session_ctx();
- session_ctx.config().get_enable_two_phase_agg()
+ self.base
+ .ctx()
+ .session_ctx()
+ .config()
+ .get_enable_two_phase_agg()
}
pub(crate) fn can_two_phase_agg(&self) -> bool {
- self.logical.can_two_phase_agg() && self.two_phase_agg_enabled()
+ self.core.can_two_phase_agg() && self.two_phase_agg_enabled()
}
}
impl PlanTreeNodeUnary for BatchSimpleAgg {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
Self::new(generic::Agg {
input,
- ..self.logical.clone()
+ ..self.core.clone()
})
}
}
impl_plan_tree_node_for_unary! { BatchSimpleAgg }
-impl_distill_by_unit!(BatchSimpleAgg, logical, "BatchSimpleAgg");
+impl_distill_by_unit!(BatchSimpleAgg, core, "BatchSimpleAgg");
impl ToDistributedBatch for BatchSimpleAgg {
fn to_distributed(&self) -> Result<PlanRef> {
@@ -83,7 +86,7 @@ impl ToDistributedBatch for BatchSimpleAgg {
// insert total agg
let total_agg_types = self
- .logical
+ .core
.agg_calls
.iter()
.enumerate()
@@ -92,7 +95,7 @@ impl ToDistributedBatch for BatchSimpleAgg {
})
.collect();
let total_agg_logical =
- generic::Agg::new(total_agg_types, self.logical.group_key.clone(), exchange);
+ generic::Agg::new(total_agg_types, self.core.group_key.clone(), exchange);
Ok(BatchSimpleAgg::new(total_agg_logical).into())
} else {
let new_input = self
@@ -134,8 +137,8 @@ impl ExprRewritable for BatchSimpleAgg {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut logical = self.logical.clone();
- logical.rewrite_exprs(r);
- Self::new(logical).into()
+ let mut core = self.core.clone();
+ core.rewrite_exprs(r);
+ Self::new(core).into()
}
}
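
The two-phase path in `to_distributed` builds a total aggregation (the `total_agg_types` above) over the exchanged output of the per-shard partial aggregation. The classic case is `count`, whose total phase sums the partial counts; a tiny sketch of that identity, with plain vectors standing in for the per-shard inputs:

```rust
// Sketch: why a distributed `count` becomes partial counts followed by a sum.

fn partial_count(shard: &[i64]) -> i64 {
    shard.len() as i64
}

fn total_from_partials(partials: &[i64]) -> i64 {
    partials.iter().sum()
}

fn main() {
    let shards: Vec<Vec<i64>> = vec![vec![1, 2, 3], vec![4], vec![5, 6]];

    // Phase 1: each shard counts its own rows.
    let partials: Vec<i64> = shards.iter().map(|s| partial_count(s)).collect();
    // Phase 2: the single total agg sums the partial counts.
    let two_phase = total_from_partials(&partials);

    // Same answer as counting everything in one place.
    let single_phase: i64 = shards.iter().map(|s| s.len() as i64).sum();
    assert_eq!(two_phase, single_phase);
    assert_eq!(two_phase, 6);
}
```
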
diff --git a/src/frontend/src/optimizer/plan_node/batch_sort.rs b/src/frontend/src/optimizer/plan_node/batch_sort.rs
index 8576a18c19333..e7bff6d51d85b 100644
--- a/src/frontend/src/optimizer/plan_node/batch_sort.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_sort.rs
@@ -17,6 +17,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::SortNode;
+use super::batch::BatchPlanRef;
use super::utils::{childless_record, Distill};
use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
use crate::optimizer::plan_node::ToLocalBatch;
@@ -56,7 +57,7 @@ impl PlanTreeNodeUnary for BatchSort {
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- Self::new(input, self.base.order.clone())
+ Self::new(input, self.base.order().clone())
}
}
impl_plan_tree_node_for_unary! {BatchSort}
@@ -70,7 +71,7 @@ impl ToDistributedBatch for BatchSort {
impl ToBatchPb for BatchSort {
fn to_batch_prost_body(&self) -> NodeBody {
- let column_orders = self.base.order.to_protobuf();
+ let column_orders = self.base.order().to_protobuf();
NodeBody::Sort(SortNode { column_orders })
}
}
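
A second theme in these files is replacing direct `PlanBase` field access with accessors: `self.base.order` becomes `self.base.order()` here, `self.base.ctx` becomes `self.base.ctx()` in the join and scan nodes, and the in-place `base.dist = ...` mutation in `batch_source.rs` below gives way to `clone_with_new_distribution(...)`. A minimal sketch of the accessor style on a toy plan base (`PlanBaseSketch` and `Dist` are stand-ins, not the real `PlanBase`/`Distribution`):

```rust
// Sketch: field access vs. accessor/clone-with style on a toy plan base.

#[derive(Clone, Debug, PartialEq)]
enum Dist {
    Single,
    SomeShard,
}

#[derive(Debug)]
struct PlanBaseSketch {
    dist: Dist, // callers go through methods rather than touching this field
}

impl PlanBaseSketch {
    fn distribution(&self) -> &Dist {
        &self.dist
    }

    // Instead of letting callers mutate `dist` in place, hand back a copy
    // with the new distribution, keeping the original node untouched.
    fn clone_with_new_distribution(&self, dist: Dist) -> Self {
        Self { dist }
    }
}

fn main() {
    let base = PlanBaseSketch { dist: Dist::Single };
    let rebased = base.clone_with_new_distribution(Dist::SomeShard);

    assert_eq!(base.distribution(), &Dist::Single); // original unchanged
    assert_eq!(rebased.distribution(), &Dist::SomeShard);
}
```
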
diff --git a/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs b/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs
index 241f1195352e3..2252d4c0c0ee0 100644
--- a/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs
@@ -28,18 +28,18 @@ use crate::utils::{ColIndexMappingRewriteExt, IndexSet};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchSortAgg {
pub base: PlanBase,
- logical: generic::Agg,
+ core: generic::Agg,
input_order: Order,
}
impl BatchSortAgg {
- pub fn new(logical: generic::Agg) -> Self {
- assert!(!logical.group_key.is_empty());
- assert!(logical.input_provides_order_on_group_keys());
+ pub fn new(core: generic::Agg) -> Self {
+ assert!(!core.group_key.is_empty());
+ assert!(core.input_provides_order_on_group_keys());
- let input = logical.input.clone();
+ let input = core.input.clone();
let input_dist = input.distribution();
- let dist = logical
+ let dist = core
.i2o_col_mapping()
.rewrite_provided_distribution(input_dist);
let input_order = Order {
@@ -47,46 +47,44 @@ impl BatchSortAgg {
.order()
.column_orders
.iter()
- .filter(|o| logical.group_key.indices().any(|g_k| g_k == o.column_index))
+ .filter(|o| core.group_key.indices().any(|g_k| g_k == o.column_index))
.cloned()
.collect(),
};
- let order = logical
- .i2o_col_mapping()
- .rewrite_provided_order(&input_order);
+ let order = core.i2o_col_mapping().rewrite_provided_order(&input_order);
- let base = PlanBase::new_batch_from_logical(&logical, dist, order);
+ let base = PlanBase::new_batch_with_core(&core, dist, order);
BatchSortAgg {
base,
- logical,
+ core,
input_order,
}
}
pub fn agg_calls(&self) -> &[PlanAggCall] {
- &self.logical.agg_calls
+ &self.core.agg_calls
}
pub fn group_key(&self) -> &IndexSet {
- &self.logical.group_key
+ &self.core.group_key
}
}
impl PlanTreeNodeUnary for BatchSortAgg {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
impl_plan_tree_node_for_unary! { BatchSortAgg }
-impl_distill_by_unit!(BatchSortAgg, logical, "BatchSortAgg");
+impl_distill_by_unit!(BatchSortAgg, core, "BatchSortAgg");
impl ToDistributedBatch for BatchSortAgg {
fn to_distributed(&self) -> Result<PlanRef> {
@@ -136,7 +134,7 @@ impl ExprRewritable for BatchSortAgg {
}
fn rewrite_exprs(&self, r: &mut dyn ExprRewriter) -> PlanRef {
- let mut new_logical = self.logical.clone();
+ let mut new_logical = self.core.clone();
new_logical.rewrite_exprs(r);
Self::new(new_logical).into()
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_source.rs b/src/frontend/src/optimizer/plan_node/batch_source.rs
index 3adfbf670343a..9e2cd6006db0b 100644
--- a/src/frontend/src/optimizer/plan_node/batch_source.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_source.rs
@@ -19,6 +19,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::SourceNode;
+use super::generic::GenericPlanRef;
use super::utils::{childless_record, column_names_pretty, Distill};
use super::{
generic, ExprRewritable, PlanBase, PlanRef, ToBatchPb, ToDistributedBatch, ToLocalBatch,
@@ -30,19 +31,19 @@ use crate::optimizer::property::{Distribution, Order};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchSource {
pub base: PlanBase,
- logical: generic::Source,
+ core: generic::Source,
}
impl BatchSource {
- pub fn new(logical: generic::Source) -> Self {
- let base = PlanBase::new_batch_from_logical(
- &logical,
+ pub fn new(core: generic::Source) -> Self {
+ let base = PlanBase::new_batch_with_core(
+ &core,
// Use `Single` by default, will be updated later with `clone_with_dist`.
Distribution::Single,
Order::any(),
);
- Self { base, logical }
+ Self { base, core }
}
pub fn column_names(&self) -> Vec<&str> {
@@ -50,19 +51,20 @@ impl BatchSource {
}
pub fn source_catalog(&self) -> Option<Rc<SourceCatalog>> {
- self.logical.catalog.clone()
+ self.core.catalog.clone()
}
pub fn kafka_timestamp_range_value(&self) -> (Option<i64>, Option<i64>) {
- self.logical.kafka_timestamp_range_value()
+ self.core.kafka_timestamp_range_value()
}
pub fn clone_with_dist(&self) -> Self {
- let mut base = self.base.clone();
- base.dist = Distribution::SomeShard;
+ let base = self
+ .base
+ .clone_with_new_distribution(Distribution::SomeShard);
Self {
base,
- logical: self.logical.clone(),
+ core: self.core.clone(),
}
}
}
@@ -100,7 +102,7 @@ impl ToBatchPb for BatchSource {
source_id: source_catalog.id,
info: Some(source_catalog.info.clone()),
columns: self
- .logical
+ .core
.column_catalog
.iter()
.map(|c| c.to_protobuf())
diff --git a/src/frontend/src/optimizer/plan_node/batch_table_function.rs b/src/frontend/src/optimizer/plan_node/batch_table_function.rs
index 91aa1af0abbe7..0b9887cd4aaba 100644
--- a/src/frontend/src/optimizer/plan_node/batch_table_function.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_table_function.rs
@@ -17,6 +17,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::TableFunctionNode;
+use super::generic::GenericPlanRef;
use super::utils::{childless_record, Distill};
use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeLeaf, ToBatchPb, ToDistributedBatch};
use crate::expr::ExprRewriter;
@@ -39,7 +40,7 @@ impl BatchTableFunction {
}
pub fn with_dist(logical: LogicalTableFunction, dist: Distribution) -> Self {
- let ctx = logical.base.ctx.clone();
+ let ctx = logical.base.ctx().clone();
let base = PlanBase::new_batch(ctx, logical.schema().clone(), dist, Order::any());
BatchTableFunction { base, logical }
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_topn.rs b/src/frontend/src/optimizer/plan_node/batch_topn.rs
index b8b5ba710e468..b2eda24046d28 100644
--- a/src/frontend/src/optimizer/plan_node/batch_topn.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_topn.rs
@@ -29,38 +29,34 @@ use crate::optimizer::property::{Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchTopN {
pub base: PlanBase,
- logical: generic::TopN,
+ core: generic::TopN,
}
impl BatchTopN {
- pub fn new(logical: generic::TopN) -> Self {
- assert!(logical.group_key.is_empty());
- let base = PlanBase::new_batch_from_logical(
- &logical,
- logical.input.distribution().clone(),
+ pub fn new(core: generic::TopN) -> Self {
+ assert!(core.group_key.is_empty());
+ let base = PlanBase::new_batch_with_core(
+ &core,
+ core.input.distribution().clone(),
// BatchTopN outputs data in the order of specified order
- logical.order.clone(),
+ core.order.clone(),
);
- BatchTopN { base, logical }
+ BatchTopN { base, core }
}
fn two_phase_topn(&self, input: PlanRef) -> Result<PlanRef> {
let new_limit = TopNLimit::new(
- self.logical.limit_attr.limit() + self.logical.offset,
- self.logical.limit_attr.with_ties(),
+ self.core.limit_attr.limit() + self.core.offset,
+ self.core.limit_attr.with_ties(),
);
let new_offset = 0;
- let partial_input: PlanRef = if input.order().satisfies(&self.logical.order) {
+ let partial_input: PlanRef = if input.order().satisfies(&self.core.order) {
let logical_partial_limit = generic::Limit::new(input, new_limit.limit(), new_offset);
let batch_partial_limit = BatchLimit::new(logical_partial_limit);
batch_partial_limit.into()
} else {
- let logical_partial_topn = generic::TopN::without_group(
- input,
- new_limit,
- new_offset,
- self.logical.order.clone(),
- );
+ let logical_partial_topn =
+ generic::TopN::without_group(input, new_limit, new_offset, self.core.order.clone());
let batch_partial_topn = Self::new(logical_partial_topn);
batch_partial_topn.into()
};
@@ -78,17 +74,17 @@ impl BatchTopN {
}
}
-impl_distill_by_unit!(BatchTopN, logical, "BatchTopN");
+impl_distill_by_unit!(BatchTopN, core, "BatchTopN");
impl PlanTreeNodeUnary for BatchTopN {
fn input(&self) -> PlanRef {
- self.logical.input.clone()
+ self.core.input.clone()
}
fn clone_with_input(&self, input: PlanRef) -> Self {
- let mut logical = self.logical.clone();
- logical.input = input;
- Self::new(logical)
+ let mut core = self.core.clone();
+ core.input = input;
+ Self::new(core)
}
}
@@ -102,12 +98,12 @@ impl ToDistributedBatch for BatchTopN {
impl ToBatchPb for BatchTopN {
fn to_batch_prost_body(&self) -> NodeBody {
- let column_orders = self.logical.order.to_protobuf();
+ let column_orders = self.core.order.to_protobuf();
NodeBody::TopN(TopNNode {
- limit: self.logical.limit_attr.limit(),
- offset: self.logical.offset,
+ limit: self.core.limit_attr.limit(),
+ offset: self.core.offset,
column_orders,
- with_ties: self.logical.limit_attr.with_ties(),
+ with_ties: self.core.limit_attr.with_ties(),
})
}
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_union.rs b/src/frontend/src/optimizer/plan_node/batch_union.rs
index 1626d32db2cc8..c7c71111174c6 100644
--- a/src/frontend/src/optimizer/plan_node/batch_union.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_union.rs
@@ -25,12 +25,12 @@ use crate::optimizer::property::{Distribution, Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchUnion {
pub base: PlanBase,
- logical: generic::Union,
+ core: generic::Union,
}
impl BatchUnion {
- pub fn new(logical: generic::Union) -> Self {
- let dist = if logical
+ pub fn new(core: generic::Union) -> Self {
+ let dist = if core
.inputs
.iter()
.all(|input| *input.distribution() == Distribution::Single)
@@ -40,21 +40,21 @@ impl BatchUnion {
Distribution::SomeShard
};
- let base = PlanBase::new_batch_from_logical(&logical, dist, Order::any());
- BatchUnion { base, logical }
+ let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
+ BatchUnion { base, core }
}
}
-impl_distill_by_unit!(BatchUnion, logical, "BatchUnion");
+impl_distill_by_unit!(BatchUnion, core, "BatchUnion");
impl PlanTreeNode for BatchUnion {
fn inputs(&self) -> smallvec::SmallVec<[crate::optimizer::PlanRef; 2]> {
- smallvec::SmallVec::from_vec(self.logical.inputs.clone())
+ smallvec::SmallVec::from_vec(self.core.inputs.clone())
}
fn clone_with_inputs(&self, inputs: &[crate::optimizer::PlanRef]) -> PlanRef {
// For batch query, we don't need to clone `source_col`, so just use new.
- let mut new = self.logical.clone();
+ let mut new = self.core.clone();
new.inputs = inputs.to_vec();
Self::new(new).into()
}
diff --git a/src/frontend/src/optimizer/plan_node/batch_update.rs b/src/frontend/src/optimizer/plan_node/batch_update.rs
index 19bb60b9aa1d8..20e4b8b6b966c 100644
--- a/src/frontend/src/optimizer/plan_node/batch_update.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_update.rs
@@ -18,6 +18,7 @@ use risingwave_common::error::Result;
use risingwave_pb::batch_plan::plan_node::NodeBody;
use risingwave_pb::batch_plan::UpdateNode;
+use super::generic::GenericPlanRef;
use super::utils::impl_distill_by_unit;
use super::{
generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch,
@@ -30,32 +31,32 @@ use crate::optimizer::property::{Distribution, Order, RequiredDist};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct BatchUpdate {
pub base: PlanBase,
- pub logical: generic::Update,
+ pub core: generic::Update