diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 235fb3bbacfbb..acd75f253b699 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -3,5 +3,5 @@ contact_links:
     url: https://github.com/risingwavelabs/risingwave/discussions
     about: Have questions? Welcome to open a discussion.
   - name: Community Chat
-    url: https://join.slack.com/t/risingwave-community/shared_invite/zt-120rft0mr-d8uGk3d~NZiZAQWPnElOfw
+    url: https://risingwave.com/slack
     about: Join the RisingWave Slack community and chat with us.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index b2d58279b5290..51242d0425e28 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -16,6 +16,7 @@ updates:
     arrow:
       patterns:
         - "arrow*"
+        - "parquet"
     aws:
       patterns:
         - "aws*"
diff --git a/.github/workflows/auto-create-doc-issue-by-issue.yml b/.github/workflows/auto-create-doc-issue-by-issue.yml
new file mode 100644
index 0000000000000..0c8d78062977a
--- /dev/null
+++ b/.github/workflows/auto-create-doc-issue-by-issue.yml
@@ -0,0 +1,41 @@
+# Opens a tracking issue in risingwavelabs/risingwave-docs whenever an issue
+# labeled 'user-facing-changes' is closed in this repository.
+name: Issue Documentation Checker
+
+on:
+  issues:
+    types:
+      - closed
+      - labeled
+
+jobs:
+  create-issue:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Log the event payload
+        # The payload contains user-controlled text (issue title/body). Pass it
+        # through an environment variable instead of interpolating it into the
+        # script, so shell metacharacters in it cannot be executed.
+        env:
+          EVENT_PAYLOAD: ${{ toJSON(github.event) }}
+        run: echo "$EVENT_PAYLOAD"
+      - name: Check if issue is done and labeled 'user-facing-changes'
+        # NOTE(review): `@main` is a mutable ref and this step receives a secret
+        # token; consider pinning to a release tag or commit SHA.
+        uses: dacbd/create-issue-action@main
+        # Only the `closed` event creates a docs issue; `labeled` events stop here.
+        if: ${{ github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user-facing-changes') }}
+        with:
+          token: ${{ secrets.ACCESS_TOKEN }}
+          owner: risingwavelabs
+          repo: risingwave-docs
+          title: |
+            Document: ${{ github.event.issue.title }}
+          body: |
+            ## Context
+            Source Issue URL: ${{ github.event.issue.html_url }}
+            Created At: ${{ github.event.issue.created_at }}
+            Created By: ${{ github.event.issue.user.login }}
+            Closed At: ${{ github.event.issue.closed_at }}
diff --git a/.github/workflows/auto-create-docs-pr.yml b/.github/workflows/auto-create-doc-issue-by-pr.yml
similarity index 100%
rename from .github/workflows/auto-create-docs-pr.yml
rename to .github/workflows/auto-create-doc-issue-by-pr.yml
diff --git a/.gitignore b/.gitignore
index 19fb6643dd8a6..375738f67093e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -74,4 +74,7 @@ simulation-it-test.tar.zst
 # hummock-trace
 .trace
 
+# spark binary
+e2e_test/iceberg/spark-*-bin*
+
 **/poetry.lock
\ No newline at end of file
diff --git a/.licenserc.yaml b/.licenserc.yaml
index c1745a4d1ad74..7b49108b6b2f3 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -17,6 +17,6 @@ header:
     - "**/*.d.ts"
     - "src/sqlparser/**/*.rs"
     - "java/connector-node/risingwave-source-cdc/src/main/java/com/risingwave/connector/cdc/debezium/internal/*.java"
-    - "src/meta/src/model_v2/migration/**/*.rs"
+    - "src/meta/model_v2/migration/**/*.rs"
 
   comment: on-failure
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9b519c16010ba..c0b3991fc1f61 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -4,7 +4,7 @@ Thanks for your interest in contributing to RisingWave! We welcome and appreciat
 
 This document describes how to submit your code changes. To learn about the development process, see the [developer guide](docs/developer-guide.md). To understand the design and implementation of RisingWave, refer to the design docs listed in [docs/README.md](docs/README.md).
 
-If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://join.slack.com/t/risingwave-community/shared_invite/zt-120rft0mr-d8uGk3d~NZiZAQWPnElOfw) to join the channel.
+If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://risingwave.com/slack) to join the channel.
 
 To report bugs, create a [GitHub issue](https://github.com/risingwavelabs/risingwave/issues/new/choose).
 
diff --git a/Cargo.lock b/Cargo.lock
index 4550cc2d7faeb..50519aeccab88 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -243,9 +243,9 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 
 [[package]]
 name = "arrow-arith"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc1d4e368e87ad9ee64f28b9577a3834ce10fe2703a26b28417d485bbbdff956"
+checksum = "c5c3d17fc5b006e7beeaebfb1d2edfc92398b981f82d9744130437909b72a468"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -258,9 +258,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-array"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d02efa7253ede102d45a4e802a129e83bcc3f49884cab795b1ac223918e4318d"
+checksum = "55705ada5cdde4cb0f202ffa6aa756637e33fea30e13d8d0d0fd6a24ffcee1e3"
 dependencies = [
  "ahash 0.8.3",
  "arrow-buffer",
@@ -274,9 +274,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fda119225204141138cb0541c692fbfef0e875ba01bfdeaed09e9d354f9d6195"
+checksum = "a722f90a09b94f295ab7102542e97199d3500128843446ef63e410ad546c5333"
 dependencies = [
  "bytes",
  "half 2.3.1",
@@ -285,9 +285,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-cast"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d825d51b9968868d50bc5af92388754056796dbc62a4e25307d588a1fc84dee"
+checksum = "af01fc1a06f6f2baf31a04776156d47f9f31ca5939fe6d00cd7a059f95a46ff1"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -302,9 +302,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-data"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "475a4c3699c8b4095ca61cecf15da6f67841847a5f5aac983ccb9a377d02f73a"
+checksum = "d0a547195e607e625e7fafa1a7269b8df1a4a612c919efd9b26bd86e74538f3a"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -314,9 +314,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-flight"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd938ea4a0e8d0db2b9f47ebba792f73f6188f4289707caeaf93a3be705e5ed5"
+checksum = "c58645809ced5acd6243e89a63ae8535a2ab50d780affcd7efe8c7473a0da661"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -334,9 +334,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ipc"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1248005c8ac549f869b7a840859d942bf62471479c1a2d82659d453eebcd166a"
+checksum = "e36bf091502ab7e37775ff448413ef1ffff28ff93789acb669fffdd51b394d51"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -348,9 +348,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ord"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03b87aa408ea6a6300e49eb2eba0c032c88ed9dc19e0a9948489c55efdca71f4"
+checksum = "4502123d2397319f3a13688432bc678c61cb1582f2daa01253186da650bf5841"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -363,9 +363,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-row"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "114a348ab581e7c9b6908fcab23cb39ff9f060eb19e72b13f8fb8eaa37f65d22"
+checksum = "249fc5a07906ab3f3536a6e9f118ec2883fbcde398a97a5ba70053f0276abda4"
 dependencies = [
  "ahash 0.8.3",
  "arrow-array",
@@ -378,15 +378,15 @@ dependencies = [
 
 [[package]]
 name = "arrow-schema"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d1d179c117b158853e0101bfbed5615e86fe97ee356b4af901f1c5001e1ce4b"
+checksum = "9d7a8c3f97f5ef6abd862155a6f39aaba36b029322462d72bbcfa69782a50614"
 
 [[package]]
 name = "arrow-select"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d5c71e003202e67e9db139e5278c79f5520bb79922261dfe140e4637ee8b6108"
+checksum = "f868f4a5001429e20f7c1994b5cd1aa68b82e3db8cf96c559cdb56dc8be21410"
 dependencies = [
  "ahash 0.8.3",
  "arrow-array",
@@ -2448,10 +2448,11 @@ dependencies = [
 
 [[package]]
 name = "deranged"
-version = "0.3.8"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946"
+checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3"
 dependencies = [
+ "powerfmt",
  "serde",
 ]
 
@@ -2554,8 +2555,7 @@ checksum = "86e3bdc80eee6e16b2b6b0f87fbc98c04bee3455e35174c0de1a125d0688c632"
 [[package]]
 name = "dlv-list"
 version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8aead04dc46b5f263c25721cf25c9e595951d15055f8063f92392fa0d7f64cf4"
+source = "git+https://github.com/sgodwincs/dlv-list-rs.git?rev=5bbc5d0#5bbc5d0cc84f257e173d851f8dc1674fb6e46f95"
 dependencies = [
  "const-random",
 ]
@@ -3018,7 +3018,7 @@ dependencies = [
 [[package]]
 name = "foyer"
 version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
 dependencies = [
  "foyer-common",
  "foyer-intrusive",
@@ -3029,10 +3029,11 @@ dependencies = [
 [[package]]
 name = "foyer-common"
 version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
 dependencies = [
  "bytes",
  "foyer-workspace-hack",
+ "itertools 0.11.0",
  "madsim-tokio",
  "parking_lot 0.12.1",
  "paste",
@@ -3043,13 +3044,13 @@ dependencies = [
 [[package]]
 name = "foyer-intrusive"
 version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
 dependencies = [
  "bytes",
  "cmsketch",
  "foyer-common",
  "foyer-workspace-hack",
- "itertools 0.10.5",
+ "itertools 0.11.0",
  "memoffset",
  "parking_lot 0.12.1",
  "paste",
@@ -3060,7 +3061,7 @@ dependencies = [
 [[package]]
 name = "foyer-storage"
 version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
 dependencies = [
  "anyhow",
  "async-channel",
@@ -3089,7 +3090,7 @@ dependencies = [
 [[package]]
 name = "foyer-workspace-hack"
 version = "0.1.0"
-source = "git+https://github.com/mrcroxx/foyer?rev=438eec8#438eec87e90c7a80cb53a06b711c6ea1ad7a0f41"
+source = "git+https://github.com/MrCroxx/foyer?rev=2261151#2261151107ad362851f5fff9ce4fa56e61911b10"
 dependencies = [
  "crossbeam-utils",
  "either",
@@ -3098,7 +3099,7 @@ dependencies = [
  "futures-sink",
  "futures-util",
  "hyper",
- "itertools 0.10.5",
+ "itertools 0.11.0",
  "libc",
  "memchr",
  "parking_lot 0.12.1",
@@ -3227,9 +3228,9 @@ dependencies = [
 
 [[package]]
 name = "futures-async-stream"
-version = "0.2.7"
+version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f529ccdeacfa2446a9577041686cf1abb839b1b3e15fee4c1b1232ab3b7d799f"
+checksum = "379790776b0d953337df4ab7ecc51936c66ea112484cad7912907b1d34253ebf"
 dependencies = [
  "futures-async-stream-macro",
  "futures-core",
@@ -3238,13 +3239,13 @@ dependencies = [
 
 [[package]]
 name = "futures-async-stream-macro"
-version = "0.2.7"
+version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca2b48ee06dc8d2808ba5ebad075d06c3406085bb19deaac33be64c39113bf80"
+checksum = "5df2c13d48c8cb8a3ec093ede6f0f4482f327d7bb781120c5fb483ef0f17e758"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 1.0.109",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -4141,6 +4142,18 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "jsonbb"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44376417b2ff0cd879b5c84976fa9e0855c316321b4e0502e33e52963bf84f74"
+dependencies = [
+ "bytes",
+ "serde",
+ "serde_json",
+ "smallvec",
+]
+
 [[package]]
 name = "jsonschema-transpiler"
 version = "1.10.0"
@@ -4464,6 +4477,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "lz4_flex"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8"
+dependencies = [
+ "twox-hash",
+]
+
 [[package]]
 name = "lzma-sys"
 version = "0.1.20"
@@ -4782,15 +4804,6 @@ dependencies = [
  "syn 1.0.109",
 ]
 
-[[package]]
-name = "model_migration"
-version = "0.1.0"
-dependencies = [
- "async-std",
- "sea-orm-migration",
- "uuid",
-]
-
 [[package]]
 name = "moka"
 version = "0.12.0"
@@ -5519,8 +5532,7 @@ dependencies = [
 [[package]]
 name = "ordered-multimap"
 version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ed8acf08e98e744e5384c8bc63ceb0364e68a6854187221c18df61c4797690e"
+source = "git+https://github.com/risingwavelabs/ordered-multimap-rs.git?rev=19c743f#19c743f3e3d106c99ba37628f06a2ca6faa2284f"
 dependencies = [
  "dlv-list",
  "hashbrown 0.13.2",
@@ -5643,9 +5655,9 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "47.0.0"
+version = "48.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0463cc3b256d5f50408c49a4be3a16674f4c8ceef60941709620a062b1f6bf4d"
+checksum = "239229e6a668ab50c61de3dce61cf0fa1069345f7aa0f4c934491f92205a4945"
 dependencies = [
  "ahash 0.8.3",
  "arrow-array",
@@ -5662,7 +5674,7 @@ dependencies = [
  "flate2",
  "futures",
  "hashbrown 0.14.0",
- "lz4",
+ "lz4_flex",
  "num",
  "num-bigint",
  "paste",
@@ -5671,7 +5683,7 @@ dependencies = [
  "thrift",
  "tokio",
  "twox-hash",
- "zstd 0.12.4",
+ "zstd 0.13.0",
 ]
 
 [[package]]
@@ -6050,6 +6062,12 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
 [[package]]
 name = "pprof"
 version = "0.13.0"
@@ -7083,6 +7101,7 @@ dependencies = [
  "hytra",
  "itertools 0.11.0",
  "itoa",
+ "jsonbb",
  "libc",
  "lru 0.7.6",
  "mach2",
@@ -7461,6 +7480,7 @@ dependencies = [
  "futures-util",
  "hex",
  "itertools 0.11.0",
+ "jsonbb",
  "madsim-tokio",
  "md5",
  "num-traits",
@@ -7641,6 +7661,7 @@ dependencies = [
 name = "risingwave_jni_core"
 version = "0.1.0"
 dependencies = [
+ "anyhow",
  "bytes",
  "cfg-or-panic",
  "futures",
@@ -7691,7 +7712,6 @@ dependencies = [
  "maplit",
  "memcomparable",
  "mime_guess",
- "model_migration",
  "num-integer",
  "num-traits",
  "parking_lot 0.12.1",
@@ -7705,6 +7725,8 @@ dependencies = [
  "risingwave_common_heap_profiling",
  "risingwave_connector",
  "risingwave_hummock_sdk",
+ "risingwave_meta_model_migration",
+ "risingwave_meta_model_v2",
  "risingwave_object_store",
  "risingwave_pb",
  "risingwave_rpc_client",
@@ -7714,7 +7736,6 @@ dependencies = [
  "sea-orm",
  "serde",
  "serde_json",
- "sqlx",
  "sync-point",
  "thiserror",
  "tokio-retry",
@@ -7727,6 +7748,25 @@ dependencies = [
  "workspace-hack",
 ]
 
+[[package]]
+name = "risingwave_meta_model_migration"
+version = "1.3.0-alpha"
+dependencies = [
+ "async-std",
+ "sea-orm-migration",
+ "uuid",
+]
+
+[[package]]
+name = "risingwave_meta_model_v2"
+version = "1.3.0-alpha"
+dependencies = [
+ "risingwave_pb",
+ "sea-orm",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "risingwave_meta_node"
 version = "1.3.0-alpha"
@@ -7739,13 +7779,13 @@ dependencies = [
  "madsim-etcd-client",
  "madsim-tokio",
  "madsim-tonic",
- "model_migration",
  "prometheus-http-query",
  "regex",
  "risingwave_common",
  "risingwave_common_heap_profiling",
  "risingwave_common_service",
  "risingwave_meta",
+ "risingwave_meta_model_migration",
  "risingwave_meta_service",
  "risingwave_pb",
  "risingwave_rpc_client",
@@ -7769,6 +7809,7 @@ dependencies = [
  "risingwave_common",
  "risingwave_connector",
  "risingwave_meta",
+ "risingwave_meta_model_v2",
  "risingwave_pb",
  "sea-orm",
  "sync-point",
@@ -9832,14 +9873,15 @@ dependencies = [
 
 [[package]]
 name = "time"
-version = "0.3.28"
+version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17f6bb557fd245c28e6411aa56b6403c689ad95061f50e4be16c274e70a17e48"
+checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5"
 dependencies = [
  "deranged",
  "itoa",
  "libc",
  "num_threads",
+ "powerfmt",
  "serde",
  "time-core",
  "time-macros",
@@ -9847,15 +9889,15 @@ dependencies = [
 
 [[package]]
 name = "time-core"
-version = "0.1.1"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb"
+checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
 
 [[package]]
 name = "time-macros"
-version = "0.2.14"
+version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a942f44339478ef67935ab2bbaec2fb0322496cf3cbe84b261e06ac3814c572"
+checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20"
 dependencies = [
  "time-core",
 ]
@@ -11004,7 +11046,6 @@ dependencies = [
  "futures-util",
  "hashbrown 0.12.3",
  "hashbrown 0.14.0",
- "heck 0.4.1",
  "hyper",
  "indexmap 1.9.3",
  "itertools 0.10.5",
diff --git a/Cargo.toml b/Cargo.toml
index ef09221b818a2..ac533e733f7a8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,9 +19,10 @@ members = [
   "src/java_binding",
   "src/jni_core",
   "src/meta",
+  "src/meta/model_v2",
+  "src/meta/model_v2/migration",
   "src/meta/node",
   "src/meta/service",
-  "src/meta/src/model_v2/migration",
   "src/object_store",
   "src/prost",
   "src/prost/helpers",
@@ -97,7 +98,7 @@ aws-smithy-types = "0.55"
 aws-endpoint = "0.55"
 aws-types = "0.55"
 etcd-client = { package = "madsim-etcd-client", version = "0.4" }
-futures-async-stream = "0.2"
+futures-async-stream = "0.2.9"
 hytra = "0.1"
 rdkafka = { package = "madsim-rdkafka", version = "0.3.0", features = [
   "cmake-build",
@@ -112,13 +113,13 @@ tonic = { package = "madsim-tonic", version = "0.4.0" }
 tonic-build = { package = "madsim-tonic-build", version = "0.4.2" }
 prost = { version = "0.12" }
 icelake = { git = "https://github.com/icelake-io/icelake", rev = "16dab0e36ab337e58ee8002d828def2d212fa116" }
-arrow-array = "47"
-arrow-cast = "47"
-arrow-schema = "47"
-arrow-buffer = "47"
-arrow-flight = "47"
-arrow-select = "47"
-arrow-ord = "47"
+arrow-array = "48"
+arrow-cast = "48"
+arrow-schema = "48"
+arrow-buffer = "48"
+arrow-flight = "48"
+arrow-select = "48"
+arrow-ord = "48"
 tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "64a2d9" }
 tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
   "profiling",
@@ -143,6 +144,8 @@ risingwave_hummock_test = { path = "./src/storage/hummock_test" }
 risingwave_hummock_trace = { path = "./src/storage/hummock_trace" }
 risingwave_meta = { path = "./src/meta" }
 risingwave_meta_service = { path = "./src/meta/service" }
+risingwave_meta_model_migration = { path = "./src/meta/model_v2/migration" }
+risingwave_meta_model_v2 = { path = "./src/meta/model_v2" }
 risingwave_meta_node = { path = "./src/meta/node" }
 risingwave_object_store = { path = "./src/object_store" }
 risingwave_pb = { path = "./src/prost" }
@@ -165,6 +168,8 @@ unused_must_use = "forbid"
 future_incompatible = "warn"
 nonstandard_style = "warn"
 rust_2018_idioms = "warn"
+# Backward compatibility is not important for an application.
+async_fn_in_trait = "allow"
 
 [workspace.lints.clippy]
 uninlined_format_args = "allow"
@@ -229,8 +234,8 @@ opt-level = 2
 incremental = false
 debug = 1
 
-# Patch third-party crates for deterministic simulation.
 [patch.crates-io]
+# Patch third-party crates for deterministic simulation.
 quanta = { git = "https://github.com/madsim-rs/quanta.git", rev = "948bdc3" }
 getrandom = { git = "https://github.com/madsim-rs/getrandom.git", rev = "8daf97e" }
 tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "fe39bb8e" }
@@ -238,3 +243,8 @@ tokio-retry = { git = "https://github.com/madsim-rs/rust-tokio-retry.git", rev =
 tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "ac00d88" }
 # patch: unlimit 4MB message size for grpc client
 etcd-client = { git = "https://github.com/risingwavelabs/etcd-client.git", rev = "4e84d40" }
+
+# Patch for coverage_attribute.
+# https://github.com/sgodwincs/dlv-list-rs/pull/19#issuecomment-1774786289
+dlv-list = { git = "https://github.com/sgodwincs/dlv-list-rs.git", rev = "5bbc5d0" }
+ordered-multimap = { git = "https://github.com/risingwavelabs/ordered-multimap-rs.git", rev = "19c743f" }
diff --git a/README.md b/README.md
index c1878a2717159..29a7d7e51888a 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,4 @@
+
 <p align="center">
   <picture>
     <source srcset=".github/RisingWave-logo-dark.svg" width="500px" media="(prefers-color-scheme: dark)">
@@ -5,23 +6,110 @@
   </picture>
 </p>
 
-[![Slack](https://badgen.net/badge/Slack/Join%20RisingWave/0abd59?icon=slack)](https://risingwave.com/slack)
-[![Build status](https://badge.buildkite.com/9394d2bca0f87e2e97aa78b25f765c92d4207c0b65e7f6648f.svg)](https://buildkite.com/risingwavelabs/main)
-[![codecov](https://codecov.io/gh/risingwavelabs/risingwave/branch/main/graph/badge.svg?token=EB44K9K38B)](https://codecov.io/gh/risingwavelabs/risingwave)
-
-RisingWave is a distributed SQL streaming database. It is designed to reduce the complexity and cost of building stream processing applications. RisingWave consumes streaming data, performs incremental computations when new data comes in, and updates results dynamically. As a database system, RisingWave maintains results inside its own storage so that users can access data efficiently.
 
-RisingWave offers wire compatibility with PostgreSQL and demonstrates exceptional performance surpassing the previous generation of stream processing systems, including Apache Flink, by several orders of magnitude.
-It particularly excels in handling complex stateful operations like multi-stream joins.
+<div align="center">
 
-RisingWave ingests data from sources like Apache Kafka, Apache Pulsar, Amazon Kinesis, Redpanda, and materialized CDC sources. Data in RisingWave can be delivered to external targets such as message brokers, data warehouses, and data lakes for storage or additional processing.
+### 🌊Stream Processing Redefined.
 
-RisingWave 1.0 is a battle-tested version that has undergone rigorous stress tests and performance evaluations. It has proven its reliability and efficiency through successful deployments in numerous production environments across dozens of companies.
+</div>
 
-Learn more at [Introduction to RisingWave](https://docs.risingwave.com/docs/current/intro/).
+<p align="center">
+  <a
+    href="https://docs.risingwave.com/"
+    target="_blank"
+  ><b>Documentation</b></a>&nbsp;&nbsp;&nbsp;📑&nbsp;&nbsp;&nbsp;
+  <a
+    href="https://tutorials.risingwave.com/"
+    target="_blank"
+  ><b>Hands-on Tutorials</b></a>&nbsp;&nbsp;&nbsp;🎯&nbsp;&nbsp;&nbsp;
+  <a
+    href="https://cloud.risingwave.com/"
+    target="_blank"
+  ><b>RisingWave Cloud</b></a>&nbsp;&nbsp;&nbsp;🚀&nbsp;&nbsp;&nbsp;
+  <a
+    href="https://risingwave.com/slack"
+    target="_blank"
+  >
+    <b>Get Instant Help</b>
+  </a>
+</p>
+<div align="center">
+  <a
+    href="https://risingwave.com/slack"
+    target="_blank"
+  >
+    <img alt="Slack" src="https://badgen.net/badge/Slack/Join%20RisingWave/0abd59?icon=slack" />
+  </a>
+  <a
+    href="https://buildkite.com/risingwavelabs/main"
+    target="_blank"
+  >
+    <img alt="Build status" src="https://badge.buildkite.com/9394d2bca0f87e2e97aa78b25f765c92d4207c0b65e7f6648f.svg" />
+  </a>
+  <a
+    href="https://codecov.io/gh/risingwavelabs/risingwave"
+    target="_blank"
+  >
+    <img alt="codecov" src="https://codecov.io/gh/risingwavelabs/risingwave/branch/main/graph/badge.svg?token=EB44K9K38B" />
+  </a>
+</div>
+
+RisingWave is a distributed SQL streaming database that enables <b>simple</b>, <b>efficient</b>, and <b>reliable</b> processing of streaming data.
 
 ![RisingWave](https://github.com/risingwavelabs/risingwave-docs/blob/0f7e1302b22493ba3c1c48e78810750ce9a5ff42/docs/images/archi_simple.png)
 
+## How to install
+**Ubuntu**
+```
+wget https://github.com/risingwavelabs/risingwave/releases/download/v1.3.0/risingwave-v1.3.0-x86_64-unknown-linux.tar.gz
+tar xvf risingwave-v1.3.0-x86_64-unknown-linux.tar.gz
+./risingwave playground
+```
+**Mac**
+```
+brew tap risingwavelabs/risingwave
+brew install risingwave
+risingwave playground
+```
+Now connect to RisingWave using `psql`:
+```
+psql -h localhost -p 4566 -d dev -U root
+```
+
+Learn more at [Quick Start](https://docs.risingwave.com/docs/current/get-started/).
+
+## Why RisingWave for stream processing?
+RisingWave adeptly tackles some of the most challenging problems in stream processing. Compared to existing stream processing systems, RisingWave shines through with the following key features:
+* **Easy to learn**
+  * RisingWave speaks PostgreSQL-style SQL, enabling users to dive into stream processing in much the same way as operating a PostgreSQL database.
+* **Highly efficient in multi-stream joins**
+  * RisingWave has made significant optimizations for multiple stream join scenarios. Users can easily join 10-20 streams (or more) efficiently in a production environment.
+* **High resource utilization**
+  * Queries in RisingWave leverage shared computational resources, eliminating the need for users to manually allocate resources for each query.
+* **No compromise on large state management**
+  * The decoupled compute-storage architecture of RisingWave ensures remote persistence of internal states, and users never need to worry about the size of internal states when handling complex queries.
+* **Transparent dynamic scaling**
+  * RisingWave supports near-instantaneous dynamic scaling without any service interruptions.
+* **Instant failure recovery**
+  * RisingWave's state management mechanism allows it to recover from failure in seconds, not minutes or hours.
+* **Easy to verify correctness**
+  * RisingWave persists results in materialized views and allows users to break down complex stream computation programs into stacked materialized views, simplifying program development and result verification.
+* **Simplified data stack**
+  * RisingWave's ability to store data and serve queries eliminates the need for separate maintenance of stream processors and databases. Users can effortlessly connect RisingWave to their preferred BI tools or through client libraries.
+* **Simple to maintain and operate**
+  * RisingWave abstracts away unnecessary low-level details, allowing users to concentrate solely on SQL code-level issues.
+* **Rich ecosystem**
+  * With integrations to a diverse range of cloud systems and the PostgreSQL ecosystem, RisingWave boasts a rich and expansive ecosystem.
+
+## RisingWave's limitations
+RisingWave isn’t a panacea for all data engineering hurdles. It has its own set of limitations:
+* **No programmable interfaces**
+  * RisingWave does not provide low-level APIs in languages like Java and Scala, and does not allow users to manage internal states manually (unless you want to hack!). For coding in Java, Scala, and other languages, please consider using RisingWave's User-Defined Functions (UDF).
+* **No support for transaction processing**
+  * RisingWave isn’t cut out for transactional workloads, thus it’s not a viable substitute for operational databases dedicated to transaction processing. However, it supports read-only transactions, ensuring data freshness and consistency. It also comprehends the transactional semantics of upstream database Change Data Capture (CDC).
+* **Not tailored for ad-hoc analytical queries**
+  * RisingWave's row store design is tailored for optimal stream processing performance rather than interactive analytical workloads. Hence, it's not a suitable replacement for OLAP databases. Yet, a reliable integration with many OLAP databases exists, and a collaborative use of RisingWave and OLAP databases is a common practice among many users.
+
 
 ## RisingWave Cloud
 
@@ -29,19 +117,10 @@ RisingWave Cloud is a fully-managed and scalable stream processing platform powe
 
 ## Notes on telemetry
 
-RisingWave collects anonymous usage statistics to better understand how the community is using RisingWave. The sole intention of this exercise is to help improve the product. These statistics are related to system resource usage, OS versions and system uptime. RisingWave doesn't have access to any user data or metadata running on RisingWave clusters including source and sink connection parameters, sources, sinks, materialized views, and tables. Users have the option to opt out of this collection using a system parameter. Please refer to the RisingWave user documentation for more details.
-
-## Get started
-
-- To learn about how to install and run RisingWave, see [Get started](https://docs.risingwave.com/docs/current/get-started/).
-- To learn about how to ingest data and the supported data sources, see [Sources](https://docs.risingwave.com/docs/current/data-ingestion/).
-- To learn about how to transform data using the PostgreSQL-compatible SQL of RisingWave, see [SQL reference](https://docs.risingwave.com/docs/current/sql-references/).
-- To learn about how to deliver data and the supported data sinks, see [Sinks](https://docs.risingwave.com/docs/current/data-delivery/).
-- To learn about new features and changes in the current and previous versions, see [Release notes](https://docs.risingwave.com/release-notes/).
-
-## Documentation
+RisingWave collects anonymous usage statistics to better understand how the community is using RisingWave. The sole intention of this exercise is to help improve the product. Users may opt out easily at any time. Please refer to the [user documentation](https://docs.risingwave.com/docs/current/telemetry/) for more details.
 
-To learn about how to use RisingWave, refer to [RisingWave User Documentation](https://docs.risingwave.com/). To learn about the development process, see the [developer guide](docs/developer-guide.md). To understand the design and implementation of RisingWave, refer to the design docs listed in [readme.md](docs/README.md).
+## In-production use cases
+Like other stream processing systems, the primary use cases of RisingWave include monitoring, alerting, real-time dashboard reporting, streaming ETL (Extract, Transform, Load), machine learning feature engineering, and more. It has already been adopted in fields such as financial trading, manufacturing, new media, logistics, gaming, and more. Check out [customer stories](https://www.risingwave.com/use-cases/).
 
 ## Community
 
diff --git a/ci/build-ci-image.sh b/ci/build-ci-image.sh
index 43ff81ade2b85..59c88e5e9a9ae 100755
--- a/ci/build-ci-image.sh
+++ b/ci/build-ci-image.sh
@@ -13,7 +13,7 @@ cat ../rust-toolchain
 # !!! CHANGE THIS WHEN YOU WANT TO BUMP CI IMAGE !!! #
 #          AND ALSO docker-compose.yml               #
 ######################################################
-export BUILD_ENV_VERSION=v20230919
+export BUILD_ENV_VERSION=v20231022
 
 export BUILD_TAG="public.ecr.aws/x5u3w5h6/rw-build-env:${BUILD_ENV_VERSION}"
 
diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml
index 6fe7cfbfdeca2..66dd2d175e675 100644
--- a/ci/docker-compose.yml
+++ b/ci/docker-compose.yml
@@ -71,7 +71,7 @@ services:
       retries: 5
 
   source-test-env:
-    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
     depends_on:
       - mysql
       - db
@@ -81,10 +81,11 @@ services:
       - ..:/risingwave
 
   sink-test-env:
-    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
     depends_on:
       - mysql
       - db
+      - message_queue
       - elasticsearch
       - clickhouse-server
       - pulsar
@@ -92,12 +93,12 @@ services:
       - ..:/risingwave
 
   rw-build-env:
-    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
     volumes:
       - ..:/risingwave
 
   ci-flamegraph-env:
-    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
     # NOTE(kwannoel): This is used in order to permit
     # syscalls for `nperf` (perf_event_open),
     # so it can do CPU profiling.
@@ -108,7 +109,7 @@ services:
       - ..:/risingwave
 
   regress-test-env:
-    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20230919
+    image: public.ecr.aws/x5u3w5h6/rw-build-env:v20231022
     depends_on:
       db:
         condition: service_healthy
diff --git a/ci/rust-toolchain b/ci/rust-toolchain
index ebc0b6c285a4e..fe2a026f6e40f 100644
--- a/ci/rust-toolchain
+++ b/ci/rust-toolchain
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "nightly-2023-09-09"
+channel = "nightly-2023-10-21"
diff --git a/ci/scripts/deterministic-recovery-test.sh b/ci/scripts/deterministic-recovery-test.sh
index 6514fe1f7c0c3..c5f89a2bbc7e0 100755
--- a/ci/scripts/deterministic-recovery-test.sh
+++ b/ci/scripts/deterministic-recovery-test.sh
@@ -11,6 +11,7 @@ chmod +x ./risingwave_simulation
 
 export RUST_LOG="info,\
 risingwave_meta::barrier::recovery=debug,\
+risingwave_meta::manager::catalog=debug,\
 risingwave_meta::rpc::ddl_controller=debug,\
 risingwave_meta::barrier::mod=debug,\
 risingwave_simulation=debug"
diff --git a/ci/scripts/e2e-iceberg-cdc.sh b/ci/scripts/e2e-iceberg-cdc.sh
new file mode 100755
index 0000000000000..081f5bbd2afcb
--- /dev/null
+++ b/ci/scripts/e2e-iceberg-cdc.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+
+# End-to-end test: MySQL CDC -> RisingWave -> Iceberg sink.
+# Usage: ci/scripts/e2e-iceberg-cdc.sh -p <profile>
+
+# Exits as soon as any line fails.
+set -euo pipefail
+
+source ci/scripts/common.sh
+
+# prepare environment
+export CONNECTOR_RPC_ENDPOINT="localhost:50051"
+export CONNECTOR_LIBS_PATH="./connector-node/libs"
+
+# The leading ':' selects getopts' silent mode: OPTARG then holds the offending
+# option in the '?' and ':' branches instead of being unset, which would trip
+# `set -u` before the error message is printed.
+while getopts ':p:' opt; do
+    case ${opt} in
+        p )
+            profile=$OPTARG
+            ;;
+        \? )
+            echo "Invalid Option: -$OPTARG" 1>&2
+            exit 1
+            ;;
+        : )
+            echo "Invalid option: $OPTARG requires an argument" 1>&2
+            exit 1
+            ;;
+    esac
+done
+shift $((OPTIND -1))
+
+download_and_prepare_rw "$profile" source
+
+echo "--- Download connector node package"
+buildkite-agent artifact download risingwave-connector.tar.gz ./
+mkdir ./connector-node
+tar xf ./risingwave-connector.tar.gz -C ./connector-node
+
+echo "--- e2e, ci-1cn-1fe, iceberg cdc"
+
+node_port=50051
+node_timeout=10
+
+# Polls localhost:$node_port every 0.1s until something accepts connections,
+# then waits a further 2s; exits the script after $node_timeout seconds.
+wait_for_connector_node_start() {
+  start_time=$(date +%s)
+  while :
+  do
+      if nc -z localhost $node_port; then
+          echo "Port $node_port is listened! Connector Node is up!"
+          break
+      fi
+
+      current_time=$(date +%s)
+      elapsed_time=$((current_time - start_time))
+      if [ $elapsed_time -ge $node_timeout ]; then
+          echo "Timeout waiting for port $node_port to be listened!"
+          exit 1
+      fi
+      sleep 0.1
+  done
+  sleep 2
+}
+
+echo "--- starting risingwave cluster with connector node"
+
+RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \
+cargo make ci-start ci-1cn-1fe-with-recovery
+./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-node.log 2>&1 &
+echo "waiting for connector node to start"
+wait_for_connector_node_start
+
+# prepare minio iceberg sink
+echo "--- preparing iceberg"
+.risingwave/bin/mcli -C .risingwave/config/mcli mb hummock-minio/icebergdata
+
+cd e2e_test/iceberg
+bash ./start_spark_connect_server.sh
+
+# Don't remove the `--quiet` option since poetry has a bug when printing output, see
+# https://github.com/python-poetry/poetry/issues/3412
+"$HOME"/.local/bin/poetry update --quiet
+
+# 1. import data to mysql
+mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc.sql
+
+# 2. create table and sink
+"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc_init.toml
+
+# 3. insert new data to mysql
+mysql --host=mysql --port=3306 -u root -p123456 < ./test_case/cdc/mysql_cdc_insert.sql
+
+# NOTE(review): presumably gives the new rows time to reach the Iceberg sink - confirm 20s is enough
+sleep 20
+
+# 4. check change
+"$HOME"/.local/bin/poetry run python main.py -t ./test_case/cdc/no_partition_cdc.toml
diff --git a/ci/scripts/e2e-kafka-sink-test.sh b/ci/scripts/e2e-kafka-sink-test.sh
index 06ef185f46e8b..71a91f2d8fba9 100755
--- a/ci/scripts/e2e-kafka-sink-test.sh
+++ b/ci/scripts/e2e-kafka-sink-test.sh
@@ -3,10 +3,10 @@
 # Exits as soon as any line fails.
 set -euo pipefail
 
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --create > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --create > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --create > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --create > /dev/null 2>&1
 
 sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/create_sink.slt'
 sleep 2
@@ -14,7 +14,7 @@ sleep 2
 # test append-only kafka sink
 echo "testing append-only kafka sink"
 diff ./e2e_test/sink/kafka/append_only1.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 10 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 10 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for append-only sink is not as expected."
   exit 1
@@ -23,7 +23,7 @@ fi
 # test upsert kafka sink
 echo "testing upsert kafka sink"
 diff ./e2e_test/sink/kafka/upsert1.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for upsert sink is not as expected."
   exit 1
@@ -32,7 +32,7 @@ fi
 # test upsert kafka sink with schema
 echo "testing upsert kafka sink with schema"
 diff ./e2e_test/sink/kafka/upsert_schema1.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 10 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for upsert sink with schema is not as expected."
   exit 1
@@ -40,7 +40,7 @@ fi
 
 # test debezium kafka sink
 echo "testing debezium kafka sink"
-(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 10 | sort) > ./e2e_test/sink/kafka/debezium1.tmp.result 2> /dev/null
+(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 10 | sort) > ./e2e_test/sink/kafka/debezium1.tmp.result 2> /dev/null
 python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium1.result e2e_test/sink/kafka/debezium1.tmp.result
 if [ $? -ne 0 ]; then
   echo "The output for debezium sink is not as expected."
@@ -57,7 +57,7 @@ psql -h localhost -p 4566 -d dev -U root -c "update t_kafka set v_varchar = '',
 # test append-only kafka sink after update
 echo "testing append-only kafka sink after updating data"
 diff ./e2e_test/sink/kafka/append_only2.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 11 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --from-beginning --max-messages 11 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for append-only sink after update is not as expected."
   exit 1
@@ -66,7 +66,7 @@ fi
 # test upsert kafka sink after update
 echo "testing upsert kafka sink after updating data"
 diff ./e2e_test/sink/kafka/upsert2.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for upsert sink after update is not as expected."
   exit 1
@@ -75,7 +75,7 @@ fi
 # test upsert kafka sink with schema after update
 echo "testing upsert kafka sink with schema after updating data"
 diff ./e2e_test/sink/kafka/upsert_schema2.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 11 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for upsert sink with schema is not as expected."
   exit 1
@@ -83,7 +83,7 @@ fi
 
 # test debezium kafka sink after update
 echo "testing debezium kafka sink after updating data"
-(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 11  | sort) > ./e2e_test/sink/kafka/debezium2.tmp.result 2> /dev/null
+(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 11  | sort) > ./e2e_test/sink/kafka/debezium2.tmp.result 2> /dev/null
 python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium2.result e2e_test/sink/kafka/debezium2.tmp.result
 if [ $? -ne 0 ]; then
   echo "The output for debezium sink after update is not as expected."
@@ -100,7 +100,7 @@ psql -h localhost -p 4566 -d dev -U root -c "delete from t_kafka where id = 1;"
 # test upsert kafka sink after delete
 echo "testing upsert kafka sink after deleting data"
 diff ./e2e_test/sink/kafka/upsert3.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for upsert sink after update is not as expected."
   exit 1
@@ -109,7 +109,7 @@ fi
 # test upsert kafka sink with schema after delete
 echo "testing upsert kafka sink with schema after deleting data"
 diff ./e2e_test/sink/kafka/upsert_schema3.result \
-<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
+<((./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert-schema --from-beginning --property print.key=true --max-messages 12 | sort) 2> /dev/null)
 if [ $? -ne 0 ]; then
   echo "The output for upsert sink with schema is not as expected."
   exit 1
@@ -117,7 +117,7 @@ fi
 
 # test debezium kafka sink after delete
 echo "testing debezium kafka sink after deleting data"
-(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 13 | sort) > ./e2e_test/sink/kafka/debezium3.tmp.result 2> /dev/null
+(./.risingwave/bin/kafka/bin/kafka-console-consumer.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --property print.key=true --from-beginning --max-messages 13 | sort) > ./e2e_test/sink/kafka/debezium3.tmp.result 2> /dev/null
 python3 e2e_test/sink/kafka/debezium.py e2e_test/sink/kafka/debezium3.result e2e_test/sink/kafka/debezium3.tmp.result
 if [ $? -ne 0 ]; then
   echo "The output for debezium sink after delete is not as expected."
@@ -128,13 +128,13 @@ else
 fi
 
 sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/drop_sink.slt'
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only --delete > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-upsert --delete > /dev/null 2>&1
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-debezium --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-upsert --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-debezium --delete > /dev/null 2>&1
 
 # test different encoding
 echo "testing protobuf"
 cp src/connector/src/test_data/proto_recursive/recursive.pb ./proto-recursive
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only-protobuf --create > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only-protobuf --create > /dev/null 2>&1
 sqllogictest -p 4566 -d dev 'e2e_test/sink/kafka/protobuf.slt'
-./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:29092 --topic test-rw-sink-append-only-protobuf --delete > /dev/null 2>&1
+./.risingwave/bin/kafka/bin/kafka-topics.sh --bootstrap-server message_queue:29092 --topic test-rw-sink-append-only-protobuf --delete > /dev/null 2>&1
diff --git a/ci/scripts/e2e-sink-test.sh b/ci/scripts/e2e-sink-test.sh
index 2dc02f0eada7a..ce2cc46381eba 100755
--- a/ci/scripts/e2e-sink-test.sh
+++ b/ci/scripts/e2e-sink-test.sh
@@ -57,7 +57,7 @@ node_port=50051
 node_timeout=10
 
 echo "--- starting risingwave cluster with connector node"
-cargo make ci-start ci-kafka
+cargo make ci-start ci-1cn-1fe
 ./connector-node/start-service.sh -p $node_port > .risingwave/log/connector-node.log 2>&1 &
 
 echo "waiting for connector node to start"
diff --git a/ci/scripts/run-micro-benchmarks.sh b/ci/scripts/run-micro-benchmarks.sh
index 568c90de425ca..371cc416e7ac5 100755
--- a/ci/scripts/run-micro-benchmarks.sh
+++ b/ci/scripts/run-micro-benchmarks.sh
@@ -46,6 +46,8 @@ main() {
   echo "--- Getting aws instance type"
   local instance_type=$(get_instance_type)
   echo "instance_type: $instance_type"
+  echo "$instance_type" > microbench_instance_type.txt
+  buildkite-agent artifact upload ./microbench_instance_type.txt
   if [[ $instance_type != "m6i.4xlarge" ]]; then
     echo "Only m6i.4xlarge is supported, skipping microbenchmark"
     exit 0
diff --git a/ci/scripts/upload-micro-bench-results.sh b/ci/scripts/upload-micro-bench-results.sh
index 2644ca936c5da..e72b69950bb7b 100755
--- a/ci/scripts/upload-micro-bench-results.sh
+++ b/ci/scripts/upload-micro-bench-results.sh
@@ -36,6 +36,19 @@ get_commit() {
   | sed 's/\"//g'
 }
 
+get_machine() {
+  buildkite-agent artifact download microbench_instance_type.txt ./
+  cat ./microbench_instance_type.txt
+}
+
+echo "--- Checking microbench_instance_type"
+INSTANCE_TYPE=$(get_machine)
+echo "instance type: $INSTANCE_TYPE"
+if [[ $INSTANCE_TYPE != "m6i.4xlarge" ]]; then
+  echo "Only m6i.4xlarge is supported, microbenchmark was skipped"
+  exit 0
+fi
+
 setup
 
 BUILDKITE_BUILD_URL="https://buildkite.com/risingwavelabs/main-cron/builds/$BUILDKITE_BUILD_NUMBER"
diff --git a/ci/workflows/integration-tests.yml b/ci/workflows/integration-tests.yml
index 4bd0ec1a000b1..455f29b210ec1 100644
--- a/ci/workflows/integration-tests.yml
+++ b/ci/workflows/integration-tests.yml
@@ -29,6 +29,7 @@ steps:
           - "postgres-cdc"
           - "mysql-sink"
           - "postgres-sink"
+          - "iceberg-cdc"
           # - "iceberg-sink"
           - "debezium-mysql"
         format:
@@ -79,6 +80,10 @@ steps:
         #    testcase: "iceberg-sink"
         #    format: "protobuf"
         #  skip: true
+        - with:
+            testcase: "iceberg-cdc"
+            format: "protobuf"
+          skip: true
         - with:
             testcase: "debezium-mysql"
             format: "protobuf"
diff --git a/ci/workflows/pull-request.yml b/ci/workflows/pull-request.yml
index 985bd0be4b822..3aaa09f0d7716 100644
--- a/ci/workflows/pull-request.yml
+++ b/ci/workflows/pull-request.yml
@@ -209,6 +209,21 @@ steps:
     timeout_in_minutes: 10
     retry: *auto-retry
 
+  - label: "end-to-end iceberg cdc test"
+    if: build.pull_request.labels includes "ci/run-e2e-iceberg-sink-tests"
+    command: "ci/scripts/e2e-iceberg-cdc.sh -p ci-dev"
+    depends_on:
+      - "build"
+      - "build-other"
+    plugins:
+      - docker-compose#v4.9.0:
+          run: sink-test-env
+          config: ci/docker-compose.yml
+          mount-buildkite-agent: true
+      - ./ci/plugins/upload-failure-logs
+    timeout_in_minutes: 10
+    retry: *auto-retry
+
   - label: "end-to-end pulsar sink test"
     if: build.pull_request.labels includes "ci/run-e2e-pulsar-sink-tests"
     command: "ci/scripts/e2e-pulsar-sink-test.sh -p ci-dev"
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index 89aa99a1c8b5d..4dbd5fe5bb28d 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -2,7 +2,7 @@
 version: "3"
 services:
   compactor-0:
-    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
     command:
       - compactor-node
       - "--listen-addr"
@@ -37,7 +37,7 @@ services:
       timeout: 5s
       retries: 5
   compute-node-0:
-    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
     command:
       - compute-node
       - "--listen-addr"
@@ -122,7 +122,7 @@ services:
       timeout: 5s
       retries: 5
   frontend-node-0:
-    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
     command:
       - frontend-node
       - "--listen-addr"
@@ -179,7 +179,7 @@ services:
       timeout: 5s
       retries: 5
   meta-node-0:
-    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}"
+    image: "ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}"
     command:
       - meta-node
       - "--listen-addr"
@@ -260,6 +260,7 @@ services:
       MINIO_PROMETHEUS_URL: "http://prometheus-0:9500"
       MINIO_ROOT_PASSWORD: hummockadmin
       MINIO_ROOT_USER: hummockadmin
+      MINIO_DOMAIN: "minio-0"
     container_name: minio-0
     healthcheck:
       test:
@@ -295,7 +296,7 @@ services:
       timeout: 5s
       retries: 5
   connector-node:
-    image: ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.2.0}
+    image: ghcr.io/risingwavelabs/risingwave:${RW_IMAGE_VERSION:-v1.3.0}
     entrypoint: "/risingwave/bin/connector-node/start-service.sh"
     ports:
       - 50051
diff --git a/docs/developer-guide.md b/docs/developer-guide.md
index 4ecc756131dff..7d072e7da2e44 100644
--- a/docs/developer-guide.md
+++ b/docs/developer-guide.md
@@ -2,7 +2,7 @@
 
 This guide is intended to be used by contributors to learn about how to develop RisingWave. The instructions about how to submit code changes are included in [contributing guidelines](../CONTRIBUTING.md).
 
-If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://join.slack.com/t/risingwave-community/shared_invite/zt-120rft0mr-d8uGk3d~NZiZAQWPnElOfw) to join the channel.
+If you have questions, you can search for existing discussions or start a new discussion in the [Discussions forum of RisingWave](https://github.com/risingwavelabs/risingwave/discussions), or ask in the RisingWave Community channel on Slack. Please use the [invitation link](https://risingwave.com/slack) to join the channel.
 
 To report bugs, create a [GitHub issue](https://github.com/risingwavelabs/risingwave/issues/new/choose).
 
diff --git a/e2e_test/iceberg/main.py b/e2e_test/iceberg/main.py
index fa07aa367a9b3..3f3120227e6e7 100644
--- a/e2e_test/iceberg/main.py
+++ b/e2e_test/iceberg/main.py
@@ -42,14 +42,16 @@ def init_iceberg_table(args,init_sqls):
         spark.sql(sql)
 
 
-def init_risingwave_mv(args,slt):
+def execute_slt(args,slt):
+    if slt is None or slt == "":
+        return
     rw_config = args['risingwave']
     cmd = f"sqllogictest -p {rw_config['port']} -d {rw_config['db']} {slt}"
     print(f"Command line is [{cmd}]")
     subprocess.run(cmd,
                    shell=True,
                    check=True)
-    time.sleep(10)
+    time.sleep(30)
 
 
 def verify_result(args,verify_sql,verify_schema,verify_data):
@@ -110,6 +112,6 @@ def drop_table(args,drop_sqls):
         print({section: dict(config[section]) for section in config.sections()})
 
         init_iceberg_table(config,init_sqls)
-        init_risingwave_mv(config,slt)
+        execute_slt(config,slt)
         verify_result(config,verify_sql,verify_schema,verify_data)
         drop_table(config,drop_sqls)
diff --git a/e2e_test/iceberg/test_case/cdc/load.slt b/e2e_test/iceberg/test_case/cdc/load.slt
new file mode 100644
index 0000000000000..caefd1326bbda
--- /dev/null
+++ b/e2e_test/iceberg/test_case/cdc/load.slt
@@ -0,0 +1,46 @@
+# Iceberg CDC test: ingest the MySQL products table via mysql-cdc and sink it to Iceberg
+
+# enable cdc backfill in ci
+statement ok
+set cdc_backfill='true';
+
+statement ok
+create table products ( id INT,
+ name STRING,
+ description STRING,
+ PRIMARY KEY (id)
+) with (
+ connector = 'mysql-cdc',
+ hostname = 'mysql',
+ port = '3306',
+ username = 'root',
+ password = '123456',
+ database.name = 'my@db',
+ table.name = 'products',
+ server.id = '5085'
+);
+
+
+statement ok
+CREATE SINK s1 AS select * from products WITH (
+    connector = 'iceberg',
+    type = 'upsert',
+    force_append_only = 'false',
+    database.name = 'demo',
+    table.name = 'demo_db.demo_table',
+    catalog.type = 'storage',
+    warehouse.path = 's3://icebergdata/demo',
+    s3.endpoint = 'http://127.0.0.1:9301',
+    s3.region = 'us-east-1',
+    s3.access.key = 'hummockadmin',
+    s3.secret.key = 'hummockadmin',
+    primary_key = 'id'
+);
+
+query I
+select count(*) from products;
+----
+8
+
+statement ok
+flush;
diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql
new file mode 100644
index 0000000000000..b7b6f13af83cf
--- /dev/null
+++ b/e2e_test/iceberg/test_case/cdc/mysql_cdc.sql
@@ -0,0 +1,22 @@
+-- Seed data for the Iceberg CDC e2e test: recreates the products table with ids 101-108.
+DROP DATABASE IF EXISTS `my@db`;
+CREATE DATABASE `my@db`;
+
+USE `my@db`;
+
+CREATE TABLE products (
+    id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
+    name VARCHAR(255) NOT NULL,
+    description VARCHAR(512)
+);
+
+ALTER TABLE products AUTO_INCREMENT = 101;
+
+INSERT INTO products VALUES (default,"101","101"),
+(default,"102","102"),
+(default,"103","103"),
+(default,"104","104"),
+(default,"105","105"),
+(default,"106","106"),
+(default,"107","107"),
+(default,"108","108");
diff --git a/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql
new file mode 100644
index 0000000000000..641d6220ea8dc
--- /dev/null
+++ b/e2e_test/iceberg/test_case/cdc/mysql_cdc_insert.sql
@@ -0,0 +1,7 @@
+USE `my@db`;
+
+INSERT INTO products VALUES (default,"109","109"),
+(default,"110","110"),
+(default,"111","111"),
+(default,"112","112"),
+(default,"113","113");
diff --git a/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml b/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml
new file mode 100644
index 0000000000000..5ab9647b12eb0
--- /dev/null
+++ b/e2e_test/iceberg/test_case/cdc/no_partition_cdc.toml
@@ -0,0 +1,25 @@
+init_sqls = []
+
+slt = ''
+
+verify_schema = ['int','string','string']
+
+verify_sql = 'SELECT * FROM demo_db.demo_table ORDER BY id ASC'
+
+verify_data = """
+101,101,101
+102,102,102
+103,103,103
+104,104,104
+105,105,105
+106,106,106
+107,107,107
+108,108,108
+109,109,109
+110,110,110
+111,111,111
+112,112,112
+113,113,113
+"""
+
+drop_sqls = []
diff --git a/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml b/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml
new file mode 100644
index 0000000000000..17e5f7497aae5
--- /dev/null
+++ b/e2e_test/iceberg/test_case/cdc/no_partition_cdc_init.toml
@@ -0,0 +1,31 @@
+init_sqls = [
+    'CREATE SCHEMA IF NOT EXISTS demo_db',
+    'DROP TABLE IF EXISTS demo_db.demo_table',
+    '''
+    CREATE TABLE demo_db.demo_table (
+    id int,
+    name string,
+    description string
+    ) USING iceberg
+    TBLPROPERTIES ('format-version'='2');
+    '''
+]
+
+slt = 'test_case/cdc/load.slt'
+
+verify_schema = ['int','string','string']
+
+verify_sql = 'SELECT * FROM demo_db.demo_table ORDER BY id ASC'
+
+verify_data = """
+101,101,101
+102,102,102
+103,103,103
+104,104,104
+105,105,105
+106,106,106
+107,107,107
+108,108,108
+"""
+
+drop_sqls = []
diff --git a/e2e_test/sink/kafka/create_sink.slt b/e2e_test/sink/kafka/create_sink.slt
index 25e3a59fdff3a..a1f296774f526 100644
--- a/e2e_test/sink/kafka/create_sink.slt
+++ b/e2e_test/sink/kafka/create_sink.slt
@@ -31,7 +31,7 @@ create connection mock with (
 statement error
 create sink si_kafka_append_only_conn from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-append-only',
     type = 'append-only',
     force_append_only = 'true',
@@ -42,7 +42,7 @@ create sink si_kafka_append_only_conn from t_kafka with (
 statement ok
 create sink si_kafka_append_only_conn from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-append-only',
     type = 'append-only',
     force_append_only = 'true',
@@ -66,7 +66,7 @@ drop connection mock;
 statement error sink cannot be append-only
 create sink si_kafka_append_only from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-append-only',
     type = 'append-only',
 );
@@ -74,7 +74,7 @@ create sink si_kafka_append_only from t_kafka with (
 statement ok
 create sink si_kafka_append_only from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-append-only',
     type = 'append-only',
     force_append_only = 'true'
@@ -83,7 +83,7 @@ create sink si_kafka_append_only from t_kafka with (
 statement error primary key not defined
 create sink si_kafka_upsert from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-upsert',
     type = 'upsert',
 );
@@ -91,7 +91,7 @@ create sink si_kafka_upsert from t_kafka with (
 statement ok
 create sink si_kafka_upsert from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-upsert',
     type = 'upsert',
     primary_key = 'id',
@@ -100,7 +100,7 @@ create sink si_kafka_upsert from t_kafka with (
 statement ok
 create sink si_kafka_upsert_schema from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-upsert-schema',
     primary_key = 'id',
 ) format upsert encode json (
@@ -110,7 +110,7 @@ create sink si_kafka_upsert_schema from t_kafka with (
 statement ok
 create sink si_kafka_debezium from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-debezium',
     type = 'debezium',
     primary_key = 'id',
@@ -119,7 +119,7 @@ create sink si_kafka_debezium from t_kafka with (
 statement error primary key not defined
 create sink debezium_without_pk from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-debezium',
     type = 'debezium',
 );
@@ -127,7 +127,7 @@ create sink debezium_without_pk from t_kafka with (
 statement ok
 create sink multiple_pk from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-debezium',
     type = 'debezium',
     primary_key = 'id,v_varchar'
@@ -139,7 +139,7 @@ drop sink multiple_pk;
 statement error Sink primary key column not found: invalid.
 create sink invalid_pk_column from t_kafka with (
     connector = 'kafka',
-    properties.bootstrap.server = '127.0.0.1:29092',
+    properties.bootstrap.server = 'message_queue:29092',
     topic = 'test-rw-sink-debezium',
     type = 'debezium',
     primary_key = 'id,invalid'
diff --git a/e2e_test/sink/kafka/protobuf.slt b/e2e_test/sink/kafka/protobuf.slt
index f69c4a9d07110..87ab884eddbde 100644
--- a/e2e_test/sink/kafka/protobuf.slt
+++ b/e2e_test/sink/kafka/protobuf.slt
@@ -2,7 +2,7 @@ statement ok
 create table from_kafka with (
   connector = 'kafka',
   topic = 'test-rw-sink-append-only-protobuf',
-  properties.bootstrap.server = '127.0.0.1:29092')
+  properties.bootstrap.server = 'message_queue:29092')
 format plain encode protobuf (
   schema.location = 'file:///risingwave/proto-recursive',
   message = 'recursive.AllTypes');
@@ -37,7 +37,7 @@ statement ok
 create sink sink0 from into_kafka with (
   connector = 'kafka',
   topic = 'test-rw-sink-append-only-protobuf',
-  properties.bootstrap.server = '127.0.0.1:29092')
+  properties.bootstrap.server = 'message_queue:29092')
 format plain encode protobuf (
   force_append_only = true,
   schema.location = 'file:///risingwave/proto-recursive',
@@ -70,7 +70,7 @@ statement error failed to read file
 create sink sink_err from into_kafka with (
   connector = 'kafka',
   topic = 'test-rw-sink-append-only-protobuf',
-  properties.bootstrap.server = '127.0.0.1:29092')
+  properties.bootstrap.server = 'message_queue:29092')
 format plain encode protobuf (
   force_append_only = true,
   schema.location = 'file:///risingwave/proto-recursiv',
@@ -80,7 +80,7 @@ statement error encode extra_column error: field not in proto
 create sink sink_err as select 1 as extra_column with (
   connector = 'kafka',
   topic = 'test-rw-sink-append-only-protobuf',
-  properties.bootstrap.server = '127.0.0.1:29092')
+  properties.bootstrap.server = 'message_queue:29092')
 format plain encode protobuf (
   force_append_only = true,
   schema.location = 'file:///risingwave/proto-recursive',
@@ -90,7 +90,7 @@ statement error s3 URL not supported yet
 create sink sink_err from into_kafka with (
   connector = 'kafka',
   topic = 'test-rw-sink-append-only-protobuf',
-  properties.bootstrap.server = '127.0.0.1:29092')
+  properties.bootstrap.server = 'message_queue:29092')
 format plain encode protobuf (
   force_append_only = true,
   schema.location = 's3:///risingwave/proto-recursive',
diff --git a/integration_tests/clickhouse-sink/README.md b/integration_tests/clickhouse-sink/README.md
index 607621faefeae..a383f3fba5ee4 100644
--- a/integration_tests/clickhouse-sink/README.md
+++ b/integration_tests/clickhouse-sink/README.md
@@ -23,6 +23,8 @@ docker compose exec clickhouse-server bash /opt/clickhouse/clickhouse-sql/run-sq
 - create_mv.sql
 - create_sink.sql
 
+We only support `upsert` with ClickHouse's `CollapsingMergeTree` and `VersionedCollapsingMergeTree` table engines.
+
 4. Execute a simple query:
 
 ```sh
diff --git a/integration_tests/iceberg-cdc/README.md b/integration_tests/iceberg-cdc/README.md
new file mode 100644
index 0000000000000..56f40172c3dfa
--- /dev/null
+++ b/integration_tests/iceberg-cdc/README.md
@@ -0,0 +1,5 @@
+# Iceberg CDC Integration Tests
+Data flow under test: `mysql -> rw -> iceberg` (MySQL CDC source, RisingWave, Iceberg sink).
+
+# How to run
+./run_test.sh
\ No newline at end of file
diff --git a/integration_tests/iceberg-cdc/docker-compose.yaml b/integration_tests/iceberg-cdc/docker-compose.yaml
new file mode 100644
index 0000000000000..8e9ad1062ef38
--- /dev/null
+++ b/integration_tests/iceberg-cdc/docker-compose.yaml
@@ -0,0 +1,142 @@
+version: '3.8'
+
+services:
+  compactor-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: compactor-0
+  compute-node-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: compute-node-0
+  etcd-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: etcd-0
+  frontend-node-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: frontend-node-0
+  meta-node-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: meta-node-0
+  grafana-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: grafana-0
+  prometheus-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: prometheus-0
+  minio-0:
+    extends:
+      file: ../../docker/docker-compose.yml
+      service: minio-0
+  mc:
+    depends_on:
+      - minio-0
+    image: minio/mc
+    environment:
+      - AWS_ACCESS_KEY_ID=hummockadmin
+      - AWS_SECRET_ACCESS_KEY=hummockadmin
+      - AWS_REGION=us-east-1
+    entrypoint: >
+      /bin/sh -c "
+      until (/usr/bin/mc config host add minio http://minio-0:9301 hummockadmin hummockadmin) do echo '...waiting...' && sleep 1; done;
+      /usr/bin/mc rm -r --force minio/icebergdata;
+      /usr/bin/mc mb minio/icebergdata;
+      /usr/bin/mc anonymous set public minio/icebergdata;
+      tail -f /dev/null
+      "
+
+  mysql:
+    image: mysql:8.0
+    expose:
+      -  3306
+    ports:
+      - "3306:3306"
+    environment:
+      - MYSQL_ROOT_PASSWORD=123456
+      - MYSQL_USER=mysqluser
+      - MYSQL_PASSWORD=mysqlpw
+      - MYSQL_DATABASE=mydb
+    healthcheck:
+      test: [ "CMD-SHELL", "mysqladmin ping -h 127.0.0.1 -u root -p123456" ]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+    container_name: mysql
+  prepare_mysql:
+    image: mysql:8.0
+    depends_on:
+      - mysql
+    command:
+      - /bin/sh
+      - -c
+      - "mysql -p123456 -h mysql mydb < mysql_prepare.sql"
+    volumes:
+      - "./mysql_prepare.sql:/mysql_prepare.sql"
+    container_name: prepare_mysql
+    restart: on-failure
+
+  rest:  # Iceberg REST catalog, reached by the other services at http://rest:8181
+    image: tabulario/iceberg-rest:0.6.0
+    environment:
+      - AWS_ACCESS_KEY_ID=hummockadmin
+      - AWS_SECRET_ACCESS_KEY=hummockadmin
+      - AWS_REGION=us-east-1
+      - CATALOG_CATALOG__IMPL=org.apache.iceberg.jdbc.JdbcCatalog
+      - CATALOG_URI=jdbc:sqlite:file:/tmp/iceberg_rest_mode=memory
+      - CATALOG_WAREHOUSE=s3://icebergdata/demo
+      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
+      - CATALOG_S3_ENDPOINT=http://minio-0:9301
+    depends_on:
+      - minio-0
+    # alias so that the bucket-prefixed hostname icebergdata.minio-0 resolves to the minio-0 container
+    links:
+      - minio-0:icebergdata.minio-0
+    expose:
+      - 8181
+    ports:
+      - "8181:8181"
+
+  spark:
+    depends_on:
+      - minio-0
+      - rest
+    image: ghcr.io/icelake-io/icelake-spark:latest
+    environment:
+      - AWS_ACCESS_KEY_ID=hummockadmin
+      - AWS_SECRET_ACCESS_KEY=hummockadmin
+      - AWS_REGION=us-east-1
+      - SPARK_HOME=/opt/spark
+      - PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark/bin:/opt/spark/sbin
+    user: root
+    links:
+      - minio-0:icebergdata.minio-0
+    expose:
+      - 15002
+    ports:
+      - "15002:15002"
+    healthcheck:
+      test: netstat -ltn | grep -c 15002
+      interval: 1s
+      retries: 1200
+    volumes:
+      - ./spark:/spark
+    command: [ "bash", "/spark/spark-connect-server.sh" ]
+
+volumes:
+  compute-node-0:
+    external: false
+  etcd-0:
+    external: false
+  grafana-0:
+    external: false
+  minio-0:
+    external: false
+  prometheus-0:
+    external: false
+  spark:
+    external: false
diff --git a/integration_tests/iceberg-cdc/mysql_prepare.sql b/integration_tests/iceberg-cdc/mysql_prepare.sql
new file mode 100644
index 0000000000000..3e5a236a41205
--- /dev/null
+++ b/integration_tests/iceberg-cdc/mysql_prepare.sql
@@ -0,0 +1,15 @@
+-- mysql -p123456 -uroot -h 127.0.0.1 mydb < mysql_prepare.sql
+--
+-- Mysql
+USE mydb;
+
+CREATE TABLE user_behaviors (
+  user_id VARCHAR(60),
+  target_id VARCHAR(60),
+  target_type VARCHAR(60),
+  event_timestamp VARCHAR(100),
+  behavior_type VARCHAR(60),
+  parent_target_type VARCHAR(60),
+  parent_target_id VARCHAR(60),
+  PRIMARY KEY(user_id, target_id, event_timestamp)
+);
diff --git a/integration_tests/iceberg-cdc/python/check.py b/integration_tests/iceberg-cdc/python/check.py
new file mode 100644
index 0000000000000..699fa4df29c30
--- /dev/null
+++ b/integration_tests/iceberg-cdc/python/check.py
@@ -0,0 +1,25 @@
+from pyspark.sql import SparkSession
+import configparser
+import psycopg2
+
+def check_spark_table(args):
+    """Assert that Iceberg table s1.t1 (queried via Spark) has as many rows as RisingWave's user_behaviors."""
+    rw = args['risingwave']
+    with psycopg2.connect(database=rw['db'], user=rw['user'], host=rw['host'],
+                          port=rw['port']) as conn:
+        with conn.cursor() as cur:
+            cur.execute("SELECT COUNT(*) FROM user_behaviors")
+            expected = cur.fetchone()[0]
+    print(f"expect_row_count is {expected}")
+    spark_url = args['spark']['url']
+    session = SparkSession.builder.remote(spark_url).getOrCreate()
+    actual = session.sql("SELECT COUNT(*) FROM s1.t1").collect()[0][0]
+    print(f"actual_row_count is {actual}")
+    assert actual == expected
+
+
+if __name__ == "__main__":
+    config = configparser.ConfigParser()
+    config.read("config.ini")
+    print({section: dict(config[section]) for section in config.sections()})
+    check_spark_table(config)
diff --git a/integration_tests/iceberg-cdc/python/config.ini b/integration_tests/iceberg-cdc/python/config.ini
new file mode 100644
index 0000000000000..bd95eddc5b80e
--- /dev/null
+++ b/integration_tests/iceberg-cdc/python/config.ini
@@ -0,0 +1,8 @@
+[spark]
+url=sc://localhost:15002
+
+[risingwave]
+db=dev
+user=root
+host=127.0.0.1
+port=4566
diff --git a/integration_tests/iceberg-cdc/python/init.py b/integration_tests/iceberg-cdc/python/init.py
new file mode 100644
index 0000000000000..289fa2f161889
--- /dev/null
+++ b/integration_tests/iceberg-cdc/python/init.py
@@ -0,0 +1,103 @@
+from pyspark.sql import SparkSession
+import configparser
+import psycopg2
+
+
+def init_spark_table(args):
+    """Drop and recreate the Iceberg table s1.t1 (format-version 2) through Spark."""
+    session = SparkSession.builder.remote(args['spark']['url']).getOrCreate()
+
+    statements = [
+        "CREATE SCHEMA IF NOT EXISTS s1",
+        "DROP TABLE IF EXISTS s1.t1",
+        """
+        CREATE TABLE s1.t1
+        (
+            user_id string,
+            target_id string,
+            target_type string,
+            event_timestamp string,
+            behavior_type string,
+            parent_target_type string,
+            parent_target_id string
+        ) USING iceberg
+        TBLPROPERTIES ('format-version'='2');
+        """,
+    ]
+
+    for stmt in statements:
+        print(f"Executing sql: {stmt}")
+        session.sql(stmt)
+
+
+def init_risingwave_mv(args):
+    """Create the mysql-cdc table user_behaviors and the upsert Iceberg sink s1 in RisingWave."""
+    rw = args['risingwave']
+    statements = [
+        "set streaming_parallelism = 4",
+        """
+        CREATE TABLE user_behaviors (
+            user_id VARCHAR,
+            target_id VARCHAR,
+            target_type VARCHAR,
+            event_timestamp VARCHAR,
+            behavior_type VARCHAR,
+            parent_target_type VARCHAR,
+            parent_target_id VARCHAR,
+            PRIMARY KEY(user_id, target_id, event_timestamp)
+        ) with (
+            connector = 'mysql-cdc',
+            hostname = 'mysql',
+            port = '3306',
+            username = 'root',
+            password = '123456',
+            database.name = 'mydb',
+            table.name = 'user_behaviors',
+            server.id = '1'
+        );
+        """,
+        # f"""
+        # CREATE SINK s1
+        # AS SELECT * FROM user_behaviors
+        # WITH (
+        #     connector='iceberg',
+        #     type='upsert',
+        #     primary_key = 'user_id, target_id, event_timestamp',
+        #     catalog.type = 'storage',
+        #     s3.endpoint = 'http://minio-0:9301',
+        #     s3.access.key = 'hummockadmin',
+        #     s3.secret.key = 'hummockadmin',
+        #     database.name='demo',
+        #     table.name='s1.t1',warehouse.path = 's3://hummock001/icebergdata/demo',s3.region = 'us-east-1'
+        # );
+        # """
+        """
+        CREATE SINK s1
+        AS SELECT * FROM user_behaviors
+        WITH (
+            connector='iceberg',
+            type='upsert',
+            primary_key = 'user_id, target_id, event_timestamp',
+            catalog.type = 'rest',
+            catalog.uri = 'http://rest:8181',
+            s3.endpoint = 'http://minio-0:9301',
+            s3.access.key = 'hummockadmin',
+            s3.secret.key = 'hummockadmin',
+            database.name='demo',
+            table.name='s1.t1',warehouse.path = 's3://icebergdata/demo/s1/t1',s3.region = 'us-east-1'
+        );
+        """
+    ]
+    with psycopg2.connect(database=rw['db'], user=rw['user'], host=rw['host'], port=rw['port']) as conn:
+        with conn.cursor() as cursor:
+            for stmt in statements:
+                print(f"Executing sql {stmt}")
+                cursor.execute(stmt)
+
+
+if __name__ == "__main__":
+    config = configparser.ConfigParser()
+    config.read("config.ini")
+    print({section: dict(config[section]) for section in config.sections()})
+    init_spark_table(config)
+    init_risingwave_mv(config)
diff --git a/integration_tests/iceberg-cdc/python/pyproject.toml b/integration_tests/iceberg-cdc/python/pyproject.toml
new file mode 100644
index 0000000000000..4c7bce1165796
--- /dev/null
+++ b/integration_tests/iceberg-cdc/python/pyproject.toml
@@ -0,0 +1,16 @@
+[tool.poetry]
+name = "icelake-integration-tests"
+version = "0.0.9"
+description = ""
+authors = ["Renjie Liu <liurenjie2008@gmail.com>"]
+readme = "README.md"
+packages = [{include = "icelake_integration_tests"}]
+
+[tool.poetry.dependencies]
+python = "^3.11"
+pyspark = { version = "3.4.1", extras = ["sql", "connect"] }
+psycopg2-binary = "^2.9"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/integration_tests/iceberg-cdc/run_test.sh b/integration_tests/iceberg-cdc/run_test.sh
new file mode 100755
index 0000000000000..2d8b691bc7284
--- /dev/null
+++ b/integration_tests/iceberg-cdc/run_test.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Start test environment.
+docker-compose up -d --wait
+
+# Enable `set -e` only after the environment is started, so that an unhealthy container reported by `--wait` does not abort the script.
+set -ex
+
+# Generate data
+docker build -t iceberg-cdc-datagen ../datagen
+timeout 20 docker run --network=iceberg-cdc_default iceberg-cdc-datagen /datagen --mode clickstream --qps 1 mysql --user mysqluser --password mysqlpw --host mysql --port 3306 --db mydb &
+
+cd python
+poetry update --quiet
+# Init source, mv, and sink.
+poetry run python init.py
+# Wait for sink to be finished.
+sleep 40;
+poetry run python check.py
diff --git a/integration_tests/iceberg-cdc/spark/.gitignore b/integration_tests/iceberg-cdc/spark/.gitignore
new file mode 100644
index 0000000000000..51dcf07222856
--- /dev/null
+++ b/integration_tests/iceberg-cdc/spark/.gitignore
@@ -0,0 +1,3 @@
+derby.log
+metastore_db
+.ivy
\ No newline at end of file
diff --git a/integration_tests/iceberg-cdc/spark/spark-connect-server.sh b/integration_tests/iceberg-cdc/spark/spark-connect-server.sh
new file mode 100755
index 0000000000000..7c1cd64f1a2f2
--- /dev/null
+++ b/integration_tests/iceberg-cdc/spark/spark-connect-server.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -ex
+# Join every jar under /opt/spark/deps into one ':'-separated classpath string.
+JARS=$(find /opt/spark/deps -type f -name "*.jar" | tr '\n' ':')
+# Start Spark Connect with the Iceberg REST catalog "demo" (http://rest:8181) as the default catalog.
+/opt/spark/sbin/start-connect-server.sh \
+  --master "local[3]" \
+  --driver-class-path "$JARS" \
+  --conf spark.driver.bindAddress=0.0.0.0 \
+  --conf spark.sql.catalog.demo=org.apache.iceberg.spark.SparkCatalog \
+  --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions \
+  --conf spark.sql.catalog.demo.catalog-impl=org.apache.iceberg.rest.RESTCatalog \
+  --conf spark.sql.catalog.demo.uri=http://rest:8181 \
+  --conf spark.sql.catalog.demo.s3.endpoint=http://minio-0:9301 \
+  --conf spark.sql.catalog.demo.s3.path.style.access=true \
+  --conf spark.sql.catalog.demo.s3.access.key=hummockadmin \
+  --conf spark.sql.catalog.demo.s3.secret.key=hummockadmin \
+  --conf spark.sql.defaultCatalog=demo
+# Follow the server log files; this also keeps the script running.
+tail -f /opt/spark/logs/spark*.out
diff --git a/integration_tests/redis-sink/create_sink.sql b/integration_tests/redis-sink/create_sink.sql
index 03bfc2d0b0df1..2ba9ba67feb39 100644
--- a/integration_tests/redis-sink/create_sink.sql
+++ b/integration_tests/redis-sink/create_sink.sql
@@ -3,19 +3,13 @@ FROM
     bhv_mv WITH (
     primary_key = 'user_id',
     connector = 'redis',
-    type = 'append-only',
-    force_append_only='true',
     redis.url= 'redis://127.0.0.1:6379/',
-);
+)FORMAT PLAIN ENCODE JSON(force_append_only='true');
 
 CREATE SINK bhv_redis_sink_2
 FROM
     bhv_mv WITH (
     primary_key = 'user_id',
     connector = 'redis',
-    type = 'append-only',
-    force_append_only='true',
     redis.url= 'redis://127.0.0.1:6379/',
-    redis.keyformat='user_id:{user_id}',
-    redis.valueformat='username:{username},event_timestamp{event_timestamp}'
-);
\ No newline at end of file
+)FORMAT PLAIN ENCODE TEMPLATE(force_append_only='true', key_format = 'UserID:{user_id}', value_format = 'TargetID:{target_id},EventTimestamp{event_timestamp}');
\ No newline at end of file
diff --git a/integration_tests/scripts/run_demos.py b/integration_tests/scripts/run_demos.py
index 28623f7ddc4a7..da2519e18db44 100644
--- a/integration_tests/scripts/run_demos.py
+++ b/integration_tests/scripts/run_demos.py
@@ -42,6 +42,13 @@ def run_demo(demo: str, format: str, wait_time = 40):
         run_sql_file(sql_file, demo_dir)
         sleep(10)
 
+def iceberg_cdc_demo():
+    """Run the iceberg-cdc demo's run_test.sh with the demo directory as cwd; a non-zero exit raises."""
+    demo = "iceberg-cdc"
+    scripts_dir = dirname(abspath(__file__))
+    demo_dir = os.path.join(dirname(scripts_dir), demo)
+    print("Running demo: iceberg-cdc")
+    subprocess.run(["bash", "./run_test.sh"], cwd=demo_dir, check=True)
 
 def run_iceberg_demo():
     demo = "iceberg-sink"
@@ -149,5 +156,7 @@ def run_clickhouse_demo():
         run_iceberg_demo()
 elif args.case == "clickhouse-sink":
     run_clickhouse_demo()
+elif args.case == "iceberg-cdc":
+    iceberg_cdc_demo()
 else:
     run_demo(args.case, args.format)
diff --git a/proto/ddl_service.proto b/proto/ddl_service.proto
index 27c9f2ee82f83..1efc933a7d033 100644
--- a/proto/ddl_service.proto
+++ b/proto/ddl_service.proto
@@ -314,6 +314,10 @@ message GetTablesResponse {
   map<uint32, catalog.Table> tables = 1;
 }
 
+message WaitRequest {}
+
+message WaitResponse {}
+
 service DdlService {
   rpc CreateDatabase(CreateDatabaseRequest) returns (CreateDatabaseResponse);
   rpc DropDatabase(DropDatabaseRequest) returns (DropDatabaseResponse);
@@ -343,4 +347,5 @@ service DdlService {
   rpc ListConnections(ListConnectionsRequest) returns (ListConnectionsResponse);
   rpc DropConnection(DropConnectionRequest) returns (DropConnectionResponse);
   rpc GetTables(GetTablesRequest) returns (GetTablesResponse);
+  rpc Wait(WaitRequest) returns (WaitResponse);
 }
diff --git a/proto/expr.proto b/proto/expr.proto
index 769532d8dbe19..2f252d67c8400 100644
--- a/proto/expr.proto
+++ b/proto/expr.proto
@@ -348,6 +348,7 @@ message AggCall {
     MODE = 24;
     LAST_VALUE = 25;
     GROUPING = 26;
+    INTERNAL_LAST_SEEN_VALUE = 27;
   }
   Type type = 1;
   repeated InputRef args = 2;
diff --git a/proto/plan_common.proto b/proto/plan_common.proto
index a88242a572693..d4c7a2e04f138 100644
--- a/proto/plan_common.proto
+++ b/proto/plan_common.proto
@@ -106,6 +106,7 @@ enum EncodeType {
   ENCODE_TYPE_PROTOBUF = 4;
   ENCODE_TYPE_JSON = 5;
   ENCODE_TYPE_BYTES = 6;
+  ENCODE_TYPE_TEMPLATE = 7;
 }
 
 enum RowFormatType {
diff --git a/risedev.yml b/risedev.yml
index a5ba8a7b43f97..135a33f602a6a 100644
--- a/risedev.yml
+++ b/risedev.yml
@@ -685,40 +685,6 @@ profile:
       - use: pubsub
         persist-data: true
 
-  ci-kafka:
-    config-path: src/config/ci.toml
-    steps:
-      - use: minio
-      - use: etcd
-        unsafe-no-fsync: true
-      - use: meta-node
-      - use: compute-node
-        enable-tiered-cache: true
-      - use: frontend
-      - use: compactor
-      - use: zookeeper
-        persist-data: true
-      - use: kafka
-        persist-data: true
-
-  ci-kafka-plus-pubsub:
-    config-path: src/config/ci.toml
-    steps:
-      - use: minio
-      - use: etcd
-        unsafe-no-fsync: true
-      - use: meta-node
-      - use: compute-node
-        enable-tiered-cache: true
-      - use: frontend
-      - use: compactor
-      - use: zookeeper
-        persist-data: true
-      - use: kafka
-        persist-data: true
-      - use: pubsub
-        persist-data: true
-
   ci-redis:
     config-path: src/config/ci.toml
     steps:
diff --git a/src/batch/src/executor/aggregation/filter.rs b/src/batch/src/executor/aggregation/filter.rs
index 2db2320ed3534..9cfbeabffe417 100644
--- a/src/batch/src/executor/aggregation/filter.rs
+++ b/src/batch/src/executor/aggregation/filter.rs
@@ -75,7 +75,7 @@ impl AggregateFunction for Filter {
 mod tests {
     use risingwave_common::test_prelude::StreamChunkTestExt;
     use risingwave_expr::aggregate::{build_append_only, AggCall};
-    use risingwave_expr::expr::{build_from_pretty, Expression, LiteralExpression};
+    use risingwave_expr::expr::{build_from_pretty, ExpressionBoxExt, LiteralExpression};
 
     use super::*;
 
diff --git a/src/batch/src/executor/project_set.rs b/src/batch/src/executor/project_set.rs
index 670933a6bb50c..fa3dfac917e8a 100644
--- a/src/batch/src/executor/project_set.rs
+++ b/src/batch/src/executor/project_set.rs
@@ -171,7 +171,7 @@ mod tests {
     use risingwave_common::catalog::{Field, Schema};
     use risingwave_common::test_prelude::*;
     use risingwave_common::types::DataType;
-    use risingwave_expr::expr::{Expression, InputRefExpression, LiteralExpression};
+    use risingwave_expr::expr::{ExpressionBoxExt, InputRefExpression, LiteralExpression};
     use risingwave_expr::table_function::repeat;
 
     use super::*;
diff --git a/src/batch/src/executor/source.rs b/src/batch/src/executor/source.rs
index 8bf9fc5b7e610..ae3fc7056a6a6 100644
--- a/src/batch/src/executor/source.rs
+++ b/src/batch/src/executor/source.rs
@@ -159,7 +159,10 @@ impl SourceExecutor {
         for chunk in stream {
             match chunk {
                 Ok(chunk) => {
-                    yield covert_stream_chunk_to_batch_chunk(chunk.chunk)?;
+                    let data_chunk = covert_stream_chunk_to_batch_chunk(chunk.chunk)?;
+                    if data_chunk.capacity() > 0 {
+                        yield data_chunk;
+                    }
                 }
                 Err(e) => {
                     return Err(e);
diff --git a/src/batch/src/lib.rs b/src/batch/src/lib.rs
index 9104c96c951f5..809c096eb49df 100644
--- a/src/batch/src/lib.rs
+++ b/src/batch/src/lib.rs
@@ -17,8 +17,8 @@
 #![feature(trait_alias)]
 #![feature(exact_size_is_empty)]
 #![feature(type_alias_impl_trait)]
-#![cfg_attr(coverage, feature(no_coverage))]
-#![feature(generators)]
+#![cfg_attr(coverage, feature(coverage_attribute))]
+#![feature(coroutines)]
 #![feature(proc_macro_hygiene, stmt_expr_attributes)]
 #![feature(iterator_try_collect)]
 #![feature(lint_reasons)]
@@ -27,13 +27,11 @@
 #![feature(let_chains)]
 #![feature(bound_map)]
 #![feature(int_roundings)]
-#![feature(async_fn_in_trait)]
 #![feature(allocator_api)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(result_option_inspect)]
 #![feature(assert_matches)]
 #![feature(lazy_cell)]
-#![feature(return_position_impl_trait_in_trait)]
 
 mod error;
 pub mod exchange_source;
diff --git a/src/batch/src/rpc/service/task_service.rs b/src/batch/src/rpc/service/task_service.rs
index b49a023acb22b..fb60e352ec293 100644
--- a/src/batch/src/rpc/service/task_service.rs
+++ b/src/batch/src/rpc/service/task_service.rs
@@ -53,7 +53,7 @@ impl TaskService for BatchServiceImpl {
     type CreateTaskStream = ReceiverStream<TaskInfoResponseResult>;
     type ExecuteStream = ReceiverStream<GetDataResponseResult>;
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn create_task(
         &self,
         request: Request<CreateTaskRequest>,
@@ -97,7 +97,7 @@ impl TaskService for BatchServiceImpl {
         }
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn cancel_task(
         &self,
         req: Request<CancelTaskRequest>,
@@ -109,7 +109,7 @@ impl TaskService for BatchServiceImpl {
         Ok(Response::new(CancelTaskResponse { status: None }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn execute(
         &self,
         req: Request<ExecuteRequest>,
diff --git a/src/batch/src/task/task_execution.rs b/src/batch/src/task/task_execution.rs
index 6bd83c5d62c67..445c71ee51d66 100644
--- a/src/batch/src/task/task_execution.rs
+++ b/src/batch/src/task/task_execution.rs
@@ -656,7 +656,7 @@ impl<C: BatchTaskContext> BatchTaskExecution<C> {
 
         let error = error.map(Arc::new);
         *self.failure.lock() = error.clone().map(to_rw_error);
-        let err_str = error.as_ref().map(|e| format!("{:?}", e));
+        let err_str = error.as_ref().map(|e| e.to_string());
         if let Err(e) = sender.close(error).await {
             match e {
                 SenderError => {
diff --git a/src/cmd/src/bin/compactor.rs b/src/cmd/src/bin/compactor.rs
index 21b7db2405e2d..554168d8a6683 100644
--- a/src/cmd/src/bin/compactor.rs
+++ b/src/cmd/src/bin/compactor.rs
@@ -12,6 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 risingwave_cmd::main!(compactor);
diff --git a/src/cmd/src/bin/compute_node.rs b/src/cmd/src/bin/compute_node.rs
index 0bb1e5211ac57..a24d132b70b94 100644
--- a/src/cmd/src/bin/compute_node.rs
+++ b/src/cmd/src/bin/compute_node.rs
@@ -12,6 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 risingwave_cmd::main!(compute);
diff --git a/src/cmd/src/bin/ctl.rs b/src/cmd/src/bin/ctl.rs
index 38345c7a3fc2e..7b4c3132e747d 100644
--- a/src/cmd/src/bin/ctl.rs
+++ b/src/cmd/src/bin/ctl.rs
@@ -12,6 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 risingwave_cmd::main!(ctl);
diff --git a/src/cmd/src/bin/frontend_node.rs b/src/cmd/src/bin/frontend_node.rs
index 32d563be109fc..546bacbf1a901 100644
--- a/src/cmd/src/bin/frontend_node.rs
+++ b/src/cmd/src/bin/frontend_node.rs
@@ -12,6 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 risingwave_cmd::main!(frontend);
diff --git a/src/cmd/src/bin/meta_node.rs b/src/cmd/src/bin/meta_node.rs
index 032cc6bc28285..4bebfc5f915a2 100644
--- a/src/cmd/src/bin/meta_node.rs
+++ b/src/cmd/src/bin/meta_node.rs
@@ -12,6 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 risingwave_cmd::main!(meta);
diff --git a/src/cmd/src/lib.rs b/src/cmd/src/lib.rs
index 12de26657bd33..93df94a63816a 100644
--- a/src/cmd/src/lib.rs
+++ b/src/cmd/src/lib.rs
@@ -30,7 +30,7 @@ macro_rules! main {
         #[cfg(not(enable_task_local_alloc))]
         risingwave_common::enable_jemalloc!();
 
-        #[cfg_attr(coverage, no_coverage)]
+        #[cfg_attr(coverage, coverage(off))]
         fn main() {
             let opts = clap::Parser::parse();
             $crate::$component(opts);
diff --git a/src/cmd_all/src/bin/risingwave.rs b/src/cmd_all/src/bin/risingwave.rs
index 3e9088e16b9e2..b7693c6fa06a2 100644
--- a/src/cmd_all/src/bin/risingwave.rs
+++ b/src/cmd_all/src/bin/risingwave.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 use std::str::FromStr;
 
@@ -158,7 +158,7 @@ impl Component {
     }
 }
 
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 fn main() -> Result<()> {
     let risingwave = || {
         command!(BINARY_NAME)
diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml
index ddd1fe5a33cdb..168ba836d4c1b 100644
--- a/src/common/Cargo.toml
+++ b/src/common/Cargo.toml
@@ -49,6 +49,7 @@ hyper = "0.14"
 hytra = { workspace = true }
 itertools = "0.11"
 itoa = "1.0"
+jsonbb = "0.1"
 lru = { git = "https://github.com/risingwavelabs/lru-rs.git", rev = "cb2d7c7" }
 memcomparable = { version = "0.2", features = ["decimal"] }
 num-integer = "0.1"
diff --git a/src/common/proc_macro/src/config.rs b/src/common/proc_macro/src/config.rs
index 285834eb123cf..6e369fbad33eb 100644
--- a/src/common/proc_macro/src/config.rs
+++ b/src/common/proc_macro/src/config.rs
@@ -41,7 +41,7 @@ fn type_is_option(ty: &syn::Type) -> bool {
     false
 }
 
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 pub fn produce_override_config(input: DeriveInput) -> TokenStream {
     let syn::Data::Struct(syn::DataStruct { fields, .. }) = input.data else {
         abort!(input, "Only struct is supported");
diff --git a/src/common/proc_macro/src/lib.rs b/src/common/proc_macro/src/lib.rs
index 060ee1950624e..a11e407c6c053 100644
--- a/src/common/proc_macro/src/lib.rs
+++ b/src/common/proc_macro/src/lib.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 use estimate_size::{
     add_trait_bounds, extract_ignored_generics_list, has_nested_flag_attribute_list,
@@ -52,7 +52,7 @@ mod estimate_size;
 ///     }
 /// }
 /// ```
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 #[proc_macro_derive(OverrideConfig, attributes(override_opts))]
 #[proc_macro_error]
 pub fn override_config(input: TokenStream) -> TokenStream {
diff --git a/src/common/src/array/jsonb_array.rs b/src/common/src/array/jsonb_array.rs
index 0e9ba7c48511d..3c4ca23fff04e 100644
--- a/src/common/src/array/jsonb_array.rs
+++ b/src/common/src/array/jsonb_array.rs
@@ -12,36 +12,35 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::mem::size_of;
-
 use risingwave_pb::data::{PbArray, PbArrayType};
-use serde_json::Value;
 
-use super::{Array, ArrayBuilder};
+use super::{Array, ArrayBuilder, ArrayImpl, ArrayResult};
 use crate::buffer::{Bitmap, BitmapBuilder};
 use crate::estimate_size::EstimateSize;
-use crate::types::{DataType, JsonbRef, JsonbVal, F32, F64};
-use crate::util::iter_util::ZipEqFast;
+use crate::types::{DataType, JsonbRef, JsonbVal, Scalar};
 
 #[derive(Debug)]
 pub struct JsonbArrayBuilder {
     bitmap: BitmapBuilder,
-    data: Vec<Value>,
+    builder: jsonbb::Builder,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct JsonbArray {
     bitmap: Bitmap,
-    data: Vec<Value>,
+    /// Elements are stored as a single JSONB array value.
+    data: jsonbb::Value,
 }
 
 impl ArrayBuilder for JsonbArrayBuilder {
     type ArrayType = JsonbArray;
 
     fn new(capacity: usize) -> Self {
+        let mut builder = jsonbb::Builder::with_capacity(capacity);
+        builder.begin_array();
         Self {
             bitmap: BitmapBuilder::with_capacity(capacity),
-            data: Vec::with_capacity(capacity),
+            builder,
         }
     }
 
@@ -54,13 +53,15 @@ impl ArrayBuilder for JsonbArrayBuilder {
         match value {
             Some(x) => {
                 self.bitmap.append_n(n, true);
-                self.data
-                    .extend(std::iter::repeat(x).take(n).map(|x| x.0.clone()));
+                for _ in 0..n {
+                    self.builder.add_value(x.0);
+                }
             }
             None => {
                 self.bitmap.append_n(n, false);
-                self.data
-                    .extend(std::iter::repeat(*JsonbVal::dummy().0).take(n));
+                for _ in 0..n {
+                    self.builder.add_null();
+                }
             }
         }
     }
@@ -69,29 +70,44 @@ impl ArrayBuilder for JsonbArrayBuilder {
         for bit in other.bitmap.iter() {
             self.bitmap.append(bit);
         }
-        self.data.extend_from_slice(&other.data);
+        for value in other.data.as_array().unwrap().iter() {
+            self.builder.add_value(value);
+        }
     }
 
     fn pop(&mut self) -> Option<()> {
-        self.data.pop().map(|_| self.bitmap.pop().unwrap())
+        self.bitmap.pop()?;
+        self.builder.pop();
+        Some(())
     }
 
     fn len(&self) -> usize {
         self.bitmap.len()
     }
 
-    fn finish(self) -> Self::ArrayType {
+    fn finish(mut self) -> Self::ArrayType {
+        self.builder.end_array();
         Self::ArrayType {
             bitmap: self.bitmap.finish(),
-            data: self.data,
+            data: self.builder.finish(),
         }
     }
 }
 
-impl JsonbArrayBuilder {
-    pub fn append_move(&mut self, value: JsonbVal) {
-        self.bitmap.append(true);
-        self.data.push(*value.0);
+impl JsonbArray {
+    /// Loads a `JsonbArray` from a protobuf array.
+    ///
+    /// See also `JsonbArray::to_protobuf`.
+    pub fn from_protobuf(array: &PbArray) -> ArrayResult<ArrayImpl> {
+        ensure!(
+            array.values.len() == 1,
+            "Must have exactly 1 buffer in a jsonb array"
+        );
+        let arr = JsonbArray {
+            bitmap: array.get_null_bitmap()?.into(),
+            data: jsonbb::Value::from_bytes(&array.values[0].body),
+        };
+        Ok(arr.into())
     }
 }
 
@@ -101,52 +117,23 @@ impl Array for JsonbArray {
     type RefItem<'a> = JsonbRef<'a>;
 
     unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> {
-        JsonbRef(self.data.get_unchecked(idx))
+        JsonbRef(self.data.as_array().unwrap().get(idx).unwrap())
     }
 
     fn len(&self) -> usize {
-        self.data.len()
+        self.bitmap.len()
     }
 
     fn to_protobuf(&self) -> PbArray {
-        // The memory layout contains `serde_json::Value` trees, but in protobuf we transmit this as
-        // variable length bytes in value encoding. That is, one buffer of length n+1 containing
-        // start and end offsets into the 2nd buffer containing all value bytes concatenated.
-
         use risingwave_pb::common::buffer::CompressionType;
         use risingwave_pb::common::Buffer;
 
-        let mut offset_buffer =
-            Vec::<u8>::with_capacity((1 + self.data.len()) * std::mem::size_of::<u64>());
-        let mut data_buffer = Vec::<u8>::with_capacity(self.data.len());
-
-        let mut offset = 0;
-        for (v, not_null) in self.data.iter().zip_eq_fast(self.null_bitmap().iter()) {
-            if !not_null {
-                continue;
-            }
-            let d = JsonbRef(v).value_serialize();
-            offset_buffer.extend_from_slice(&(offset as u64).to_be_bytes());
-            data_buffer.extend_from_slice(&d);
-            offset += d.len();
-        }
-        offset_buffer.extend_from_slice(&(offset as u64).to_be_bytes());
-
-        let values = vec![
-            Buffer {
-                compression: CompressionType::None as i32,
-                body: offset_buffer,
-            },
-            Buffer {
-                compression: CompressionType::None as i32,
-                body: data_buffer,
-            },
-        ];
-
-        let null_bitmap = self.null_bitmap().to_protobuf();
         PbArray {
-            null_bitmap: Some(null_bitmap),
-            values,
+            null_bitmap: Some(self.null_bitmap().to_protobuf()),
+            values: vec![Buffer {
+                compression: CompressionType::None as i32,
+                body: self.data.as_bytes().to_vec(),
+            }],
             array_type: PbArrayType::Jsonb as i32,
             struct_array_data: None,
             list_array_data: None,
@@ -176,7 +163,7 @@ impl FromIterator<Option<JsonbVal>> for JsonbArray {
         let mut builder = <Self as Array>::Builder::new(iter.size_hint().0);
         for i in iter {
             match i {
-                Some(x) => builder.append_move(x),
+                Some(x) => builder.append(Some(x.as_scalar_ref())),
                 None => builder.append(None),
             }
         }
@@ -190,31 +177,8 @@ impl FromIterator<JsonbVal> for JsonbArray {
     }
 }
 
-// TODO: We need to fix this later.
 impl EstimateSize for JsonbArray {
     fn estimated_heap_size(&self) -> usize {
-        self.bitmap.estimated_heap_size() + self.data.capacity() * size_of::<Value>()
-    }
-}
-
-impl From<F32> for Value {
-    fn from(v: F32) -> Value {
-        serde_json::Number::from_f64(v.0 as f64)
-            .expect("todo: convert Inf/NaN to jsonb")
-            .into()
-    }
-}
-
-impl From<F64> for Value {
-    fn from(v: F64) -> Value {
-        serde_json::Number::from_f64(v.0)
-            .expect("todo: convert Inf/NaN to jsonb")
-            .into()
-    }
-}
-
-impl From<JsonbRef<'_>> for Value {
-    fn from(v: JsonbRef<'_>) -> Value {
-        v.0.clone()
+        self.bitmap.estimated_heap_size() + self.data.capacity()
     }
 }
diff --git a/src/common/src/array/proto_reader.rs b/src/common/src/array/proto_reader.rs
index 55d505343dadd..4ca6bf7b70d05 100644
--- a/src/common/src/array/proto_reader.rs
+++ b/src/common/src/array/proto_reader.rs
@@ -52,9 +52,7 @@ impl ArrayImpl {
             PbArrayType::Timestamp => read_timestamp_array(array, cardinality)?,
             PbArrayType::Timestamptz => read_timestamptz_array(array, cardinality)?,
             PbArrayType::Interval => read_interval_array(array, cardinality)?,
-            PbArrayType::Jsonb => {
-                read_string_array::<JsonbArrayBuilder, JsonbValueReader>(array, cardinality)?
-            }
+            PbArrayType::Jsonb => JsonbArray::from_protobuf(array)?,
             PbArrayType::Struct => StructArray::from_protobuf(array)?,
             PbArrayType::List => ListArray::from_protobuf(array)?,
             PbArrayType::Unspecified => unreachable!(),
diff --git a/src/common/src/array/value_reader.rs b/src/common/src/array/value_reader.rs
index 96ed7c31b88aa..45db47f23242b 100644
--- a/src/common/src/array/value_reader.rs
+++ b/src/common/src/array/value_reader.rs
@@ -19,8 +19,7 @@ use byteorder::{BigEndian, ReadBytesExt};
 
 use super::ArrayResult;
 use crate::array::{
-    ArrayBuilder, BytesArrayBuilder, JsonbArrayBuilder, PrimitiveArrayItemType, Serial,
-    Utf8ArrayBuilder,
+    ArrayBuilder, BytesArrayBuilder, PrimitiveArrayItemType, Serial, Utf8ArrayBuilder,
 };
 use crate::types::{Decimal, F32, F64};
 
@@ -89,15 +88,3 @@ impl VarSizedValueReader<BytesArrayBuilder> for BytesValueReader {
         Ok(())
     }
 }
-
-pub struct JsonbValueReader;
-
-impl VarSizedValueReader<JsonbArrayBuilder> for JsonbValueReader {
-    fn read(buf: &[u8], builder: &mut JsonbArrayBuilder) -> ArrayResult<()> {
-        let Some(v) = super::JsonbVal::value_deserialize(buf) else {
-            bail!("failed to read jsonb from bytes");
-        };
-        builder.append_move(v);
-        Ok(())
-    }
-}
diff --git a/src/common/src/lib.rs b/src/common/src/lib.rs
index 2a3575d8dae78..fbcd3854fa572 100644
--- a/src/common/src/lib.rs
+++ b/src/common/src/lib.rs
@@ -24,12 +24,11 @@
 #![feature(trusted_len)]
 #![feature(allocator_api)]
 #![feature(lint_reasons)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(map_try_insert)]
 #![feature(lazy_cell)]
 #![feature(error_generic_member_access)]
 #![feature(let_chains)]
-#![feature(return_position_impl_trait_in_trait)]
 #![feature(portable_simd)]
 #![feature(array_chunks)]
 #![feature(inline_const_pat)]
@@ -43,7 +42,6 @@
 #![feature(result_option_inspect)]
 #![feature(map_entry_replace)]
 #![feature(negative_impls)]
-#![feature(async_fn_in_trait)]
 #![feature(bound_map)]
 #![feature(array_methods)]
 
diff --git a/src/common/src/test_utils/rand_array.rs b/src/common/src/test_utils/rand_array.rs
index 70d0cb73d4dfa..f2dd8ad42854b 100644
--- a/src/common/src/test_utils/rand_array.rs
+++ b/src/common/src/test_utils/rand_array.rs
@@ -135,7 +135,7 @@ impl RandValue for Int256 {
 
 impl RandValue for JsonbVal {
     fn rand_value<R: rand::Rng>(_rand: &mut R) -> Self {
-        JsonbVal::dummy()
+        JsonbVal::null()
     }
 }
 
diff --git a/src/common/src/types/jsonb.rs b/src/common/src/types/jsonb.rs
index 7f4c002037060..590b693e47891 100644
--- a/src/common/src/types/jsonb.rs
+++ b/src/common/src/types/jsonb.rs
@@ -15,23 +15,21 @@
 use std::fmt;
 use std::hash::Hash;
 
-use postgres_types::{FromSql as _, ToSql as _, Type};
-use serde_json::Value;
+use bytes::Buf;
+use jsonbb::{Value, ValueRef};
 
 use crate::estimate_size::EstimateSize;
-use crate::types::{Scalar, ScalarRef};
+use crate::types::{Scalar, ScalarRef, F32, F64};
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct JsonbVal(pub(crate) Box<Value>); // The `Box` is just to keep `size_of::<ScalarImpl>` smaller.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct JsonbVal(pub(crate) Value);
 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub struct JsonbRef<'a>(pub(crate) &'a Value);
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
+pub struct JsonbRef<'a>(pub(crate) ValueRef<'a>);
 
 impl EstimateSize for JsonbVal {
     fn estimated_heap_size(&self) -> usize {
-        // https://github.com/risingwavelabs/risingwave/issues/8957
-        // FIXME: correctly handle jsonb size
-        0
+        self.0.capacity()
     }
 }
 
@@ -63,7 +61,7 @@ impl<'a> ScalarRef<'a> for JsonbRef<'a> {
     type ScalarType = JsonbVal;
 
     fn to_owned_scalar(&self) -> Self::ScalarType {
-        JsonbVal(self.0.clone().into())
+        JsonbVal(self.0.into())
     }
 
     fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H) {
@@ -71,22 +69,6 @@ impl<'a> ScalarRef<'a> for JsonbRef<'a> {
     }
 }
 
-impl Hash for JsonbRef<'_> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        // We do not intend to support hashing `jsonb` type.
-        // Before #7981 is done, we do not panic but just hash its string representation.
-        // Note that `serde_json` without feature `preserve_order` uses `BTreeMap` for json object.
-        // So its string form always have keys sorted.
-        self.0.to_string().hash(state)
-    }
-}
-
-impl Hash for JsonbVal {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.0.to_string().hash(state)
-    }
-}
-
 impl PartialOrd for JsonbVal {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
         Some(self.cmp(other))
@@ -160,9 +142,7 @@ impl crate::types::to_binary::ToBinary for JsonbRef<'_> {
         &self,
         _ty: &crate::types::DataType,
     ) -> crate::error::Result<Option<bytes::Bytes>> {
-        let mut output = bytes::BytesMut::new();
-        self.0.to_sql(&Type::JSONB, &mut output).unwrap();
-        Ok(Some(output.freeze()))
+        Ok(Some(self.value_serialize().into()))
     }
 }
 
@@ -170,43 +150,130 @@ impl std::str::FromStr for JsonbVal {
     type Err = <Value as std::str::FromStr>::Err;
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let v: Value = s.parse()?;
-        Ok(Self(v.into()))
+        Ok(Self(s.parse()?))
     }
 }
 
 impl JsonbVal {
-    /// Constructs a value without specific meaning. Usually used as a lightweight placeholder.
-    pub fn dummy() -> Self {
-        Self(Value::Null.into())
+    /// Returns a jsonb `null`.
+    pub fn null() -> Self {
+        Self(Value::null())
+    }
+
+    /// Returns an empty array `[]`.
+    pub fn empty_array() -> Self {
+        Self(Value::array([]))
+    }
+
+    /// Returns an empty object `{}`.
+    pub fn empty_object() -> Self {
+        Self(Value::object([]))
     }
 
+    /// Deserialize from a memcomparable encoding.
     pub fn memcmp_deserialize(
         deserializer: &mut memcomparable::Deserializer<impl bytes::Buf>,
     ) -> memcomparable::Result<Self> {
-        let v: Value = <String as serde::Deserialize>::deserialize(deserializer)?
+        let v = <String as serde::Deserialize>::deserialize(deserializer)?
             .parse()
             .map_err(|_| memcomparable::Error::Message("invalid json".into()))?;
-        Ok(Self(v.into()))
+        Ok(Self(v))
+    }
+
+    /// Deserialize from a pgwire "BINARY" encoding.
+    pub fn value_deserialize(mut buf: &[u8]) -> Option<Self> {
+        if buf.is_empty() || buf.get_u8() != 1 {
+            return None;
+        }
+        Value::from_text(buf).ok().map(Self)
+    }
+
+    /// Convert the value to a [`serde_json::Value`].
+    pub fn take(self) -> serde_json::Value {
+        self.0.into()
+    }
+}
+
+impl From<serde_json::Value> for JsonbVal {
+    fn from(v: serde_json::Value) -> Self {
+        Self(v.into())
+    }
+}
+
+impl From<bool> for JsonbVal {
+    fn from(v: bool) -> Self {
+        Self(v.into())
+    }
+}
+
+impl From<i16> for JsonbVal {
+    fn from(v: i16) -> Self {
+        Self(v.into())
     }
+}
 
-    pub fn value_deserialize(buf: &[u8]) -> Option<Self> {
-        let v = Value::from_sql(&Type::JSONB, buf).ok()?;
-        Some(Self(v.into()))
+impl From<i32> for JsonbVal {
+    fn from(v: i32) -> Self {
+        Self(v.into())
     }
+}
 
-    pub fn take(mut self) -> Value {
-        self.0.take()
+impl From<i64> for JsonbVal {
+    fn from(v: i64) -> Self {
+        Self(v.into())
     }
+}
 
-    pub fn as_serde_mut(&mut self) -> &mut Value {
-        &mut self.0
+impl From<F32> for JsonbVal {
+    fn from(v: F32) -> Self {
+        if v.0 == f32::INFINITY {
+            Self("Infinity".into())
+        } else if v.0 == f32::NEG_INFINITY {
+            Self("-Infinity".into())
+        } else if v.0.is_nan() {
+            Self("NaN".into())
+        } else {
+            Self(v.0.into())
+        }
+    }
+}
+
+// NOTE: Infinite or NaN values are not JSON numbers. They are stored as strings in Postgres.
+impl From<F64> for JsonbVal {
+    fn from(v: F64) -> Self {
+        if v.0 == f64::INFINITY {
+            Self("Infinity".into())
+        } else if v.0 == f64::NEG_INFINITY {
+            Self("-Infinity".into())
+        } else if v.0.is_nan() {
+            Self("NaN".into())
+        } else {
+            Self(v.0.into())
+        }
+    }
+}
+
+impl From<&str> for JsonbVal {
+    fn from(v: &str) -> Self {
+        Self(v.into())
+    }
+}
+
+impl From<JsonbRef<'_>> for JsonbVal {
+    fn from(v: JsonbRef<'_>) -> Self {
+        Self(v.0.to_owned())
     }
 }
 
 impl From<Value> for JsonbVal {
     fn from(v: Value) -> Self {
-        Self(v.into())
+        Self(v)
+    }
+}
+
+impl<'a> From<JsonbRef<'a>> for ValueRef<'a> {
+    fn from(v: JsonbRef<'a>) -> Self {
+        v.0
     }
 }
 
@@ -221,49 +288,52 @@ impl<'a> JsonbRef<'a> {
         serde::Serialize::serialize(&s, serializer)
     }
 
+    /// Serialize to a pgwire "BINARY" encoding.
     pub fn value_serialize(&self) -> Vec<u8> {
+        use std::io::Write;
         // Reuse the pgwire "BINARY" encoding for jsonb type.
         // It is not truly binary, but one byte of version `1u8` followed by string form.
         // This version number helps us maintain compatibility when we switch to more efficient
         // encoding later.
-        let mut output = bytes::BytesMut::new();
-        self.0.to_sql(&Type::JSONB, &mut output).unwrap();
-        output.freeze().into()
+        let mut buf = Vec::with_capacity(self.0.capacity());
+        buf.push(1);
+        write!(&mut buf, "{}", self.0).unwrap();
+        buf
     }
 
+    /// Returns true if this is a jsonb `null`.
     pub fn is_jsonb_null(&self) -> bool {
-        matches!(self.0, Value::Null)
+        self.0.as_null().is_some()
     }
 
+    /// Returns the type name of this jsonb.
+    ///
+    /// Possible values are: `null`, `boolean`, `number`, `string`, `array`, `object`.
     pub fn type_name(&self) -> &'static str {
         match self.0 {
-            Value::Null => "null",
-            Value::Bool(_) => "boolean",
-            Value::Number(_) => "number",
-            Value::String(_) => "string",
-            Value::Array(_) => "array",
-            Value::Object(_) => "object",
+            ValueRef::Null => "null",
+            ValueRef::Bool(_) => "boolean",
+            ValueRef::Number(_) => "number",
+            ValueRef::String(_) => "string",
+            ValueRef::Array(_) => "array",
+            ValueRef::Object(_) => "object",
         }
     }
 
+    /// Returns the length of this json array.
     pub fn array_len(&self) -> Result<usize, String> {
-        match self.0 {
-            Value::Array(v) => Ok(v.len()),
-            _ => Err(format!(
-                "cannot get array length of a jsonb {}",
-                self.type_name()
-            )),
-        }
+        let array = self
+            .0
+            .as_array()
+            .ok_or_else(|| format!("cannot get array length of a jsonb {}", self.type_name()))?;
+        Ok(array.len())
     }
 
+    /// If the JSON is a boolean, returns the associated bool.
     pub fn as_bool(&self) -> Result<bool, String> {
-        match self.0 {
-            Value::Bool(v) => Ok(*v),
-            _ => Err(format!(
-                "cannot cast jsonb {} to type boolean",
-                self.type_name()
-            )),
-        }
+        self.0
+            .as_bool()
+            .ok_or_else(|| format!("cannot cast jsonb {} to type boolean", self.type_name()))
     }
 
     /// Attempt to read jsonb as a JSON number.
@@ -271,13 +341,11 @@ impl<'a> JsonbRef<'a> {
     /// According to RFC 8259, only number within IEEE 754 binary64 (double precision) has good
     /// interoperability. We do not support arbitrary precision like PostgreSQL `numeric` right now.
     pub fn as_number(&self) -> Result<f64, String> {
-        match self.0 {
-            Value::Number(v) => v.as_f64().ok_or_else(|| "jsonb number out of range".into()),
-            _ => Err(format!(
-                "cannot cast jsonb {} to type number",
-                self.type_name()
-            )),
-        }
+        self.0
+            .as_number()
+            .ok_or_else(|| format!("cannot cast jsonb {} to type number", self.type_name()))?
+            .as_f64()
+            .ok_or_else(|| "jsonb number out of range".into())
     }
 
     /// This is part of the `->>` or `#>>` syntax to access a child as string.
@@ -291,9 +359,9 @@ impl<'a> JsonbRef<'a> {
     ///   * Jsonb string is displayed with quotes but treated as its inner value here.
     pub fn force_str<W: std::fmt::Write>(&self, writer: &mut W) -> std::fmt::Result {
         match self.0 {
-            Value::String(v) => writer.write_str(v),
-            Value::Null => Ok(()),
-            Value::Bool(_) | Value::Number(_) | Value::Array(_) | Value::Object(_) => {
+            ValueRef::String(v) => writer.write_str(v),
+            ValueRef::Null => Ok(()),
+            ValueRef::Bool(_) | ValueRef::Number(_) | ValueRef::Array(_) | ValueRef::Object(_) => {
                 use crate::types::to_text::ToText as _;
                 self.write_with_type(&crate::types::DataType::Jsonb, writer)
             }
@@ -316,38 +384,33 @@ impl<'a> JsonbRef<'a> {
 
     /// Returns an iterator over the elements if this is an array.
     pub fn array_elements(self) -> Result<impl Iterator<Item = JsonbRef<'a>>, String> {
-        match &self.0 {
-            Value::Array(array) => Ok(array.iter().map(Self)),
-            _ => Err(format!(
-                "cannot extract elements from a jsonb {}",
-                self.type_name()
-            )),
-        }
+        let array = self
+            .0
+            .as_array()
+            .ok_or_else(|| format!("cannot extract elements from a jsonb {}", self.type_name()))?;
+        Ok(array.iter().map(Self))
     }
 
     /// Returns an iterator over the keys if this is an object.
     pub fn object_keys(self) -> Result<impl Iterator<Item = &'a str>, String> {
-        match &self.0 {
-            Value::Object(object) => Ok(object.keys().map(|s| s.as_str())),
-            _ => Err(format!(
+        let object = self.0.as_object().ok_or_else(|| {
+            format!(
                 "cannot call jsonb_object_keys on a jsonb {}",
                 self.type_name()
-            )),
-        }
+            )
+        })?;
+        Ok(object.keys())
     }
 
     /// Returns an iterator over the key-value pairs if this is an object.
     pub fn object_key_values(
         self,
     ) -> Result<impl Iterator<Item = (&'a str, JsonbRef<'a>)>, String> {
-        match &self.0 {
-            Value::Object(object) => Ok(object.iter().map(|(k, v)| (k.as_str(), Self(v)))),
-            _ => Err(format!("cannot deconstruct a jsonb {}", self.type_name())),
-        }
-    }
-
-    pub fn value(&self) -> &'a Value {
-        self.0
+        let object = self
+            .0
+            .as_object()
+            .ok_or_else(|| format!("cannot deconstruct a jsonb {}", self.type_name()))?;
+        Ok(object.iter().map(|(k, v)| (k, Self(v))))
     }
 }
 
diff --git a/src/common/src/types/mod.rs b/src/common/src/types/mod.rs
index 83d281c5238e6..386f63280a557 100644
--- a/src/common/src/types/mod.rs
+++ b/src/common/src/types/mod.rs
@@ -416,7 +416,7 @@ impl DataType {
             DataType::Timestamptz => ScalarImpl::Timestamptz(Timestamptz::MIN),
             DataType::Decimal => ScalarImpl::Decimal(Decimal::NegativeInf),
             DataType::Interval => ScalarImpl::Interval(Interval::MIN),
-            DataType::Jsonb => ScalarImpl::Jsonb(JsonbVal::dummy()), // NOT `min` #7981
+            DataType::Jsonb => ScalarImpl::Jsonb(JsonbVal::null()), // NOT `min` #7981
             DataType::Struct(data_types) => ScalarImpl::Struct(StructValue::new(
                 data_types
                     .types()
@@ -1303,7 +1303,7 @@ mod tests {
                     ScalarImpl::Interval(Interval::from_month_day_usec(2, 3, 3333)),
                     DataType::Interval,
                 ),
-                DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::dummy()), DataType::Jsonb),
+                DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::null()), DataType::Jsonb),
                 DataTypeName::Struct => (
                     ScalarImpl::Struct(StructValue::new(vec![
                         ScalarImpl::Int64(233).into(),
diff --git a/src/common/src/types/ordered.rs b/src/common/src/types/ordered.rs
index 75b07e529d7b9..68cd6329287e2 100644
--- a/src/common/src/types/ordered.rs
+++ b/src/common/src/types/ordered.rs
@@ -138,7 +138,7 @@ impl<T: DefaultOrd> From<T> for DefaultOrdered<T> {
     }
 }
 
-#[allow(clippy::incorrect_partial_ord_impl_on_ord_type)]
+#[allow(clippy::non_canonical_partial_ord_impl)]
 impl<T: DefaultOrd> PartialOrd for DefaultOrdered<T> {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         self.0.default_partial_cmp(other.as_inner())
diff --git a/src/common/src/util/chunk_coalesce.rs b/src/common/src/util/chunk_coalesce.rs
index 9a41fc83e8f0e..3bd56b19e434d 100644
--- a/src/common/src/util/chunk_coalesce.rs
+++ b/src/common/src/util/chunk_coalesce.rs
@@ -285,7 +285,12 @@ impl SlicedDataChunk {
     }
 
     pub fn with_offset_checked(data_chunk: DataChunk, offset: usize) -> Self {
-        assert!(offset < data_chunk.capacity());
+        assert!(
+            offset < data_chunk.capacity(),
+            "offset {}, data_chunk capacity {}",
+            offset,
+            data_chunk.capacity()
+        );
         Self { data_chunk, offset }
     }
 
diff --git a/src/common/src/util/epoch.rs b/src/common/src/util/epoch.rs
index 86ed158c2e206..4d57c97b054b3 100644
--- a/src/common/src/util/epoch.rs
+++ b/src/common/src/util/epoch.rs
@@ -73,6 +73,22 @@ impl Epoch {
         Epoch(time << EPOCH_PHYSICAL_SHIFT_BITS)
     }
 
+    /// Converts a Unix timestamp in milliseconds into an [`Epoch`].
+    ///
+    /// The timestamp is rebased by `UNIX_RISINGWAVE_DATE_SEC * 1000` milliseconds and
+    /// then shifted left by `EPOCH_PHYSICAL_SHIFT_BITS`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `mi` is smaller than `UNIX_RISINGWAVE_DATE_SEC * 1000`. The unchecked
+    /// subtraction would otherwise wrap around silently when overflow checks are disabled.
+    pub fn from_unix_millis(mi: u64) -> Self {
+        let physical_millis = mi
+            .checked_sub(UNIX_RISINGWAVE_DATE_SEC * 1000)
+            .expect("unix millis is earlier than the RisingWave epoch base");
+        Epoch(physical_millis << EPOCH_PHYSICAL_SHIFT_BITS)
+    }
+
     pub fn physical_now() -> u64 {
         UNIX_RISINGWAVE_DATE_EPOCH
             .elapsed()
diff --git a/src/common/src/util/future_utils.rs b/src/common/src/util/future_utils.rs
index 75c38488457ac..20844d8cd15d4 100644
--- a/src/common/src/util/future_utils.rs
+++ b/src/common/src/util/future_utils.rs
@@ -13,9 +13,11 @@
 // limitations under the License.
 
 use std::future::pending;
+use std::pin::{pin, Pin};
 
-use futures::future::Either;
-use futures::{Future, FutureExt, Stream};
+use futures::future::{select, Either};
+use futures::stream::Peekable;
+use futures::{Future, FutureExt, Stream, StreamExt};
 
 /// Convert a list of streams into a [`Stream`] of results from the streams.
 pub fn select_all<S: Stream + Unpin>(
@@ -43,3 +45,34 @@ pub fn drop_either_future<A, B>(
         Either::Right((right, _)) => Either::Right(right),
     }
 }
+
+/// Awaits a future while monitoring a peekable stream that may yield an error.
+/// The peekable stream is polled at a higher priority than the future.
+///
+/// When the peekable stream yields an error or reaches its end, this function
+/// returns immediately with `Err`. Otherwise, it keeps polling the given future.
+///
+/// Return:
+///     - Ok(output) as the output of the given future.
+///     - Err(None) to indicate that the stream has reached the end.
+///     - Err(Some(e)) to indicate that the stream returns an error.
+pub async fn await_future_with_monitor_error_stream<T, E, F: Future>(
+    peek_stream: &mut Peekable<impl Stream<Item = Result<T, E>> + Unpin>,
+    future: F,
+) -> Result<F::Output, Option<E>> {
+    // Poll the response stream first so that an error is observed early
+    match select(pin!(Pin::new(&mut *peek_stream).peek()), pin!(future)).await {
+        Either::Left((response_result, send_future)) => match response_result {
+            None => Err(None),
+            Some(Err(_)) => {
+                let err = match peek_stream.next().now_or_never() {
+                    Some(Some(Err(err))) => err,
+                    _ => unreachable!("peek has output, peek output not None, have check err"),
+                };
+                Err(Some(err))
+            }
+            Some(Ok(_)) => Ok(send_future.await),
+        },
+        Either::Right((output, _)) => Ok(output),
+    }
+}
diff --git a/src/common/src/util/mod.rs b/src/common/src/util/mod.rs
index f4140b558faa7..e1f85263e1415 100644
--- a/src/common/src/util/mod.rs
+++ b/src/common/src/util/mod.rs
@@ -45,7 +45,9 @@ pub mod tracing;
 pub mod value_encoding;
 pub mod worker_util;
 
-pub use future_utils::{drop_either_future, pending_on_none, select_all};
+pub use future_utils::{
+    await_future_with_monitor_error_stream, drop_either_future, pending_on_none, select_all,
+};
 #[macro_use]
 pub mod match_util;
 
diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs
index 65bf59eedf19e..fc5ae9ff19854 100644
--- a/src/compute/src/lib.rs
+++ b/src/compute/src/lib.rs
@@ -13,14 +13,14 @@
 // limitations under the License.
 
 #![feature(trait_alias)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(type_alias_impl_trait)]
 #![feature(let_chains)]
 #![feature(result_option_inspect)]
 #![feature(lint_reasons)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(lazy_cell)]
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 #[macro_use]
 extern crate tracing;
diff --git a/src/compute/src/rpc/service/exchange_service.rs b/src/compute/src/rpc/service/exchange_service.rs
index b59cc39587c2f..6225cef2a7e30 100644
--- a/src/compute/src/rpc/service/exchange_service.rs
+++ b/src/compute/src/rpc/service/exchange_service.rs
@@ -49,7 +49,7 @@ impl ExchangeService for ExchangeServiceImpl {
     type GetDataStream = BatchDataStream;
     type GetStreamStream = StreamDataStream;
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn get_data(
         &self,
         request: Request<GetDataRequest>,
diff --git a/src/compute/src/rpc/service/monitor_service.rs b/src/compute/src/rpc/service/monitor_service.rs
index 97a0b80773791..8fc24664ec016 100644
--- a/src/compute/src/rpc/service/monitor_service.rs
+++ b/src/compute/src/rpc/service/monitor_service.rs
@@ -53,7 +53,7 @@ impl MonitorServiceImpl {
 
 #[async_trait::async_trait]
 impl MonitorService for MonitorServiceImpl {
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn stack_trace(
         &self,
         request: Request<StackTraceRequest>,
@@ -85,7 +85,7 @@ impl MonitorService for MonitorServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn profiling(
         &self,
         request: Request<ProfilingRequest>,
@@ -115,7 +115,7 @@ impl MonitorService for MonitorServiceImpl {
         }
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn heap_profiling(
         &self,
         request: Request<HeapProfilingRequest>,
@@ -166,7 +166,7 @@ impl MonitorService for MonitorServiceImpl {
         }
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn list_heap_profiling(
         &self,
         _request: Request<ListHeapProfilingRequest>,
@@ -206,7 +206,7 @@ impl MonitorService for MonitorServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn analyze_heap(
         &self,
         request: Request<AnalyzeHeapRequest>,
diff --git a/src/compute/src/rpc/service/stream_service.rs b/src/compute/src/rpc/service/stream_service.rs
index 525364b60dc1c..1c1448b3d1e45 100644
--- a/src/compute/src/rpc/service/stream_service.rs
+++ b/src/compute/src/rpc/service/stream_service.rs
@@ -45,7 +45,7 @@ impl StreamServiceImpl {
 
 #[async_trait::async_trait]
 impl StreamService for StreamServiceImpl {
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn update_actors(
         &self,
         request: Request<UpdateActorsRequest>,
@@ -61,7 +61,7 @@ impl StreamService for StreamServiceImpl {
         }
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn build_actors(
         &self,
         request: Request<BuildActorsRequest>,
@@ -85,7 +85,7 @@ impl StreamService for StreamServiceImpl {
         }
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn broadcast_actor_info_table(
         &self,
         request: Request<BroadcastActorInfoTableRequest>,
@@ -104,7 +104,7 @@ impl StreamService for StreamServiceImpl {
         }
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn drop_actors(
         &self,
         request: Request<DropActorsRequest>,
@@ -118,7 +118,7 @@ impl StreamService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn force_stop_actors(
         &self,
         request: Request<ForceStopActorsRequest>,
@@ -132,7 +132,7 @@ impl StreamService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn inject_barrier(
         &self,
         request: Request<InjectBarrierRequest>,
@@ -173,7 +173,7 @@ impl StreamService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn barrier_complete(
         &self,
         request: Request<BarrierCompleteRequest>,
@@ -243,7 +243,7 @@ impl StreamService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn wait_epoch_commit(
         &self,
         request: Request<WaitEpochCommitRequest>,
diff --git a/src/compute/tests/cdc_tests.rs b/src/compute/tests/cdc_tests.rs
index b3e39ece95002..6a50b8410bbd4 100644
--- a/src/compute/tests/cdc_tests.rs
+++ b/src/compute/tests/cdc_tests.rs
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #![feature(let_chains)]
-#![feature(generators)]
+#![feature(coroutines)]
 
 use std::sync::atomic::AtomicU64;
 use std::sync::Arc;
diff --git a/src/compute/tests/integration_tests.rs b/src/compute/tests/integration_tests.rs
index a43ae2e5762da..6d7e93365c275 100644
--- a/src/compute/tests/integration_tests.rs
+++ b/src/compute/tests/integration_tests.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(proc_macro_hygiene, stmt_expr_attributes)]
 
 use std::sync::atomic::AtomicU64;
diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml
index 45ccba627af85..d8ba8f7c6d4a7 100644
--- a/src/connector/Cargo.toml
+++ b/src/connector/Cargo.toml
@@ -114,7 +114,7 @@ strum = "0.25"
 strum_macros = "0.25"
 tempfile = "3"
 thiserror = "1"
-time = "0.3.28"
+time = "0.3.30"
 tokio = { version = "0.2", package = "madsim-tokio", features = [
     "rt",
     "rt-multi-thread",
diff --git a/src/connector/src/lib.rs b/src/connector/src/lib.rs
index 8ccf62486ce65..aa613b4043c23 100644
--- a/src/connector/src/lib.rs
+++ b/src/connector/src/lib.rs
@@ -14,7 +14,7 @@
 
 #![expect(dead_code)]
 #![allow(clippy::derive_partial_eq_without_eq)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(proc_macro_hygiene)]
 #![feature(stmt_expr_attributes)]
 #![feature(box_patterns)]
@@ -25,11 +25,9 @@
 #![feature(let_chains)]
 #![feature(box_into_inner)]
 #![feature(type_alias_impl_trait)]
-#![feature(return_position_impl_trait_in_trait)]
-#![feature(async_fn_in_trait)]
 #![feature(associated_type_defaults)]
 #![feature(impl_trait_in_assoc_type)]
-#![feature(iter_from_generator)]
+#![feature(iter_from_coroutine)]
 #![feature(if_let_guard)]
 #![feature(iterator_try_collect)]
 
diff --git a/src/connector/src/sink/blackhole.rs b/src/connector/src/sink/blackhole.rs
index 1f1ace3b0d104..60b0506604c97 100644
--- a/src/connector/src/sink/blackhole.rs
+++ b/src/connector/src/sink/blackhole.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use async_trait::async_trait;
+
 use crate::sink::log_store::{LogReader, LogStoreReadItem, TruncateOffset};
 use crate::sink::{
     DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkError, SinkParam, SinkWriterParam,
@@ -45,6 +47,7 @@ impl Sink for BlackHoleSink {
     }
 }
 
+#[async_trait]
 impl LogSinker for BlackHoleSink {
     async fn consume_log_and_sink(self, mut log_reader: impl LogReader) -> Result<()> {
         log_reader.init().await?;
diff --git a/src/connector/src/sink/catalog/mod.rs b/src/connector/src/sink/catalog/mod.rs
index c18dd7d10a92c..ca3a09e7f2eda 100644
--- a/src/connector/src/sink/catalog/mod.rs
+++ b/src/connector/src/sink/catalog/mod.rs
@@ -132,6 +132,7 @@ pub enum SinkEncode {
     Json,
     Protobuf,
     Avro,
+    Template,
 }
 
 impl SinkFormatDesc {
@@ -177,6 +178,7 @@ impl SinkFormatDesc {
             SinkEncode::Json => E::Json,
             SinkEncode::Protobuf => E::Protobuf,
             SinkEncode::Avro => E::Avro,
+            SinkEncode::Template => E::Template,
         };
         let options = self
             .options
@@ -212,6 +214,7 @@ impl TryFrom<PbSinkFormatDesc> for SinkFormatDesc {
         let encode = match value.encode() {
             E::Json => SinkEncode::Json,
             E::Protobuf => SinkEncode::Protobuf,
+            E::Template => SinkEncode::Template,
             E::Avro => SinkEncode::Avro,
             e @ (E::Unspecified | E::Native | E::Csv | E::Bytes) => {
                 return Err(SinkError::Config(anyhow!(
diff --git a/src/connector/src/sink/clickhouse.rs b/src/connector/src/sink/clickhouse.rs
index 2bddf8026216f..fb06baf42920c 100644
--- a/src/connector/src/sink/clickhouse.rs
+++ b/src/connector/src/sink/clickhouse.rs
@@ -11,29 +11,36 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
 use core::fmt::Debug;
 use std::collections::{HashMap, HashSet};
 use std::time::Duration;
 
 use anyhow::anyhow;
-use clickhouse::{Client, Client as ClickHouseClient, Row as ClickHouseRow};
+use clickhouse::{Client as ClickHouseClient, Row as ClickHouseRow};
 use itertools::Itertools;
-use risingwave_common::array::{Op, RowRef, StreamChunk};
+use risingwave_common::array::{Op, StreamChunk};
 use risingwave_common::catalog::Schema;
 use risingwave_common::row::Row;
-use risingwave_common::types::{DataType, ScalarRefImpl, Serial};
+use risingwave_common::types::{DataType, Decimal, ScalarRefImpl, Serial};
 use serde::ser::{SerializeSeq, SerializeStruct};
 use serde::Serialize;
 use serde_derive::Deserialize;
 use serde_with::serde_as;
 
 use super::{DummySinkCommitCoordinator, SinkWriterParam};
-use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt};
+use crate::sink::catalog::desc::SinkDesc;
+use crate::sink::log_store::DeliveryFutureManagerAddFuture;
+use crate::sink::writer::{
+    AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt,
+};
 use crate::sink::{
     Result, Sink, SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT,
 };
 
+const QUERY_ENGINE: &str =
+    "select distinct ?fields from system.tables where database = ? and table = ?";
+const QUERY_COLUMN: &str =
+    "select distinct ?fields from system.columns where database = ? and table = ? order by ?";
 pub const CLICKHOUSE_SINK: &str = "clickhouse";
 const BUFFER_SIZE: usize = 1024;
 
@@ -51,6 +58,75 @@ pub struct ClickHouseCommon {
     pub table: String,
 }
 
+/// Table engines of the ClickHouse `MergeTree` family that the sink recognizes.
+#[allow(clippy::enum_variant_names)]
+#[derive(Debug)]
+enum ClickHouseEngine {
+    MergeTree,
+    ReplacingMergeTree,
+    SummingMergeTree,
+    AggregatingMergeTree,
+    /// Holds the name of the sign column.
+    CollapsingMergeTree(String),
+    /// Holds the name of the sign column.
+    VersionedCollapsingMergeTree(String),
+    GraphiteMergeTree,
+}
+impl ClickHouseEngine {
+    /// Returns `true` for the two collapsing engines.
+    pub fn is_collapsing_engine(&self) -> bool {
+        matches!(
+            self,
+            ClickHouseEngine::CollapsingMergeTree(_)
+                | ClickHouseEngine::VersionedCollapsingMergeTree(_)
+        )
+    }
+
+    /// Returns the sign column name of a collapsing engine, `None` otherwise.
+    pub fn get_sign_name(&self) -> Option<String> {
+        match self {
+            ClickHouseEngine::CollapsingMergeTree(sign_name)
+            | ClickHouseEngine::VersionedCollapsingMergeTree(sign_name) => {
+                Some(sign_name.to_string())
+            }
+            _ => None,
+        }
+    }
+
+    /// Trimmed text between the last `<engine>(` in `query` and the next `terminator`.
+    fn parse_sign_name(query: &str, engine: &str, terminator: char) -> Result<String> {
+        let (_, args) = query
+            .rsplit_once(&format!("{}(", engine))
+            .ok_or_else(|| SinkError::ClickHouse("must have last".to_string()))?;
+        let (sign_name, _) = args
+            .split_once(terminator)
+            .ok_or_else(|| SinkError::ClickHouse("must have next".to_string()))?;
+        Ok(sign_name.trim().to_string())
+    }
+
+    /// Maps a `system.tables` row to an engine; unknown engine names are an error.
+    pub fn from_query_engine(engine_name: &ClickhouseQueryEngine) -> Result<Self> {
+        let query = engine_name.create_table_query.as_str();
+        match engine_name.engine.as_str() {
+            "MergeTree" => Ok(ClickHouseEngine::MergeTree),
+            "ReplacingMergeTree" => Ok(ClickHouseEngine::ReplacingMergeTree),
+            "SummingMergeTree" => Ok(ClickHouseEngine::SummingMergeTree),
+            "AggregatingMergeTree" => Ok(ClickHouseEngine::AggregatingMergeTree),
+            "VersionedCollapsingMergeTree" => {
+                Self::parse_sign_name(query, "VersionedCollapsingMergeTree", ',')
+                    .map(ClickHouseEngine::VersionedCollapsingMergeTree)
+            }
+            "CollapsingMergeTree" => Self::parse_sign_name(query, "CollapsingMergeTree", ')')
+                .map(ClickHouseEngine::CollapsingMergeTree),
+            "GraphiteMergeTree" => Ok(ClickHouseEngine::GraphiteMergeTree),
+            _ => Err(SinkError::ClickHouse(format!(
+                "Cannot find clickhouse engine {:?}",
+                engine_name.engine
+            ))),
+        }
+    }
+}
+
 const POOL_IDLE_TIMEOUT: Duration = Duration::from_secs(5);
 
 impl ClickHouseCommon {
@@ -194,9 +270,7 @@ impl ClickHouseSink {
             }
             risingwave_common::types::DataType::Float32 => Ok(ck_column.r#type.contains("Float32")),
             risingwave_common::types::DataType::Float64 => Ok(ck_column.r#type.contains("Float64")),
-            risingwave_common::types::DataType::Decimal => {
-                Err(SinkError::ClickHouse("can not support Decimal".to_string()))
-            }
+            risingwave_common::types::DataType::Decimal => Ok(ck_column.r#type.contains("Decimal")),
             risingwave_common::types::DataType::Date => Ok(ck_column.r#type.contains("Date32")),
             risingwave_common::types::DataType::Varchar => Ok(ck_column.r#type.contains("String")),
             risingwave_common::types::DataType::Time => Err(SinkError::ClickHouse(
@@ -228,7 +302,7 @@ impl ClickHouseSink {
                 Ok(ck_column.r#type.contains("UInt64") | ck_column.r#type.contains("Int64"))
             }
             risingwave_common::types::DataType::Int256 => Err(SinkError::ClickHouse(
-                "clickhouse can not support Interval".to_string(),
+                "clickhouse can not support Int256".to_string(),
             )),
         };
         if !is_match? {
@@ -243,10 +317,14 @@ impl ClickHouseSink {
 }
 impl Sink for ClickHouseSink {
     type Coordinator = DummySinkCommitCoordinator;
-    type LogSinker = LogSinkerOf<ClickHouseSinkWriter>;
+    type LogSinker = AsyncTruncateLogSinkerOf<ClickHouseSinkWriter>;
 
     const SINK_NAME: &'static str = CLICKHOUSE_SINK;
 
+    fn default_sink_decouple(desc: &SinkDesc) -> bool {
+        desc.sink_type.is_append_only() // default is `true` only for append-only sinks
+    }
+
     async fn validate(&self) -> Result<()> {
         // For upsert clickhouse sink, the primary key must be defined.
         if !self.is_append_only && self.pk_indices.is_empty() {
@@ -256,20 +334,15 @@ impl Sink for ClickHouseSink {
 
         // check reachability
         let client = self.config.common.build_client()?;
-        let query_column = "select distinct ?fields from system.columns where database = ? and table = ? order by ?".to_string();
-        let clickhouse_column = client
-            .query(&query_column)
-            .bind(self.config.common.database.clone())
-            .bind(self.config.common.table.clone())
-            .bind("position")
-            .fetch_all::<SystemColumn>()
-            .await?;
-        if clickhouse_column.is_empty() {
-            return Err(SinkError::ClickHouse(format!(
-                "table {:?}.{:?} is not find in clickhouse",
-                self.config.common.database, self.config.common.table
-            )));
+
+        let (clickhouse_column, clickhouse_engine) =
+            query_column_engine_from_ck(client, &self.config).await?;
+
+        if !self.is_append_only && !clickhouse_engine.is_collapsing_engine() {
+            return Err(SinkError::ClickHouse(
+                "If you want to use upsert, please modify your engine is `VersionedCollapsingMergeTree` or `CollapsingMergeTree` in ClickHouse".to_owned()));
         }
+
         self.check_column_name_and_type(&clickhouse_column)?;
         if !self.is_append_only {
             self.check_pk_match(&clickhouse_column)?;
@@ -277,7 +350,7 @@ impl Sink for ClickHouseSink {
         Ok(())
     }
 
-    async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
+    async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
         Ok(ClickHouseSinkWriter::new(
             self.config.clone(),
             self.schema.clone(),
@@ -285,24 +358,27 @@ impl Sink for ClickHouseSink {
             self.is_append_only,
         )
         .await?
-        .into_log_sinker(writer_param.sink_metrics))
+        .into_log_sinker(usize::MAX))
     }
 }
 pub struct ClickHouseSinkWriter {
     pub config: ClickHouseConfig,
     schema: Schema,
     pk_indices: Vec<usize>,
-    client: Client,
+    client: ClickHouseClient,
     is_append_only: bool,
     // Save some features of the clickhouse column type
     column_correct_vec: Vec<ClickHouseSchemaFeature>,
-    clickhouse_fields_name: Vec<String>,
+    rw_fields_name_after_calibration: Vec<String>, // schema field names + sign column, if any
+    clickhouse_engine: ClickHouseEngine, // engine of the target table, looked up in `new`
 }
 #[derive(Debug)]
 struct ClickHouseSchemaFeature {
     can_null: bool,
     // Time accuracy in clickhouse for rw and ck conversions
     accuracy_time: u8,
+    // (precision, scale) parsed from a `Decimal(P, S)` column type; (0, 0) for other types
+    accuracy_decimal: (u8, u8),
 }
 
 impl ClickHouseSinkWriter {
@@ -312,25 +388,23 @@ impl ClickHouseSinkWriter {
         pk_indices: Vec<usize>,
         is_append_only: bool,
     ) -> Result<Self> {
-        if !is_append_only {
-            tracing::warn!("Update and delete are not recommended because of their impact on clickhouse performance.");
-        }
         let client = config.common.build_client()?;
-        let query_column = "select distinct ?fields from system.columns where database = ? and table = ? order by position".to_string();
-        let clickhouse_column = client
-            .query(&query_column)
-            .bind(config.common.database.clone())
-            .bind(config.common.table.clone())
-            .fetch_all::<SystemColumn>()
-            .await?;
+
+        let (clickhouse_column, clickhouse_engine) =
+            query_column_engine_from_ck(client.clone(), &config).await?;
+
         let column_correct_vec: Result<Vec<ClickHouseSchemaFeature>> = clickhouse_column
             .iter()
             .map(Self::build_column_correct_vec)
             .collect();
-        let clickhouse_fields_name = build_fields_name_type_from_schema(&schema)?
+        let mut rw_fields_name_after_calibration = build_fields_name_type_from_schema(&schema)?
             .iter()
             .map(|(a, _)| a.clone())
             .collect_vec();
+
+        if let Some(sign) = clickhouse_engine.get_sign_name() {
+            rw_fields_name_after_calibration.push(sign);
+        }
         Ok(Self {
             config,
             schema,
@@ -338,7 +412,8 @@ impl ClickHouseSinkWriter {
             client,
             is_append_only,
             column_correct_vec: column_correct_vec?,
-            clickhouse_fields_name,
+            rw_fields_name_after_calibration,
+            clickhouse_engine,
         })
     }
 
@@ -360,159 +435,95 @@ impl ClickHouseSinkWriter {
         } else {
             0_u8
         };
+        let accuracy_decimal = if ck_column.r#type.contains("Decimal(") {
+            let decimal_all = ck_column
+                .r#type
+                .split("Decimal(")
+                .last()
+                .ok_or_else(|| SinkError::ClickHouse("must have last".to_string()))?
+                .split(')')
+                .next()
+                .ok_or_else(|| SinkError::ClickHouse("must have next".to_string()))?
+                .split(", ")
+                .collect_vec();
+            let length = decimal_all
+                .first()
+                .ok_or_else(|| SinkError::ClickHouse("must have next".to_string()))?
+                .parse::<u8>()
+                .map_err(|e| SinkError::ClickHouse(format!("clickhouse sink error {}", e)))?;
+
+            if length > 38 {
+                return Err(SinkError::ClickHouse(
+                    "RW don't support Decimal256".to_string(),
+                ));
+            }
+
+            let scale = decimal_all
+                .last()
+                .ok_or_else(|| SinkError::ClickHouse("must have next".to_string()))?
+                .parse::<u8>()
+                .map_err(|e| SinkError::ClickHouse(format!("clickhouse sink error {}", e)))?;
+            (length, scale)
+        } else {
+            (0_u8, 0_u8)
+        };
         Ok(ClickHouseSchemaFeature {
             can_null,
             accuracy_time,
+            accuracy_decimal,
         })
     }
 
-    async fn append_only(&mut self, chunk: StreamChunk) -> Result<()> {
+    async fn write(&mut self, chunk: StreamChunk) -> Result<()> {
         let mut insert = self.client.insert_with_fields_name(
             &self.config.common.table,
-            self.clickhouse_fields_name.clone(),
+            self.rw_fields_name_after_calibration.clone(),
         )?;
         for (op, row) in chunk.rows() {
-            if op != Op::Insert {
-                tracing::warn!(
-                    "append only click house sink receive an {:?} which will be ignored.",
-                    op
-                );
-                continue;
-            }
             let mut clickhouse_filed_vec = vec![];
             for (index, data) in row.iter().enumerate() {
                 clickhouse_filed_vec.extend(ClickHouseFieldWithNull::from_scalar_ref(
                     data,
                     &self.column_correct_vec,
                     index,
-                    true,
                 )?);
             }
-            let clickhouse_column = ClickHouseColumn {
-                row: clickhouse_filed_vec,
-            };
-            insert.write(&clickhouse_column).await?;
-        }
-        insert.end().await?;
-        Ok(())
-    }
-
-    async fn upsert(&mut self, chunk: StreamChunk) -> Result<()> {
-        let get_pk_names_and_data = |row: RowRef<'_>, index: usize| {
-            let pk_names = self
-                .schema
-                .names()
-                .iter()
-                .cloned()
-                .enumerate()
-                .filter(|(index, _)| self.pk_indices.contains(index))
-                .map(|(_, b)| b)
-                .collect_vec();
-            let mut pk_data = vec![];
-            for pk_index in &self.pk_indices {
-                if let ClickHouseFieldWithNull::WithoutSome(v) =
-                    ClickHouseFieldWithNull::from_scalar_ref(
-                        row.datum_at(*pk_index),
-                        &self.column_correct_vec,
-                        index,
-                        false,
-                    )?
-                    .pop()
-                    .unwrap()
-                {
-                    pk_data.push(v)
-                } else {
-                    return Err(SinkError::ClickHouse("pk can not be null".to_string()));
-                }
-            }
-            Ok((pk_names, pk_data))
-        };
-
-        for (index, (op, row)) in chunk.rows().enumerate() {
             match op {
-                Op::Insert => {
-                    let mut insert = self.client.insert_with_fields_name(
-                        &self.config.common.table,
-                        self.clickhouse_fields_name.clone(),
-                    )?;
-                    let mut clickhouse_filed_vec = vec![];
-                    for (index, data) in row.iter().enumerate() {
-                        clickhouse_filed_vec.extend(ClickHouseFieldWithNull::from_scalar_ref(
-                            data,
-                            &self.column_correct_vec,
-                            index,
-                            true,
-                        )?);
+                Op::Insert | Op::UpdateInsert => { // collapsing engines: append sign = 1
+                    if self.clickhouse_engine.is_collapsing_engine() {
+                        clickhouse_filed_vec.push(ClickHouseFieldWithNull::WithoutSome(
+                            ClickHouseField::Int8(1),
+                        ));
                     }
-                    let clickhouse_column = ClickHouseColumn {
-                        row: clickhouse_filed_vec,
-                    };
-                    insert.write(&clickhouse_column).await?;
-                    insert.end().await?;
-                }
-                Op::Delete => {
-                    let (delete_pk_names, delete_pk_data) = get_pk_names_and_data(row, index)?;
-                    self.client
-                        .delete(&self.config.common.table, delete_pk_names)
-                        .delete(delete_pk_data)
-                        .await?;
                 }
-                Op::UpdateDelete => continue,
-                Op::UpdateInsert => {
-                    let (update_pk_names, update_pk_data) = get_pk_names_and_data(row, index)?;
-                    let mut clickhouse_update_filed_vec = vec![];
-                    for (index, data) in row.iter().enumerate() {
-                        if !self.pk_indices.contains(&index) {
-                            clickhouse_update_filed_vec.extend(
-                                ClickHouseFieldWithNull::from_scalar_ref(
-                                    data,
-                                    &self.column_correct_vec,
-                                    index,
-                                    false,
-                                )?,
-                            );
-                        }
+                Op::Delete | Op::UpdateDelete => { // append sign = -1; collapsing engines only
+                    if !self.clickhouse_engine.is_collapsing_engine() {
+                        return Err(SinkError::ClickHouse(
+                            "Clickhouse engine don't support upsert".to_string(),
+                        ));
                     }
-                    // Get the names of the columns excluding pk, and use them to update.
-                    let fields_name_update = self
-                        .clickhouse_fields_name
-                        .iter()
-                        .filter(|n| !update_pk_names.contains(n))
-                        .map(|s| s.to_string())
-                        .collect_vec();
-
-                    let update = self.client.update(
-                        &self.config.common.table,
-                        update_pk_names,
-                        fields_name_update.clone(),
-                    );
-                    update
-                        .update_fields(clickhouse_update_filed_vec, update_pk_data)
-                        .await?;
+                    clickhouse_filed_vec.push(ClickHouseFieldWithNull::WithoutSome(
+                        ClickHouseField::Int8(-1),
+                    ))
                 }
             }
+            let clickhouse_column = ClickHouseColumn {
+                row: clickhouse_filed_vec,
+            };
+            insert.write(&clickhouse_column).await?;
         }
+        insert.end().await?; // finish the insert once every row of the chunk is written
         Ok(())
     }
 }
 
-#[async_trait::async_trait]
-impl SinkWriter for ClickHouseSinkWriter {
-    async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> {
-        if self.is_append_only {
-            self.append_only(chunk).await
-        } else {
-            self.upsert(chunk).await
-        }
-    }
-
-    async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> {
-        // clickhouse no transactional guarantees, so we do nothing here.
-        Ok(())
-    }
-
-    async fn barrier(&mut self, _is_checkpoint: bool) -> Result<()> {
-        Ok(())
+impl AsyncTruncateSinkWriter for ClickHouseSinkWriter {
+    async fn write_chunk<'a>(
+        &'a mut self,
+        chunk: StreamChunk,
+        _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
+    ) -> Result<()> {
+        self.write(chunk).await // awaited inline; `_add_future` is left unused
     }
 }
 
@@ -523,6 +534,48 @@ struct SystemColumn {
     is_in_primary_key: u8,
 }
 
+#[derive(ClickHouseRow, Deserialize)]
+struct ClickhouseQueryEngine { // row type fetched with `QUERY_ENGINE`
+    name: String,
+    engine: String, // matched by name in `ClickHouseEngine::from_query_engine`
+    create_table_query: String, // searched for the sign column of collapsing engines
+}
+
+/// Looks up the column metadata and the table engine of the configured table.
+///
+/// Fails if either query returns no rows; an engine's sign column is removed from the columns.
+async fn query_column_engine_from_ck(
+    client: ClickHouseClient,
+    config: &ClickHouseConfig,
+) -> Result<(Vec<SystemColumn>, ClickHouseEngine)> {
+    let (database, table) = (&config.common.database, &config.common.table);
+    let engine_rows = client
+        .query(QUERY_ENGINE)
+        .bind(database.clone())
+        .bind(table.clone())
+        .fetch_all::<ClickhouseQueryEngine>()
+        .await?;
+    // NOTE(review): verify that binding "position" to `order by ?` sorts by that column.
+    let mut columns = client
+        .query(QUERY_COLUMN)
+        .bind(database.clone())
+        .bind(table.clone())
+        .bind("position")
+        .fetch_all::<SystemColumn>()
+        .await?;
+    if engine_rows.is_empty() || columns.is_empty() {
+        return Err(SinkError::ClickHouse(format!(
+            "table {:?}.{:?} is not find in clickhouse",
+            config.common.database, config.common.table
+        )));
+    }
+    let engine = ClickHouseEngine::from_query_engine(&engine_rows[0])?;
+    if let Some(sign) = engine.get_sign_name() {
+        columns.retain(|column| column.name != sign);
+    }
+    Ok((columns, engine))
+}
+
 /// Serialize this structure to simulate the `struct` call clickhouse interface
 #[derive(ClickHouseRow, Debug)]
 struct ClickHouseColumn {
@@ -541,6 +594,26 @@ enum ClickHouseField {
     String(String),
     Bool(bool),
     List(Vec<ClickHouseFieldWithNull>),
+    Int8(i8),
+    Decimal(ClickHouseDecimal),
+}
+#[derive(Debug)]
+enum ClickHouseDecimal {
+    Decimal32(i32), // built when the column precision is <= 9
+    Decimal64(i64), // built when the column precision is <= 18
+    Decimal128(i128), // built for any larger precision
+}
+impl Serialize for ClickHouseDecimal {
+    /// Serializes the wrapped integer with the serializer method matching
+    /// the integer width of the variant.
+    fn serialize<S: serde::Serializer>(&self, ser: S) -> std::result::Result<S::Ok, S::Error> {
+        // All payloads are `Copy` integers, so match on the value directly.
+        match *self {
+            Self::Decimal32(raw) => ser.serialize_i32(raw),
+            Self::Decimal64(raw) => ser.serialize_i64(raw),
+            Self::Decimal128(raw) => ser.serialize_i128(raw),
+        }
+    }
 }
 
 /// Enum that support clickhouse nullable
@@ -556,7 +629,6 @@ impl ClickHouseFieldWithNull {
         data: Option<ScalarRefImpl<'_>>,
         clickhouse_schema_feature_vec: &Vec<ClickHouseSchemaFeature>,
         clickhouse_schema_feature_index: usize,
-        is_insert: bool,
     ) -> Result<Vec<ClickHouseFieldWithNull>> {
         let clickhouse_schema_feature = clickhouse_schema_feature_vec
             .get(clickhouse_schema_feature_index)
@@ -584,8 +656,29 @@ impl ClickHouseFieldWithNull {
             ScalarRefImpl::Float64(v) => ClickHouseField::Float64(v.into_inner()),
             ScalarRefImpl::Utf8(v) => ClickHouseField::String(v.to_string()),
             ScalarRefImpl::Bool(v) => ClickHouseField::Bool(v),
-            ScalarRefImpl::Decimal(_) => {
-                return Err(SinkError::ClickHouse("can not support Decimal".to_string()))
+            ScalarRefImpl::Decimal(d) => {
+                if let Decimal::Normalized(d) = d {
+                    let scale =
+                        clickhouse_schema_feature.accuracy_decimal.1 as i32 - d.scale() as i32;
+
+                    let scale = if scale < 0 {
+                        d.mantissa() / 10_i128.pow(scale.unsigned_abs())
+                    } else {
+                        d.mantissa() * 10_i128.pow(scale as u32)
+                    };
+
+                    if clickhouse_schema_feature.accuracy_decimal.0 <= 9 {
+                        ClickHouseField::Decimal(ClickHouseDecimal::Decimal32(scale as i32))
+                    } else if clickhouse_schema_feature.accuracy_decimal.0 <= 18 {
+                        ClickHouseField::Decimal(ClickHouseDecimal::Decimal64(scale as i64))
+                    } else {
+                        ClickHouseField::Decimal(ClickHouseDecimal::Decimal128(scale))
+                    }
+                } else {
+                    return Err(SinkError::ClickHouse(
+                        "clickhouse can not support Decimal NAN,-INF and INF".to_string(),
+                    ));
+                }
             }
             ScalarRefImpl::Interval(_) => {
                 return Err(SinkError::ClickHouse(
@@ -602,14 +695,9 @@ impl ClickHouseFieldWithNull {
                 ))
             }
             ScalarRefImpl::Timestamp(v) => {
-                if is_insert {
-                    let time = v.get_timestamp_nanos()
-                        / 10_i32.pow((9 - clickhouse_schema_feature.accuracy_time).into()) as i64;
-                    ClickHouseField::Int64(time)
-                } else {
-                    let time = v.truncate_micros().to_string();
-                    ClickHouseField::String(time)
-                }
+                let time = v.get_timestamp_nanos()
+                    / 10_i32.pow((9 - clickhouse_schema_feature.accuracy_time).into()) as i64;
+                ClickHouseField::Int64(time)
             }
             ScalarRefImpl::Timestamptz(_) => {
                 return Err(SinkError::ClickHouse(
@@ -628,7 +716,6 @@ impl ClickHouseFieldWithNull {
                         field,
                         clickhouse_schema_feature_vec,
                         clickhouse_schema_feature_index + index,
-                        is_insert,
                     )?;
                     struct_vec.push(ClickHouseFieldWithNull::WithoutSome(ClickHouseField::List(
                         a,
@@ -643,7 +730,6 @@ impl ClickHouseFieldWithNull {
                         i,
                         clickhouse_schema_feature_vec,
                         clickhouse_schema_feature_index,
-                        is_insert,
                     )?)
                 }
                 return Ok(vec![ClickHouseFieldWithNull::WithoutSome(
@@ -656,9 +742,7 @@ impl ClickHouseFieldWithNull {
                 ))
             }
         };
-        // Insert needs to be serialized with `Some`, update doesn't need to be serialized with
-        // `Some`
-        let data = if is_insert && clickhouse_schema_feature.can_null {
+        let data = if clickhouse_schema_feature.can_null {
             vec![ClickHouseFieldWithNull::WithSome(data)]
         } else {
             vec![ClickHouseFieldWithNull::WithoutSome(data)]
@@ -688,6 +772,8 @@ impl Serialize for ClickHouseField {
                 }
                 s.end()
             }
+            ClickHouseField::Decimal(v) => v.serialize(serializer),
+            ClickHouseField::Int8(v) => serializer.serialize_i8(*v),
         }
     }
 }
diff --git a/src/connector/src/sink/encoder/template.rs b/src/connector/src/sink/encoder/template.rs
index 85f085989b6c4..97d8271f9e83a 100644
--- a/src/connector/src/sink/encoder/template.rs
+++ b/src/connector/src/sink/encoder/template.rs
@@ -12,11 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::collections::HashSet;
+
+use regex::Regex;
 use risingwave_common::catalog::Schema;
 use risingwave_common::row::Row;
 use risingwave_common::types::ToText;
 
 use super::{Result, RowEncoder};
+use crate::sink::SinkError;
 
 /// Encode a row according to a specified string template `user_id:{user_id}`
 pub struct TemplateEncoder {
@@ -34,6 +38,32 @@ impl TemplateEncoder {
             template,
         }
     }
+
+    /// Validates a `key_format` / `value_format` template string against the known column names.
+    ///
+    /// `format` must contain at least one `{...}` placeholder, and the text inside every
+    /// placeholder must be exactly one of the names in `set`. Nested or combined placeholders
+    /// such as `{{column_name}}` or `{{column_name1},{column_name2}}` are therefore rejected.
+    ///
+    /// Returns `SinkError::Redis` if there is no placeholder or one of them is not in `set`.
+    pub fn check_string_format(format: &str, set: &HashSet<String>) -> Result<()> {
+        let re = Regex::new(r"\{([^}]*)\}").unwrap();
+        if !re.is_match(format) {
+            return Err(SinkError::Redis(
+                "Can't find {} in key_format or value_format".to_string(),
+            ));
+        }
+        // Capture group 1 is the text between the braces of each placeholder.
+        for field in re.captures_iter(format).filter_map(|capture| capture.get(1)) {
+            if !set.contains(field.as_str()) {
+                return Err(SinkError::Redis(format!(
+                    "Can't find field({:?}) in key_format or value_format",
+                    field.as_str()
+                )));
+            }
+        }
+        Ok(())
+    }
 }
 
 impl RowEncoder for TemplateEncoder {
diff --git a/src/connector/src/sink/formatter/append_only.rs b/src/connector/src/sink/formatter/append_only.rs
index 523a52dab91bb..f0efcc21d9009 100644
--- a/src/connector/src/sink/formatter/append_only.rs
+++ b/src/connector/src/sink/formatter/append_only.rs
@@ -40,7 +40,7 @@ impl<KE: RowEncoder, VE: RowEncoder> SinkFormatter for AppendOnlyFormatter<KE, V
         &self,
         chunk: &StreamChunk,
     ) -> impl Iterator<Item = Result<(Option<Self::K>, Option<Self::V>)>> {
-        std::iter::from_generator(|| {
+        std::iter::from_coroutine(|| {
             for (op, row) in chunk.rows() {
                 if op != Op::Insert {
                     continue;
diff --git a/src/connector/src/sink/formatter/debezium_json.rs b/src/connector/src/sink/formatter/debezium_json.rs
index 637aa23f06410..ce98daab88756 100644
--- a/src/connector/src/sink/formatter/debezium_json.rs
+++ b/src/connector/src/sink/formatter/debezium_json.rs
@@ -85,7 +85,7 @@ impl SinkFormatter for DebeziumJsonFormatter {
         &self,
         chunk: &StreamChunk,
     ) -> impl Iterator<Item = Result<(Option<Value>, Option<Value>)>> {
-        std::iter::from_generator(|| {
+        std::iter::from_coroutine(|| {
             let DebeziumJsonFormatter {
                 schema,
                 pk_indices,
diff --git a/src/connector/src/sink/formatter/mod.rs b/src/connector/src/sink/formatter/mod.rs
index a7463f7e3b306..17cb708292890 100644
--- a/src/connector/src/sink/formatter/mod.rs
+++ b/src/connector/src/sink/formatter/mod.rs
@@ -29,6 +29,7 @@ pub use upsert::UpsertFormatter;
 use super::catalog::{SinkEncode, SinkFormat, SinkFormatDesc};
 use super::encoder::template::TemplateEncoder;
 use super::encoder::KafkaConnectParams;
+use super::redis::{KEY_FORMAT, VALUE_FORMAT};
 use crate::sink::encoder::{JsonEncoder, ProtoEncoder, TimestampHandlingMode};
 
 /// Transforms a `StreamChunk` into a sequence of key-value pairs according a specific format,
@@ -92,7 +93,7 @@ impl SinkFormatterImpl {
                 let key_encoder = (!pk_indices.is_empty()).then(|| {
                     JsonEncoder::new(
                         schema.clone(),
-                        Some(pk_indices),
+                        Some(pk_indices.clone()),
                         TimestampHandlingMode::Milli,
                     )
                 });
@@ -115,6 +116,28 @@ impl SinkFormatterImpl {
                         Ok(SinkFormatterImpl::AppendOnlyProto(formatter))
                     }
                     SinkEncode::Avro => err_unsupported(),
+                    SinkEncode::Template => {
+                        // Both templates are mandatory sink options for the template encoder.
+                        let key_format = format_desc.options.get(KEY_FORMAT).ok_or_else(|| {
+                            SinkError::Config(anyhow!(
+                                "Cannot find 'key_format',please set it or use JSON"
+                            ))
+                        })?;
+                        let value_format =
+                            format_desc.options.get(VALUE_FORMAT).ok_or_else(|| {
+                                SinkError::Config(anyhow!(
+                                    "Cannot find '{VALUE_FORMAT}',please set it or use JSON"
+                                ))
+                            })?;
+                        let key_encoder = TemplateEncoder::new(
+                            schema.clone(),
+                            Some(pk_indices),
+                            key_format.clone(),
+                        );
+                        let val_encoder = TemplateEncoder::new(schema, None, value_format.clone());
+                        let formatter = AppendOnlyFormatter::new(Some(key_encoder), val_encoder);
+                        Ok(SinkFormatterImpl::AppendOnlyTemplate(formatter))
+                    }
                 }
             }
             SinkFormat::Debezium => {
@@ -131,85 +154,66 @@ impl SinkFormatterImpl {
                 )))
             }
             SinkFormat::Upsert => {
-                if format_desc.encode != SinkEncode::Json {
-                    return err_unsupported();
-                }
+                match format_desc.encode {
+                    SinkEncode::Json => {
+                        let mut key_encoder = JsonEncoder::new(
+                            schema.clone(),
+                            Some(pk_indices),
+                            TimestampHandlingMode::Milli,
+                        );
+                        let mut val_encoder =
+                            JsonEncoder::new(schema, None, TimestampHandlingMode::Milli);
 
-                let mut key_encoder = JsonEncoder::new(
-                    schema.clone(),
-                    Some(pk_indices),
-                    TimestampHandlingMode::Milli,
-                );
-                let mut val_encoder = JsonEncoder::new(schema, None, TimestampHandlingMode::Milli);
-
-                if let Some(s) = format_desc.options.get("schemas.enable") {
-                    match s.to_lowercase().parse::<bool>() {
-                        Ok(true) => {
-                            let kafka_connect = KafkaConnectParams {
-                                schema_name: format!("{}.{}", db_name, sink_from_name),
-                            };
-                            key_encoder = key_encoder.with_kafka_connect(kafka_connect.clone());
-                            val_encoder = val_encoder.with_kafka_connect(kafka_connect);
-                        }
-                        Ok(false) => {}
-                        _ => {
-                            return Err(SinkError::Config(anyhow!(
-                                "schemas.enable is expected to be `true` or `false`, got {}",
-                                s
-                            )));
-                        }
+                        if let Some(s) = format_desc.options.get("schemas.enable") {
+                            match s.to_lowercase().parse::<bool>() {
+                                Ok(true) => {
+                                    let kafka_connect = KafkaConnectParams {
+                                        schema_name: format!("{}.{}", db_name, sink_from_name),
+                                    };
+                                    key_encoder =
+                                        key_encoder.with_kafka_connect(kafka_connect.clone());
+                                    val_encoder = val_encoder.with_kafka_connect(kafka_connect);
+                                }
+                                Ok(false) => {}
+                                _ => {
+                                    return Err(SinkError::Config(anyhow!(
+                                        "schemas.enable is expected to be `true` or `false`, got {}",
+                                        s
+                                    )));
+                                }
+                            }
+                        };
+
+                        // Initialize the upsert_stream
+                        let formatter = UpsertFormatter::new(key_encoder, val_encoder);
+                        Ok(SinkFormatterImpl::UpsertJson(formatter))
                     }
-                };
-
-                // Initialize the upsert_stream
-                let formatter = UpsertFormatter::new(key_encoder, val_encoder);
-                Ok(SinkFormatterImpl::UpsertJson(formatter))
-            }
-        }
-    }
-
-    pub fn new_with_redis(
-        schema: Schema,
-        pk_indices: Vec<usize>,
-        is_append_only: bool,
-        key_format: Option<String>,
-        value_format: Option<String>,
-    ) -> Result<Self> {
-        match (key_format, value_format) {
-            (Some(k), Some(v)) => {
-                let key_encoder = TemplateEncoder::new(
-                    schema.clone(),
-                    Some(pk_indices),
-                    k,
-                );
-                let val_encoder =
-                    TemplateEncoder::new(schema, None, v);
-                if is_append_only {
-                    Ok(SinkFormatterImpl::AppendOnlyTemplate(AppendOnlyFormatter::new(Some(key_encoder), val_encoder)))
-                } else {
-                    Ok(SinkFormatterImpl::UpsertTemplate(UpsertFormatter::new(key_encoder, val_encoder)))
-                }
-            }
-            (None, None) => {
-                let key_encoder = JsonEncoder::new(
-                    schema.clone(),
-                    Some(pk_indices),
-                    TimestampHandlingMode::Milli,
-                );
-                let val_encoder = JsonEncoder::new(
-                    schema,
-                    None,
-                    TimestampHandlingMode::Milli,
-                );
-                if is_append_only {
-                    Ok(SinkFormatterImpl::AppendOnlyJson(AppendOnlyFormatter::new(Some(key_encoder), val_encoder)))
-                } else {
-                    Ok(SinkFormatterImpl::UpsertJson(UpsertFormatter::new(key_encoder, val_encoder)))
+                    SinkEncode::Template => {
+                        // Both templates are mandatory sink options for the template encoder.
+                        let key_format = format_desc.options.get(KEY_FORMAT).ok_or_else(|| {
+                            SinkError::Config(anyhow!(
+                                "Cannot find 'key_format',please set it or use JSON"
+                            ))
+                        })?;
+                        let value_format =
+                            format_desc.options.get(VALUE_FORMAT).ok_or_else(|| {
+                                SinkError::Config(anyhow!(
+                                    "Cannot find '{VALUE_FORMAT}',please set it or use JSON"
+                                ))
+                            })?;
+                        // The key encoder is given `pk_indices`; the value encoder is given `None`.
+                        let key_encoder = TemplateEncoder::new(
+                            schema.clone(),
+                            Some(pk_indices),
+                            key_format.clone(),
+                        );
+                        let val_encoder = TemplateEncoder::new(schema, None, value_format.clone());
+                        let formatter = UpsertFormatter::new(key_encoder, val_encoder);
+                        Ok(SinkFormatterImpl::UpsertTemplate(formatter))
+                    }
+                    _ => err_unsupported(),
                 }
             }
-            _ => {
-                Err(SinkError::Encode("Please provide template formats for both key and value, or choose the JSON format.".to_string()))
-            }
         }
     }
 }
diff --git a/src/connector/src/sink/formatter/upsert.rs b/src/connector/src/sink/formatter/upsert.rs
index 6ef2b5f2ca333..af8e70ff92850 100644
--- a/src/connector/src/sink/formatter/upsert.rs
+++ b/src/connector/src/sink/formatter/upsert.rs
@@ -40,7 +40,7 @@ impl<KE: RowEncoder, VE: RowEncoder> SinkFormatter for UpsertFormatter<KE, VE> {
         &self,
         chunk: &StreamChunk,
     ) -> impl Iterator<Item = Result<(Option<Self::K>, Option<Self::V>)>> {
-        std::iter::from_generator(|| {
+        std::iter::from_coroutine(|| {
             for (op, row) in chunk.rows() {
                 let event_key_object = Some(tri!(self.key_encoder.encode(row)));
 
diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs
index a204a8d121706..f77b2b0a88c36 100644
--- a/src/connector/src/sink/kafka.rs
+++ b/src/connector/src/sink/kafka.rs
@@ -14,20 +14,18 @@
 
 use std::collections::HashMap;
 use std::fmt::Debug;
-use std::pin::pin;
 use std::sync::Arc;
 use std::time::Duration;
 
 use anyhow::anyhow;
-use futures::future::{select, Either};
 use futures::{Future, FutureExt, TryFuture};
 use rdkafka::error::KafkaError;
 use rdkafka::message::ToBytes;
 use rdkafka::producer::{DeliveryFuture, FutureProducer, FutureRecord};
 use rdkafka::types::RDKafkaErrorCode;
 use rdkafka::ClientConfig;
+use risingwave_common::array::StreamChunk;
 use risingwave_common::catalog::Schema;
-use risingwave_common::util::drop_either_future;
 use serde_derive::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use strum_macros::{Display, EnumString};
@@ -37,11 +35,11 @@ use super::{Sink, SinkError, SinkParam};
 use crate::common::KafkaCommon;
 use crate::sink::catalog::desc::SinkDesc;
 use crate::sink::formatter::SinkFormatterImpl;
-use crate::sink::log_store::{
-    DeliveryFutureManager, DeliveryFutureManagerAddFuture, LogReader, LogStoreReadItem,
+use crate::sink::log_store::DeliveryFutureManagerAddFuture;
+use crate::sink::writer::{
+    AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, FormattedSink,
 };
-use crate::sink::writer::FormattedSink;
-use crate::sink::{DummySinkCommitCoordinator, LogSinker, Result, SinkWriterParam};
+use crate::sink::{DummySinkCommitCoordinator, Result, SinkWriterParam};
 use crate::source::kafka::{KafkaProperties, KafkaSplitEnumerator, PrivateLinkProducerContext};
 use crate::source::{SourceEnumeratorContext, SplitEnumerator};
 use crate::{
@@ -299,7 +297,7 @@ impl TryFrom<SinkParam> for KafkaSink {
 
 impl Sink for KafkaSink {
     type Coordinator = DummySinkCommitCoordinator;
-    type LogSinker = KafkaLogSinker;
+    type LogSinker = AsyncTruncateLogSinkerOf<KafkaSinkWriter>;
 
     const SINK_NAME: &'static str = KAFKA_SINK;
 
@@ -316,7 +314,18 @@ impl Sink for KafkaSink {
             self.sink_from_name.clone(),
         )
         .await?;
-        KafkaLogSinker::new(self.config.clone(), formatter).await
+        let max_delivery_buffer_size = (self
+            .config
+            .rdkafka_properties
+            .queue_buffering_max_messages
+            .as_ref()
+            .cloned()
+            .unwrap_or(KAFKA_WRITER_MAX_QUEUE_SIZE) as f32
+            * KAFKA_WRITER_MAX_QUEUE_SIZE_RATIO) as usize;
+
+        Ok(KafkaSinkWriter::new(self.config.clone(), formatter)
+            .await?
+            .into_log_sinker(max_delivery_buffer_size))
     }
 
     async fn validate(&self) -> Result<()> {
@@ -370,16 +379,15 @@ struct KafkaPayloadWriter<'a> {
     config: &'a KafkaConfig,
 }
 
-type KafkaSinkDeliveryFuture = impl TryFuture<Ok = (), Error = SinkError> + Unpin + 'static;
+pub type KafkaSinkDeliveryFuture = impl TryFuture<Ok = (), Error = SinkError> + Unpin + 'static;
 
-pub struct KafkaLogSinker {
+pub struct KafkaSinkWriter {
     formatter: SinkFormatterImpl,
     inner: FutureProducer<PrivateLinkProducerContext>,
-    future_manager: DeliveryFutureManager<KafkaSinkDeliveryFuture>,
     config: KafkaConfig,
 }
 
-impl KafkaLogSinker {
+impl KafkaSinkWriter {
     async fn new(config: KafkaConfig, formatter: SinkFormatterImpl) -> Result<Self> {
         let inner: FutureProducer<PrivateLinkProducerContext> = {
             let mut c = ClientConfig::new();
@@ -403,19 +411,29 @@ impl KafkaLogSinker {
             c.create_with_context(producer_ctx).await?
         };
 
-        let max_delivery_buffer_size = (config
-            .rdkafka_properties
-            .queue_buffering_max_messages
-            .as_ref()
-            .cloned()
-            .unwrap_or(KAFKA_WRITER_MAX_QUEUE_SIZE) as f32
-            * KAFKA_WRITER_MAX_QUEUE_SIZE_RATIO) as usize;
-
-        Ok(KafkaLogSinker {
+        Ok(KafkaSinkWriter {
             formatter,
             inner,
             config: config.clone(),
-            future_manager: DeliveryFutureManager::new(max_delivery_buffer_size),
+        })
+    }
+}
+
+impl AsyncTruncateSinkWriter for KafkaSinkWriter {
+    type DeliveryFuture = KafkaSinkDeliveryFuture;
+
+    async fn write_chunk<'a>(
+        &'a mut self,
+        chunk: StreamChunk,
+        add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
+    ) -> Result<()> {
+        let mut payload_writer = KafkaPayloadWriter {
+            inner: &mut self.inner,
+            add_future,
+            config: &self.config,
+        };
+        dispatch_sink_formatter_impl!(&self.formatter, formatter, {
+            payload_writer.write_chunk(chunk, formatter).await
         })
     }
 }
@@ -537,50 +555,6 @@ impl<'a> FormattedSink for KafkaPayloadWriter<'a> {
     }
 }
 
-impl LogSinker for KafkaLogSinker {
-    async fn consume_log_and_sink(mut self, mut log_reader: impl LogReader) -> Result<()> {
-        log_reader.init().await?;
-        loop {
-            let select_result = drop_either_future(
-                select(
-                    pin!(log_reader.next_item()),
-                    pin!(self.future_manager.next_truncate_offset()),
-                )
-                .await,
-            );
-            match select_result {
-                Either::Left(item_result) => {
-                    let (epoch, item) = item_result?;
-                    match item {
-                        LogStoreReadItem::StreamChunk { chunk_id, chunk } => {
-                            dispatch_sink_formatter_impl!(&self.formatter, formatter, {
-                                let mut writer = KafkaPayloadWriter {
-                                    inner: &self.inner,
-                                    add_future: self
-                                        .future_manager
-                                        .start_write_chunk(epoch, chunk_id),
-                                    config: &self.config,
-                                };
-                                writer.write_chunk(chunk, formatter).await?;
-                            })
-                        }
-                        LogStoreReadItem::Barrier {
-                            is_checkpoint: _is_checkpoint,
-                        } => {
-                            self.future_manager.add_barrier(epoch);
-                        }
-                        LogStoreReadItem::UpdateVnodeBitmap(_) => {}
-                    }
-                }
-                Either::Right(offset_result) => {
-                    let offset = offset_result?;
-                    log_reader.truncate(offset).await?;
-                }
-            }
-        }
-    }
-}
-
 #[cfg(test)]
 mod test {
     use maplit::hashmap;
@@ -748,7 +722,7 @@ mod test {
         let kafka_config = KafkaConfig::from_hashmap(properties)?;
 
         // Create the actual sink writer to Kafka
-        let mut sink = KafkaLogSinker::new(
+        let sink = KafkaSinkWriter::new(
             kafka_config.clone(),
             SinkFormatterImpl::AppendOnlyJson(AppendOnlyFormatter::new(
                 // We do not specify primary key for this schema
@@ -759,12 +733,16 @@ mod test {
         .await
         .unwrap();
 
+        use crate::sink::log_store::DeliveryFutureManager;
+
+        let mut future_manager = DeliveryFutureManager::new(usize::MAX);
+
         for i in 0..10 {
             println!("epoch: {}", i);
             for j in 0..100 {
                 let mut writer = KafkaPayloadWriter {
                     inner: &sink.inner,
-                    add_future: sink.future_manager.start_write_chunk(i, j),
+                    add_future: future_manager.start_write_chunk(i, j),
                     config: &sink.config,
                 };
                 match writer
diff --git a/src/connector/src/sink/kinesis.rs b/src/connector/src/sink/kinesis.rs
index dd8518af39948..605edde3b1eb0 100644
--- a/src/connector/src/sink/kinesis.rs
+++ b/src/connector/src/sink/kinesis.rs
@@ -30,8 +30,12 @@ use super::catalog::SinkFormatDesc;
 use super::SinkParam;
 use crate::common::KinesisCommon;
 use crate::dispatch_sink_formatter_impl;
+use crate::sink::catalog::desc::SinkDesc;
 use crate::sink::formatter::SinkFormatterImpl;
-use crate::sink::writer::{FormattedSink, LogSinkerOf, SinkWriter, SinkWriterExt};
+use crate::sink::log_store::DeliveryFutureManagerAddFuture;
+use crate::sink::writer::{
+    AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, FormattedSink,
+};
 use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkError, SinkWriterParam};
 
 pub const KINESIS_SINK: &str = "kinesis";
@@ -67,10 +71,14 @@ impl TryFrom<SinkParam> for KinesisSink {
 
 impl Sink for KinesisSink {
     type Coordinator = DummySinkCommitCoordinator;
-    type LogSinker = LogSinkerOf<KinesisSinkWriter>;
+    type LogSinker = AsyncTruncateLogSinkerOf<KinesisSinkWriter>;
 
     const SINK_NAME: &'static str = KINESIS_SINK;
 
+    fn default_sink_decouple(desc: &SinkDesc) -> bool {
+        desc.sink_type.is_append_only()
+    }
+
     async fn validate(&self) -> Result<()> {
         // Kinesis requires partition key. There is no builtin support for round-robin as in kafka/pulsar.
         // https://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html#Streams-PutRecord-request-PartitionKey
@@ -103,7 +111,7 @@ impl Sink for KinesisSink {
         Ok(())
     }
 
-    async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
+    async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
         Ok(KinesisSinkWriter::new(
             self.config.clone(),
             self.schema.clone(),
@@ -113,7 +121,7 @@ impl Sink for KinesisSink {
             self.sink_from_name.clone(),
         )
         .await?
-        .into_log_sinker(writer_param.sink_metrics))
+        .into_log_sinker(usize::MAX))
     }
 }
 
@@ -214,20 +222,16 @@ impl FormattedSink for KinesisSinkPayloadWriter {
     }
 }
 
-#[async_trait::async_trait]
-impl SinkWriter for KinesisSinkWriter {
-    async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> {
-        dispatch_sink_formatter_impl!(&self.formatter, formatter, {
+impl AsyncTruncateSinkWriter for KinesisSinkWriter {
+    async fn write_chunk<'a>(
+        &'a mut self,
+        chunk: StreamChunk,
+        _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
+    ) -> Result<()> {
+        dispatch_sink_formatter_impl!(
+            &self.formatter,
+            formatter,
             self.payload_writer.write_chunk(chunk, formatter).await
-        })
-    }
-
-    async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> {
-        // Kinesis offers no transactional guarantees, so we do nothing here.
-        Ok(())
-    }
-
-    async fn barrier(&mut self, _is_checkpoint: bool) -> Result<()> {
-        Ok(())
+        )
     }
 }
diff --git a/src/connector/src/sink/mod.rs b/src/connector/src/sink/mod.rs
index 639fc8d734758..fc590d2fa6935 100644
--- a/src/connector/src/sink/mod.rs
+++ b/src/connector/src/sink/mod.rs
@@ -35,7 +35,6 @@ pub mod utils;
 pub mod writer;
 
 use std::collections::HashMap;
-use std::future::Future;
 
 use ::clickhouse::error::Error as ClickHouseError;
 use ::redis::RedisError;
@@ -280,11 +279,9 @@ pub trait Sink: TryFrom<SinkParam, Error = SinkError> {
     }
 }
 
-pub trait LogSinker: Send + 'static {
-    fn consume_log_and_sink(
-        self,
-        log_reader: impl LogReader,
-    ) -> impl Future<Output = Result<()>> + Send + 'static;
+#[async_trait]
+pub trait LogSinker: 'static {
+    async fn consume_log_and_sink(self, log_reader: impl LogReader) -> Result<()>;
 }
 
 #[async_trait]
diff --git a/src/connector/src/sink/nats.rs b/src/connector/src/sink/nats.rs
index 8e3f3e2c18022..2f810eed786a9 100644
--- a/src/connector/src/sink/nats.rs
+++ b/src/connector/src/sink/nats.rs
@@ -25,10 +25,14 @@ use tokio_retry::strategy::{jitter, ExponentialBackoff};
 use tokio_retry::Retry;
 
 use super::utils::chunk_to_json;
-use super::{DummySinkCommitCoordinator, SinkWriter, SinkWriterParam};
+use super::{DummySinkCommitCoordinator, SinkWriterParam};
 use crate::common::NatsCommon;
+use crate::sink::catalog::desc::SinkDesc;
 use crate::sink::encoder::{JsonEncoder, TimestampHandlingMode};
-use crate::sink::writer::{LogSinkerOf, SinkWriterExt};
+use crate::sink::log_store::DeliveryFutureManagerAddFuture;
+use crate::sink::writer::{
+    AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt,
+};
 use crate::sink::{Result, Sink, SinkError, SinkParam, SINK_TYPE_APPEND_ONLY};
 
 pub const NATS_SINK: &str = "nats";
@@ -88,10 +92,14 @@ impl TryFrom<SinkParam> for NatsSink {
 
 impl Sink for NatsSink {
     type Coordinator = DummySinkCommitCoordinator;
-    type LogSinker = LogSinkerOf<NatsSinkWriter>;
+    type LogSinker = AsyncTruncateLogSinkerOf<NatsSinkWriter>;
 
     const SINK_NAME: &'static str = NATS_SINK;
 
+    fn default_sink_decouple(desc: &SinkDesc) -> bool {
+        desc.sink_type.is_append_only()
+    }
+
     async fn validate(&self) -> Result<()> {
         if !self.is_append_only {
             return Err(SinkError::Nats(anyhow!(
@@ -110,11 +118,11 @@ impl Sink for NatsSink {
         Ok(())
     }
 
-    async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
+    async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
         Ok(
             NatsSinkWriter::new(self.config.clone(), self.schema.clone())
                 .await?
-                .into_log_sinker(writer_param.sink_metrics),
+                .into_log_sinker(usize::MAX),
         )
     }
 }
@@ -153,17 +161,12 @@ impl NatsSinkWriter {
     }
 }
 
-#[async_trait::async_trait]
-impl SinkWriter for NatsSinkWriter {
-    async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> {
+impl AsyncTruncateSinkWriter for NatsSinkWriter {
+    async fn write_chunk<'a>(
+        &'a mut self,
+        chunk: StreamChunk,
+        _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
+    ) -> Result<()> {
         self.append_only(chunk).await
     }
-
-    async fn begin_epoch(&mut self, _epoch_id: u64) -> Result<()> {
-        Ok(())
-    }
-
-    async fn barrier(&mut self, _is_checkpoint: bool) -> Result<()> {
-        Ok(())
-    }
 }
diff --git a/src/connector/src/sink/pulsar.rs b/src/connector/src/sink/pulsar.rs
index f980b2ad9f9b1..9eb57c1ae0771 100644
--- a/src/connector/src/sink/pulsar.rs
+++ b/src/connector/src/sink/pulsar.rs
@@ -12,14 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::collections::{HashMap, VecDeque};
+use std::collections::HashMap;
 use std::fmt::Debug;
 use std::time::Duration;
 
 use anyhow::anyhow;
-use async_trait::async_trait;
-use futures::future::try_join_all;
-use futures::TryFutureExt;
+use futures::{FutureExt, TryFuture, TryFutureExt};
 use pulsar::producer::{Message, SendFuture};
 use pulsar::{Producer, ProducerOptions, Pulsar, TokioExecutor};
 use risingwave_common::array::StreamChunk;
@@ -28,10 +26,15 @@ use serde::Deserialize;
 use serde_with::{serde_as, DisplayFromStr};
 
 use super::catalog::{SinkFormat, SinkFormatDesc};
-use super::{Sink, SinkError, SinkParam, SinkWriter, SinkWriterParam};
+use super::{Sink, SinkError, SinkParam, SinkWriterParam};
 use crate::common::PulsarCommon;
-use crate::sink::formatter::SinkFormatterImpl;
-use crate::sink::writer::{FormattedSink, LogSinkerOf, SinkWriterExt};
+use crate::sink::catalog::desc::SinkDesc;
+use crate::sink::encoder::SerTo;
+use crate::sink::formatter::{SinkFormatter, SinkFormatterImpl};
+use crate::sink::log_store::DeliveryFutureManagerAddFuture;
+use crate::sink::writer::{
+    AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt, FormattedSink,
+};
 use crate::sink::{DummySinkCommitCoordinator, Result};
 use crate::{deserialize_duration_from_string, dispatch_sink_formatter_impl};
 
@@ -155,11 +158,15 @@ impl TryFrom<SinkParam> for PulsarSink {
 
 impl Sink for PulsarSink {
     type Coordinator = DummySinkCommitCoordinator;
-    type LogSinker = LogSinkerOf<PulsarSinkWriter>;
+    type LogSinker = AsyncTruncateLogSinkerOf<PulsarSinkWriter>;
 
     const SINK_NAME: &'static str = PULSAR_SINK;
 
-    async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
+    fn default_sink_decouple(desc: &SinkDesc) -> bool {
+        desc.sink_type.is_append_only()
+    }
+
+    async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
         Ok(PulsarSinkWriter::new(
             self.config.clone(),
             self.schema.clone(),
@@ -169,7 +176,7 @@ impl Sink for PulsarSink {
             self.sink_from_name.clone(),
         )
         .await?
-        .into_log_sinker(writer_param.sink_metrics))
+        .into_log_sinker(PULSAR_SEND_FUTURE_BUFFER_MAX_SIZE))
     }
 
     async fn validate(&self) -> Result<()> {
@@ -199,15 +206,26 @@ impl Sink for PulsarSink {
 }
 
 pub struct PulsarSinkWriter {
-    payload_writer: PulsarPayloadWriter,
     formatter: SinkFormatterImpl,
-}
-
-struct PulsarPayloadWriter {
     pulsar: Pulsar<TokioExecutor>,
     producer: Producer<TokioExecutor>,
     config: PulsarConfig,
-    send_future_buffer: VecDeque<SendFuture>,
+}
+
+struct PulsarPayloadWriter<'w> {
+    producer: &'w mut Producer<TokioExecutor>,
+    config: &'w PulsarConfig,
+    add_future: DeliveryFutureManagerAddFuture<'w, PulsarDeliveryFuture>,
+}
+
+pub type PulsarDeliveryFuture = impl TryFuture<Ok = (), Error = SinkError> + Unpin + 'static;
+
+fn may_delivery_future(future: SendFuture) -> PulsarDeliveryFuture {
+    future.map(|result| {
+        result
+            .map(|_| ())
+            .map_err(|e: pulsar::Error| SinkError::Pulsar(anyhow!(e)))
+    })
 }
 
 impl PulsarSinkWriter {
@@ -226,17 +244,14 @@ impl PulsarSinkWriter {
         let producer = build_pulsar_producer(&pulsar, &config).await?;
         Ok(Self {
             formatter,
-            payload_writer: PulsarPayloadWriter {
-                pulsar,
-                producer,
-                config,
-                send_future_buffer: VecDeque::new(),
-            },
+            pulsar,
+            producer,
+            config,
         })
     }
 }
 
-impl PulsarPayloadWriter {
+impl<'w> PulsarPayloadWriter<'w> {
     async fn send_message(&mut self, message: Message) -> Result<()> {
         let mut success_flag = false;
         let mut connection_err = None;
@@ -247,17 +262,10 @@ impl PulsarPayloadWriter {
                 // a SendFuture holding the message receipt
                 // or error after sending is returned
                 Ok(send_future) => {
-                    // Check if send_future_buffer is greater than the preset limit
-                    while self.send_future_buffer.len() >= PULSAR_SEND_FUTURE_BUFFER_MAX_SIZE {
-                        self.send_future_buffer
-                            .pop_front()
-                            .expect("Expect the SendFuture not to be None")
-                            .map_err(|e| SinkError::Pulsar(anyhow!(e)))
-                            .await?;
-                    }
-
+                    self.add_future
+                        .add_future_may_await(may_delivery_future(send_future))
+                        .await?;
                     success_flag = true;
-                    self.send_future_buffer.push_back(send_future);
                     break;
                 }
                 // error upon sending
@@ -295,24 +303,9 @@ impl PulsarPayloadWriter {
         self.send_message(message).await?;
         Ok(())
     }
-
-    async fn commit_inner(&mut self) -> Result<()> {
-        self.producer
-            .send_batch()
-            .map_err(pulsar_to_sink_err)
-            .await?;
-        try_join_all(
-            self.send_future_buffer
-                .drain(..)
-                .map(|send_future| send_future.map_err(|e| SinkError::Pulsar(anyhow!(e)))),
-        )
-        .await?;
-
-        Ok(())
-    }
 }
 
-impl FormattedSink for PulsarPayloadWriter {
+impl<'w> FormattedSink for PulsarPayloadWriter<'w> {
     type K = String;
     type V = Vec<u8>;
 
@@ -321,23 +314,33 @@ impl FormattedSink for PulsarPayloadWriter {
     }
 }
 
-#[async_trait]
-impl SinkWriter for PulsarSinkWriter {
-    async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> {
+impl AsyncTruncateSinkWriter for PulsarSinkWriter {
+    type DeliveryFuture = PulsarDeliveryFuture;
+
+    async fn write_chunk<'a>(
+        &'a mut self,
+        chunk: StreamChunk,
+        add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
+    ) -> Result<()> {
         dispatch_sink_formatter_impl!(&self.formatter, formatter, {
-            self.payload_writer.write_chunk(chunk, formatter).await
+            let mut payload_writer = PulsarPayloadWriter {
+                producer: &mut self.producer,
+                add_future,
+                config: &self.config,
+            };
+            // TODO: we can call `payload_writer.write_chunk(chunk, formatter)`,
+            // but for an unknown reason, this will greatly increase the compile time,
+            // by nearly 4x. May investigate it later.
+            for r in formatter.format_chunk(&chunk) {
+                let (key, value) = r?;
+                payload_writer
+                    .write_inner(
+                        key.map(SerTo::ser_to).transpose()?,
+                        value.map(SerTo::ser_to).transpose()?,
+                    )
+                    .await?;
+            }
+            Ok(())
         })
     }
-
-    async fn begin_epoch(&mut self, _epoch: u64) -> Result<()> {
-        Ok(())
-    }
-
-    async fn barrier(&mut self, is_checkpoint: bool) -> Result<Self::CommitMetadata> {
-        if is_checkpoint {
-            self.payload_writer.commit_inner().await?;
-        }
-
-        Ok(())
-    }
 }
diff --git a/src/connector/src/sink/redis.rs b/src/connector/src/sink/redis.rs
index cc8ff74d0c9c5..af3ec3b981620 100644
--- a/src/connector/src/sink/redis.rs
+++ b/src/connector/src/sink/redis.rs
@@ -18,29 +18,30 @@ use anyhow::anyhow;
 use async_trait::async_trait;
 use redis::aio::Connection;
 use redis::{Client as RedisClient, Pipeline};
-use regex::Regex;
 use risingwave_common::array::StreamChunk;
 use risingwave_common::catalog::Schema;
 use serde_derive::{Deserialize, Serialize};
 use serde_with::serde_as;
 
+use super::catalog::SinkFormatDesc;
+use super::encoder::template::TemplateEncoder;
 use super::formatter::SinkFormatterImpl;
 use super::writer::FormattedSink;
-use super::{SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT};
+use super::{SinkError, SinkParam};
 use crate::dispatch_sink_formatter_impl;
-use crate::sink::writer::{LogSinkerOf, SinkWriterExt};
-use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkWriter, SinkWriterParam};
+use crate::sink::log_store::DeliveryFutureManagerAddFuture;
+use crate::sink::writer::{
+    AsyncTruncateLogSinkerOf, AsyncTruncateSinkWriter, AsyncTruncateSinkWriterExt,
+};
+use crate::sink::{DummySinkCommitCoordinator, Result, Sink, SinkWriterParam};
 
 pub const REDIS_SINK: &str = "redis";
-
+pub const KEY_FORMAT: &str = "key_format";
+pub const VALUE_FORMAT: &str = "value_format";
 #[derive(Deserialize, Serialize, Debug, Clone)]
 pub struct RedisCommon {
     #[serde(rename = "redis.url")]
     pub url: String,
-    #[serde(rename = "redis.keyformat")]
-    pub key_format: Option<String>,
-    #[serde(rename = "redis.valueformat")]
-    pub value_format: Option<String>,
 }
 
 impl RedisCommon {
@@ -54,23 +55,13 @@ impl RedisCommon {
 pub struct RedisConfig {
     #[serde(flatten)]
     pub common: RedisCommon,
-
-    pub r#type: String, // accept "append-only" or "upsert"
 }
 
 impl RedisConfig {
     pub fn from_hashmap(properties: HashMap<String, String>) -> Result<Self> {
         let config =
             serde_json::from_value::<RedisConfig>(serde_json::to_value(properties).unwrap())
-                .map_err(|e| SinkError::Config(anyhow!(e)))?;
-        if config.r#type != SINK_TYPE_APPEND_ONLY && config.r#type != SINK_TYPE_UPSERT {
-            return Err(SinkError::Config(anyhow!(
-                "`{}` must be {}, or {}",
-                SINK_TYPE_OPTION,
-                SINK_TYPE_APPEND_ONLY,
-                SINK_TYPE_UPSERT
-            )));
-        }
+                .map_err(|e| SinkError::Config(anyhow!("{:?}", e)))?;
         Ok(config)
     }
 }
@@ -79,28 +70,10 @@ impl RedisConfig {
 pub struct RedisSink {
     config: RedisConfig,
     schema: Schema,
-    is_append_only: bool,
     pk_indices: Vec<usize>,
-}
-
-fn check_string_format(format: &Option<String>, set: &HashSet<String>) -> Result<()> {
-    if let Some(format) = format {
-        // We will check if the string inside {} corresponds to a column name in rw.
-        // In other words, the content within {} should exclusively consist of column names from rw,
-        // which means '{{column_name}}' or '{{column_name1},{column_name2}}' would be incorrect.
-        let re = Regex::new(r"\{([^}]*)\}").unwrap();
-        if !re.is_match(format) {
-            return Err(SinkError::Redis(
-                "Can't find {} in key_format or value_format".to_string(),
-            ));
-        }
-        for capture in re.captures_iter(format) {
-            if let Some(inner_content) = capture.get(1) && !set.contains(inner_content.as_str()){
-                return Err(SinkError::Redis(format!("Can't find field({:?}) in key_format or value_format",inner_content.as_str())))
-            }
-        }
-    }
-    Ok(())
+    format_desc: SinkFormatDesc,
+    db_name: String,
+    sink_from_name: String,
 }
 
 #[async_trait]
@@ -117,27 +90,33 @@ impl TryFrom<SinkParam> for RedisSink {
         Ok(Self {
             config,
             schema: param.schema(),
-            is_append_only: param.sink_type.is_append_only(),
             pk_indices: param.downstream_pk,
+            format_desc: param
+                .format_desc
+                .ok_or_else(|| SinkError::Config(anyhow!("missing FORMAT ... ENCODE ...")))?,
+            db_name: param.db_name,
+            sink_from_name: param.sink_from_name,
         })
     }
 }
 
 impl Sink for RedisSink {
     type Coordinator = DummySinkCommitCoordinator;
-    type LogSinker = LogSinkerOf<RedisSinkWriter>;
+    type LogSinker = AsyncTruncateLogSinkerOf<RedisSinkWriter>;
 
     const SINK_NAME: &'static str = "redis";
 
-    async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
+    async fn new_log_sinker(&self, _writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
         Ok(RedisSinkWriter::new(
             self.config.clone(),
             self.schema.clone(),
             self.pk_indices.clone(),
-            self.is_append_only,
+            &self.format_desc,
+            self.db_name.clone(),
+            self.sink_from_name.clone(),
         )
         .await?
-        .into_log_sinker(writer_param.sink_metrics))
+        .into_log_sinker(usize::MAX))
     }
 
     async fn validate(&self) -> Result<()> {
@@ -157,8 +136,23 @@ impl Sink for RedisSink {
             .filter(|(k, _)| self.pk_indices.contains(k))
             .map(|(_, v)| v.name.clone())
             .collect();
-        check_string_format(&self.config.common.key_format, &pk_set)?;
-        check_string_format(&self.config.common.value_format, &all_set)?;
+        if matches!(
+            self.format_desc.encode,
+            super::catalog::SinkEncode::Template
+        ) {
+            let key_format = self.format_desc.options.get(KEY_FORMAT).ok_or_else(|| {
+                SinkError::Config(anyhow!(
+                    "Cannot find 'key_format',please set it or use JSON"
+                ))
+            })?;
+            let value_format = self.format_desc.options.get(VALUE_FORMAT).ok_or_else(|| {
+                SinkError::Config(anyhow!(
+                    "Cannot find 'value_format',please set it or use JSON"
+                ))
+            })?;
+            TemplateEncoder::check_string_format(key_format, &pk_set)?;
+            TemplateEncoder::check_string_format(value_format, &all_set)?;
+        }
         Ok(())
     }
 }
@@ -166,7 +160,6 @@ impl Sink for RedisSink {
 pub struct RedisSinkWriter {
     epoch: u64,
     schema: Schema,
-    is_append_only: bool,
     pk_indices: Vec<usize>,
     formatter: SinkFormatterImpl,
     payload_writer: RedisSinkPayloadWriter,
@@ -220,21 +213,23 @@ impl RedisSinkWriter {
         config: RedisConfig,
         schema: Schema,
         pk_indices: Vec<usize>,
-        is_append_only: bool,
+        format_desc: &SinkFormatDesc,
+        db_name: String,
+        sink_from_name: String,
     ) -> Result<Self> {
         let payload_writer = RedisSinkPayloadWriter::new(config.clone()).await?;
-        let formatter = SinkFormatterImpl::new_with_redis(
+        let formatter = SinkFormatterImpl::new(
+            format_desc,
             schema.clone(),
             pk_indices.clone(),
-            is_append_only,
-            config.common.key_format,
-            config.common.value_format,
-        )?;
+            db_name,
+            sink_from_name,
+        )
+        .await?;
 
         Ok(Self {
             schema,
             pk_indices,
-            is_append_only,
             epoch: 0,
             formatter,
             payload_writer,
@@ -242,24 +237,22 @@ impl RedisSinkWriter {
     }
 
     #[cfg(test)]
-    pub fn mock(
+    pub async fn mock(
         schema: Schema,
         pk_indices: Vec<usize>,
-        is_append_only: bool,
-        key_format: Option<String>,
-        value_format: Option<String>,
+        format_desc: &SinkFormatDesc,
     ) -> Result<Self> {
-        let formatter = SinkFormatterImpl::new_with_redis(
+        let formatter = SinkFormatterImpl::new(
+            format_desc,
             schema.clone(),
             pk_indices.clone(),
-            is_append_only,
-            key_format,
-            value_format,
-        )?;
+            "d1".to_string(),
+            "t1".to_string(),
+        )
+        .await?;
         Ok(Self {
             schema,
             pk_indices,
-            is_append_only,
             epoch: 0,
             formatter,
             payload_writer: RedisSinkPayloadWriter::mock(),
@@ -267,29 +260,22 @@ impl RedisSinkWriter {
     }
 }
 
-#[async_trait]
-impl SinkWriter for RedisSinkWriter {
-    async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()> {
+impl AsyncTruncateSinkWriter for RedisSinkWriter {
+    async fn write_chunk<'a>(
+        &'a mut self,
+        chunk: StreamChunk,
+        _add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
+    ) -> Result<()> {
         dispatch_sink_formatter_impl!(&self.formatter, formatter, {
             self.payload_writer.write_chunk(chunk, formatter).await
         })
     }
-
-    async fn begin_epoch(&mut self, epoch: u64) -> Result<()> {
-        self.epoch = epoch;
-        Ok(())
-    }
-
-    async fn barrier(&mut self, is_checkpoint: bool) -> Result<()> {
-        if is_checkpoint {
-            self.payload_writer.commit().await?;
-        }
-        Ok(())
-    }
 }
 
 #[cfg(test)]
 mod test {
+    use std::collections::BTreeMap;
+
     use rdkafka::message::FromBytes;
     use risingwave_common::array::{Array, I32Array, Op, StreamChunk, Utf8Array};
     use risingwave_common::catalog::{Field, Schema};
@@ -297,6 +283,8 @@ mod test {
     use risingwave_common::util::iter_util::ZipEqDebug;
 
     use super::*;
+    use crate::sink::catalog::{SinkEncode, SinkFormat};
+    use crate::sink::log_store::DeliveryFutureManager;
 
     #[tokio::test]
     async fn test_write() {
@@ -315,8 +303,15 @@ mod test {
             },
         ]);
 
-        let mut redis_sink_writer =
-            RedisSinkWriter::mock(schema, vec![0], true, None, None).unwrap();
+        let format_desc = SinkFormatDesc {
+            format: SinkFormat::AppendOnly,
+            encode: SinkEncode::Json,
+            options: BTreeMap::default(),
+        };
+
+        let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc)
+            .await
+            .unwrap();
 
         let chunk_a = StreamChunk::new(
             vec![Op::Insert, Op::Insert, Op::Insert],
@@ -326,8 +321,10 @@ mod test {
             ],
         );
 
+        let mut manager = DeliveryFutureManager::new(0);
+
         redis_sink_writer
-            .write_batch(chunk_a)
+            .write_chunk(chunk_a, manager.start_write_chunk(0, 0))
             .await
             .expect("failed to write batch");
         let expected_a =
@@ -367,14 +364,23 @@ mod test {
             },
         ]);
 
-        let mut redis_sink_writer = RedisSinkWriter::mock(
-            schema,
-            vec![0],
-            true,
-            Some("key-{id}".to_string()),
-            Some("values:{id:{id},name:{name}}".to_string()),
-        )
-        .unwrap();
+        let mut btree_map = BTreeMap::default();
+        btree_map.insert(KEY_FORMAT.to_string(), "key-{id}".to_string());
+        btree_map.insert(
+            VALUE_FORMAT.to_string(),
+            "values:{id:{id},name:{name}}".to_string(),
+        );
+        let format_desc = SinkFormatDesc {
+            format: SinkFormat::AppendOnly,
+            encode: SinkEncode::Template,
+            options: btree_map,
+        };
+
+        let mut redis_sink_writer = RedisSinkWriter::mock(schema, vec![0], &format_desc)
+            .await
+            .unwrap();
+
+        let mut future_manager = DeliveryFutureManager::new(0);
 
         let chunk_a = StreamChunk::new(
             vec![Op::Insert, Op::Insert, Op::Insert],
@@ -385,7 +391,7 @@ mod test {
         );
 
         redis_sink_writer
-            .write_batch(chunk_a)
+            .write_chunk(chunk_a, future_manager.start_write_chunk(0, 0))
             .await
             .expect("failed to write batch");
         let expected_a = vec![
diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs
index ad182e734a33a..3c52cb720dbd4 100644
--- a/src/connector/src/sink/remote.rs
+++ b/src/connector/src/sink/remote.rs
@@ -13,17 +13,23 @@
 // limitations under the License.
 
 use std::collections::HashMap;
+use std::fmt::Formatter;
+use std::future::Future;
 use std::marker::PhantomData;
 use std::ops::Deref;
+use std::time::Instant;
 
 use anyhow::anyhow;
 use async_trait::async_trait;
+use futures::stream::Peekable;
+use futures::{StreamExt, TryFutureExt, TryStreamExt};
 use itertools::Itertools;
 use jni::objects::{JByteArray, JValue, JValueOwned};
 use prost::Message;
 use risingwave_common::array::StreamChunk;
 use risingwave_common::error::anyhow_error;
 use risingwave_common::types::DataType;
+use risingwave_common::util::await_future_with_monitor_error_stream;
 use risingwave_jni_core::jvm_runtime::JVM;
 use risingwave_pb::connector_service::sink_coordinator_stream_request::{
     CommitMetadata, StartCoordinator,
@@ -43,15 +49,17 @@ use risingwave_pb::connector_service::{
 };
 use tokio::sync::mpsc;
 use tokio::sync::mpsc::{Receiver, Sender};
+use tokio_stream::wrappers::ReceiverStream;
 use tracing::warn;
 
 use super::encoder::{JsonEncoder, RowEncoder};
 use crate::sink::coordinate::CoordinatedSinkWriter;
 use crate::sink::encoder::TimestampHandlingMode;
+use crate::sink::log_store::{LogReader, LogStoreReadItem, TruncateOffset};
 use crate::sink::writer::{LogSinkerOf, SinkWriter, SinkWriterExt};
 use crate::sink::{
-    DummySinkCommitCoordinator, Result, Sink, SinkCommitCoordinator, SinkError, SinkMetrics,
-    SinkParam, SinkWriterParam,
+    DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkCommitCoordinator, SinkError,
+    SinkMetrics, SinkParam, SinkWriterParam,
 };
 use crate::ConnectorParams;
 
@@ -101,18 +109,12 @@ impl<R: RemoteSinkTrait> TryFrom<SinkParam> for RemoteSink<R> {
 
 impl<R: RemoteSinkTrait> Sink for RemoteSink<R> {
     type Coordinator = DummySinkCommitCoordinator;
-    type LogSinker = LogSinkerOf<RemoteSinkWriter<R>>;
+    type LogSinker = RemoteLogSinker<R>;
 
     const SINK_NAME: &'static str = R::SINK_NAME;
 
     async fn new_log_sinker(&self, writer_param: SinkWriterParam) -> Result<Self::LogSinker> {
-        Ok(RemoteSinkWriter::new(
-            self.param.clone(),
-            writer_param.connector_params,
-            writer_param.sink_metrics.clone(),
-        )
-        .await?
-        .into_log_sinker(writer_param.sink_metrics))
+        RemoteLogSinker::new(self.param.clone(), writer_param).await
     }
 
     async fn validate(&self) -> Result<()> {
@@ -192,6 +194,140 @@ impl<R: RemoteSinkTrait> Sink for RemoteSink<R> {
     }
 }
 
+pub struct RemoteLogSinker<R: RemoteSinkTrait> {
+    writer: RemoteSinkWriter<R>,
+    sink_metrics: SinkMetrics,
+}
+
+impl<R: RemoteSinkTrait> RemoteLogSinker<R> {
+    async fn new(sink_param: SinkParam, writer_param: SinkWriterParam) -> Result<Self> {
+        let writer = RemoteSinkWriter::new(
+            sink_param,
+            writer_param.connector_params,
+            writer_param.sink_metrics.clone(),
+        )
+        .await?;
+        let sink_metrics = writer_param.sink_metrics;
+        Ok(RemoteLogSinker {
+            writer,
+            sink_metrics,
+        })
+    }
+}
+
+/// Await `future` while monitoring the receiver's response stream, mapping a stream error or end-of-stream to `SinkError`.
+async fn await_future_with_monitor_receiver_err<O, F: Future<Output = Result<O>>>(
+    receiver: &mut SinkWriterStreamJniReceiver,
+    future: F,
+) -> Result<O> {
+    match await_future_with_monitor_error_stream(&mut receiver.response_stream, future).await {
+        Ok(result) => result,
+        Err(None) => Err(SinkError::Remote(anyhow!("end of remote receiver stream"))),
+        Err(Some(err)) => Err(SinkError::Internal(err)),
+    }
+}
+
+#[async_trait]
+impl<R: RemoteSinkTrait> LogSinker for RemoteLogSinker<R> {
+    async fn consume_log_and_sink(self, mut log_reader: impl LogReader) -> Result<()> {
+        // Note: this duplicates the consume loop of `LogSinkerOf<impl SinkWriter>`, except that
+        // `log_reader.next_item` is awaited through `await_future_with_monitor_receiver_err`,
+        // so that an error on the remote response stream is reported while waiting for an item.
+
+        let mut sink_writer = self.writer;
+        let sink_metrics = self.sink_metrics;
+        #[derive(Debug)]
+        enum LogConsumerState {
+            /// No item has been consumed yet, so no epoch has begun.
+            Uninitialized,
+
+            /// An epoch has begun; `curr_epoch` is the epoch of the latest item.
+            EpochBegun { curr_epoch: u64 },
+
+            /// A barrier was just handled; the next item must carry a greater epoch.
+            BarrierReceived { prev_epoch: u64 },
+        }
+
+        let mut state = LogConsumerState::Uninitialized;
+
+        log_reader.init().await?;
+
+        loop {
+            let (epoch, item): (u64, LogStoreReadItem) = await_future_with_monitor_receiver_err(
+                &mut sink_writer.stream_handle.response_rx,
+                log_reader.next_item().map_err(SinkError::Internal),
+            )
+            .await?;
+            if let LogStoreReadItem::UpdateVnodeBitmap(_) = &item {
+                match &state {
+                    LogConsumerState::BarrierReceived { .. } => {}
+                    _ => unreachable!(
+                        "update vnode bitmap can be accepted only right after \
+                    barrier, but current state is {:?}",
+                        state
+                    ),
+                }
+            }
+            // Call `begin_epoch` unless an epoch has already begun, then move to `EpochBegun`.
+            state = match state {
+                LogConsumerState::Uninitialized => {
+                    sink_writer.begin_epoch(epoch).await?;
+                    LogConsumerState::EpochBegun { curr_epoch: epoch }
+                }
+                LogConsumerState::EpochBegun { curr_epoch } => {
+                    assert!(
+                        epoch >= curr_epoch,
+                        "new epoch {} should not be below the current epoch {}",
+                        epoch,
+                        curr_epoch
+                    );
+                    LogConsumerState::EpochBegun { curr_epoch: epoch }
+                }
+                LogConsumerState::BarrierReceived { prev_epoch } => {
+                    assert!(
+                        epoch > prev_epoch,
+                        "new epoch {} should be greater than prev epoch {}",
+                        epoch,
+                        prev_epoch
+                    );
+                    sink_writer.begin_epoch(epoch).await?;
+                    LogConsumerState::EpochBegun { curr_epoch: epoch }
+                }
+            };
+            match item {
+                LogStoreReadItem::StreamChunk { chunk, .. } => {
+                    if let Err(e) = sink_writer.write_batch(chunk).await {
+                        sink_writer.abort().await?;
+                        return Err(e);
+                    }
+                }
+                LogStoreReadItem::Barrier { is_checkpoint } => {
+                    let prev_epoch = match state {
+                        LogConsumerState::EpochBegun { curr_epoch } => curr_epoch,
+                        _ => unreachable!("epoch must have begun before handling barrier"),
+                    };
+                    if is_checkpoint {
+                        let start_time = Instant::now();
+                        sink_writer.barrier(true).await?;
+                        sink_metrics
+                            .sink_commit_duration_metrics
+                            .observe(start_time.elapsed().as_millis() as f64);
+                        log_reader
+                            .truncate(TruncateOffset::Barrier { epoch })
+                            .await?;
+                    } else {
+                        sink_writer.barrier(false).await?;
+                    }
+                    state = LogConsumerState::BarrierReceived { prev_epoch }
+                }
+                LogStoreReadItem::UpdateVnodeBitmap(vnode_bitmap) => {
+                    sink_writer.update_vnode_bitmap(vnode_bitmap).await?;
+                }
+            }
+        }
+    }
+}
+
 #[derive(Debug)]
 pub struct CoordinatedRemoteSink<R: RemoteSinkTrait>(pub RemoteSink<R>);
 
@@ -286,14 +422,11 @@ impl SinkCoordinatorStreamJniHandle {
     }
 }
 
-const DEFAULT_CHANNEL_SIZE: usize = 16;
-#[derive(Debug)]
-pub struct SinkWriterStreamJniHandle {
+struct SinkWriterStreamJniSender {
     request_tx: Sender<SinkWriterStreamRequest>,
-    response_rx: Receiver<SinkWriterStreamResponse>,
 }
 
-impl SinkWriterStreamJniHandle {
+impl SinkWriterStreamJniSender {
     pub async fn start_epoch(&mut self, epoch: u64) -> Result<()> {
         self.request_tx
             .send(SinkWriterStreamRequest {
@@ -316,33 +449,29 @@ impl SinkWriterStreamJniHandle {
             .map_err(|err| SinkError::Internal(err.into()))
     }
 
-    pub async fn barrier(&mut self, epoch: u64) -> Result<()> {
+    pub async fn barrier(&mut self, epoch: u64, is_checkpoint: bool) -> Result<()> {
         self.request_tx
             .send(SinkWriterStreamRequest {
                 request: Some(SinkRequest::Barrier(Barrier {
                     epoch,
-                    is_checkpoint: false,
+                    is_checkpoint,
                 })),
             })
             .await
             .map_err(|err| SinkError::Internal(err.into()))
     }
+}
 
-    pub async fn commit(&mut self, epoch: u64) -> Result<CommitResponse> {
-        self.request_tx
-            .send(SinkWriterStreamRequest {
-                request: Some(SinkRequest::Barrier(Barrier {
-                    epoch,
-                    is_checkpoint: true,
-                })),
-            })
-            .await
-            .map_err(|err| SinkError::Internal(err.into()))?;
+struct SinkWriterStreamJniReceiver {
+    response_stream: Peekable<ReceiverStream<anyhow::Result<SinkWriterStreamResponse>>>,
+}
 
-        match self.response_rx.recv().await {
-            Some(SinkWriterStreamResponse {
+impl SinkWriterStreamJniReceiver {
+    async fn next_commit_response(&mut self) -> Result<CommitResponse> {
+        match self.response_stream.try_next().await {
+            Ok(Some(SinkWriterStreamResponse {
                 response: Some(sink_writer_stream_response::Response::Commit(rsp)),
-            }) => Ok(rsp),
+            })) => Ok(rsp),
             msg => Err(SinkError::Internal(anyhow!(
                 "should get Sync response but get {:?}",
                 msg
@@ -351,6 +480,53 @@ impl SinkWriterStreamJniHandle {
     }
 }
 
+const DEFAULT_CHANNEL_SIZE: usize = 16;
+struct SinkWriterStreamJniHandle {
+    request_tx: SinkWriterStreamJniSender,
+    response_rx: SinkWriterStreamJniReceiver,
+}
+
+impl std::fmt::Debug for SinkWriterStreamJniHandle {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("SinkWriterStreamJniHandle").finish()
+    }
+}
+
+impl SinkWriterStreamJniHandle {
+    async fn start_epoch(&mut self, epoch: u64) -> Result<()> {
+        await_future_with_monitor_receiver_err(
+            &mut self.response_rx,
+            self.request_tx.start_epoch(epoch),
+        )
+        .await
+    }
+
+    async fn write_batch(&mut self, epoch: u64, batch_id: u64, payload: Payload) -> Result<()> {
+        await_future_with_monitor_receiver_err(
+            &mut self.response_rx,
+            self.request_tx.write_batch(epoch, batch_id, payload),
+        )
+        .await
+    }
+
+    async fn barrier(&mut self, epoch: u64) -> Result<()> {
+        await_future_with_monitor_receiver_err(
+            &mut self.response_rx,
+            self.request_tx.barrier(epoch, false),
+        )
+        .await
+    }
+
+    async fn commit(&mut self, epoch: u64) -> Result<CommitResponse> {
+        await_future_with_monitor_receiver_err(
+            &mut self.response_rx,
+            self.request_tx.barrier(epoch, true),
+        )
+        .await?;
+        self.response_rx.next_commit_response().await
+    }
+}
+
 pub type RemoteSinkWriter<R> = RemoteSinkWriterInner<(), R>;
 pub type CoordinatedRemoteSinkWriter<R> = RemoteSinkWriterInner<Option<SinkMetadata>, R>;
 
@@ -374,10 +550,7 @@ impl<SM, R: RemoteSinkTrait> RemoteSinkWriterInner<SM, R> {
         let (request_tx, request_rx) = mpsc::channel(DEFAULT_CHANNEL_SIZE);
         let (response_tx, response_rx) = mpsc::channel(DEFAULT_CHANNEL_SIZE);
 
-        let mut stream_handle = SinkWriterStreamJniHandle {
-            request_tx,
-            response_rx,
-        };
+        let mut response_stream = ReceiverStream::new(response_rx).peekable();
 
         std::thread::spawn(move || {
             let mut env = JVM.get_or_init().unwrap().attach_current_thread().unwrap();
@@ -388,7 +561,10 @@ impl<SM, R: RemoteSinkTrait> RemoteSinkWriterInner<SM, R> {
                 "(JJ)V",
                 &[
                     JValue::from(&request_rx as *const Receiver<SinkWriterStreamRequest> as i64),
-                    JValue::from(&response_tx as *const Sender<SinkWriterStreamResponse> as i64),
+                    JValue::from(
+                        &response_tx as *const Sender<anyhow::Result<SinkWriterStreamResponse>>
+                            as i64,
+                    ),
                 ],
             );
 
@@ -410,8 +586,7 @@ impl<SM, R: RemoteSinkTrait> RemoteSinkWriterInner<SM, R> {
         };
 
         // First request
-        stream_handle
-            .request_tx
+        request_tx
             .send(sink_writer_stream_request)
             .await
             .map_err(|err| {
@@ -423,17 +598,18 @@ impl<SM, R: RemoteSinkTrait> RemoteSinkWriterInner<SM, R> {
             })?;
 
         // First response
-        match stream_handle.response_rx.recv().await {
-            Some(SinkWriterStreamResponse {
+        match response_stream.try_next().await {
+            Ok(Some(SinkWriterStreamResponse {
                 response: Some(sink_writer_stream_response::Response::Start(_)),
-            }) => {}
-            msg => {
+            })) => {}
+            Ok(msg) => {
                 return Err(SinkError::Internal(anyhow!(
                     "should get start response for connector `{}` but get {:?}",
                     R::SINK_NAME,
                     msg
                 )));
             }
+            Err(e) => return Err(SinkError::Internal(e)),
         };
 
         tracing::trace!(
@@ -444,6 +620,11 @@ impl<SM, R: RemoteSinkTrait> RemoteSinkWriterInner<SM, R> {
 
         let schema = param.schema();
 
+        let stream_handle = SinkWriterStreamJniHandle {
+            request_tx: SinkWriterStreamJniSender { request_tx },
+            response_rx: SinkWriterStreamJniReceiver { response_stream },
+        };
+
         Ok(Self {
             properties: param.properties,
             epoch: None,
@@ -458,7 +639,7 @@ impl<SM, R: RemoteSinkTrait> RemoteSinkWriterInner<SM, R> {
 
     #[cfg(test)]
     fn for_test(
-        response_receiver: Receiver<SinkWriterStreamResponse>,
+        response_receiver: Receiver<anyhow::Result<SinkWriterStreamResponse>>,
         request_sender: Sender<SinkWriterStreamRequest>,
     ) -> RemoteSinkWriter<R> {
         use risingwave_common::catalog::{Field, Schema};
@@ -480,8 +661,12 @@ impl<SM, R: RemoteSinkTrait> RemoteSinkWriterInner<SM, R> {
         ]);
 
         let stream_handle = SinkWriterStreamJniHandle {
-            request_tx: request_sender,
-            response_rx: response_receiver,
+            request_tx: SinkWriterStreamJniSender {
+                request_tx: request_sender,
+            },
+            response_rx: SinkWriterStreamJniReceiver {
+                response_stream: ReceiverStream::new(response_receiver).peekable(),
+            },
         };
 
         RemoteSinkWriter {
@@ -828,12 +1013,12 @@ mod test {
 
         // test commit
         response_sender
-            .send(SinkWriterStreamResponse {
+            .send(Ok(SinkWriterStreamResponse {
                 response: Some(Response::Commit(CommitResponse {
                     epoch: 2022,
                     metadata: None,
                 })),
-            })
+            }))
             .await
             .expect("test failed: failed to sync epoch");
         sink.barrier(true).await.unwrap();
diff --git a/src/connector/src/sink/writer.rs b/src/connector/src/sink/writer.rs
index 37ad452831b2e..64261bb42ab48 100644
--- a/src/connector/src/sink/writer.rs
+++ b/src/connector/src/sink/writer.rs
@@ -12,17 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use std::future::{Future, Ready};
+use std::pin::pin;
 use std::sync::Arc;
 use std::time::Instant;
 
 use async_trait::async_trait;
+use futures::future::{select, Either};
+use futures::TryFuture;
 use risingwave_common::array::StreamChunk;
 use risingwave_common::buffer::Bitmap;
+use risingwave_common::util::drop_either_future;
 
 use crate::sink::encoder::SerTo;
 use crate::sink::formatter::SinkFormatter;
-use crate::sink::log_store::{LogReader, LogStoreReadItem, TruncateOffset};
-use crate::sink::{LogSinker, Result, SinkMetrics};
+use crate::sink::log_store::{
+    DeliveryFutureManager, DeliveryFutureManagerAddFuture, LogReader, LogStoreReadItem,
+    TruncateOffset,
+};
+use crate::sink::{LogSinker, Result, SinkError, SinkMetrics};
 
 #[async_trait]
 pub trait SinkWriter: Send + 'static {
@@ -48,22 +56,17 @@ pub trait SinkWriter: Send + 'static {
     }
 }
 
-// TODO: remove this trait after KafkaSinkWriter implements SinkWriter
-#[async_trait]
-// An old version of SinkWriter for backward compatibility
-pub trait SinkWriterV1: Send + 'static {
-    async fn write_batch(&mut self, chunk: StreamChunk) -> Result<()>;
-
-    // the following interface is for transactions, if not supported, return Ok(())
-    // start a transaction with epoch number. Note that epoch number should be increasing.
-    async fn begin_epoch(&mut self, epoch: u64) -> Result<()>;
+pub type DummyDeliveryFuture = Ready<std::result::Result<(), SinkError>>;
 
-    // commits the current transaction and marks all messages in the transaction success.
-    async fn commit(&mut self) -> Result<()>;
+pub trait AsyncTruncateSinkWriter: Send + 'static {
+    type DeliveryFuture: TryFuture<Ok = (), Error = SinkError> + Unpin + Send + 'static =
+        DummyDeliveryFuture;
 
-    // aborts the current transaction because some error happens. we should rollback to the last
-    // commit point.
-    async fn abort(&mut self) -> Result<()>;
+    fn write_chunk<'a>(
+        &'a mut self,
+        chunk: StreamChunk,
+        add_future: DeliveryFutureManagerAddFuture<'a, Self::DeliveryFuture>,
+    ) -> impl Future<Output = Result<()>> + Send + 'a;
 }
 
 /// A free-form sink that may output in multiple formats and encodings. Examples include kafka,
@@ -104,12 +107,12 @@ pub trait FormattedSink {
     }
 }
 
-pub struct LogSinkerOf<W: SinkWriter<CommitMetadata = ()>> {
+pub struct LogSinkerOf<W> {
     writer: W,
     sink_metrics: SinkMetrics,
 }
 
-impl<W: SinkWriter<CommitMetadata = ()>> LogSinkerOf<W> {
+impl<W> LogSinkerOf<W> {
     pub fn new(writer: W, sink_metrics: SinkMetrics) -> Self {
         LogSinkerOf {
             writer,
@@ -118,6 +121,7 @@ impl<W: SinkWriter<CommitMetadata = ()>> LogSinkerOf<W> {
     }
 }
 
+#[async_trait]
 impl<W: SinkWriter<CommitMetadata = ()>> LogSinker for LogSinkerOf<W> {
     async fn consume_log_and_sink(self, mut log_reader: impl LogReader) -> Result<()> {
         let mut sink_writer = self.writer;
@@ -222,3 +226,64 @@ where
         }
     }
 }
+
+pub struct AsyncTruncateLogSinkerOf<W: AsyncTruncateSinkWriter> {
+    writer: W,
+    future_manager: DeliveryFutureManager<W::DeliveryFuture>,
+}
+
+impl<W: AsyncTruncateSinkWriter> AsyncTruncateLogSinkerOf<W> {
+    pub fn new(writer: W, max_future_count: usize) -> Self {
+        AsyncTruncateLogSinkerOf {
+            writer,
+            future_manager: DeliveryFutureManager::new(max_future_count),
+        }
+    }
+}
+
+#[async_trait]
+impl<W: AsyncTruncateSinkWriter> LogSinker for AsyncTruncateLogSinkerOf<W> {
+    async fn consume_log_and_sink(mut self, mut log_reader: impl LogReader) -> Result<()> {
+        log_reader.init().await?;
+        loop {
+            let select_result = drop_either_future(
+                select(
+                    pin!(log_reader.next_item()),
+                    pin!(self.future_manager.next_truncate_offset()),
+                )
+                .await,
+            );
+            match select_result {
+                Either::Left(item_result) => {
+                    let (epoch, item) = item_result?;
+                    match item {
+                        LogStoreReadItem::StreamChunk { chunk_id, chunk } => {
+                            let add_future = self.future_manager.start_write_chunk(epoch, chunk_id);
+                            self.writer.write_chunk(chunk, add_future).await?;
+                        }
+                        LogStoreReadItem::Barrier {
+                            is_checkpoint: _is_checkpoint,
+                        } => {
+                            self.future_manager.add_barrier(epoch);
+                        }
+                        LogStoreReadItem::UpdateVnodeBitmap(_) => {}
+                    }
+                }
+                Either::Right(offset_result) => {
+                    let offset = offset_result?;
+                    log_reader.truncate(offset).await?;
+                }
+            }
+        }
+    }
+}
+
+#[easy_ext::ext(AsyncTruncateSinkWriterExt)]
+impl<T> T
+where
+    T: AsyncTruncateSinkWriter + Sized,
+{
+    pub fn into_log_sinker(self, max_future_count: usize) -> AsyncTruncateLogSinkerOf<Self> {
+        AsyncTruncateLogSinkerOf::new(self, max_future_count)
+    }
+}
diff --git a/src/expr/core/src/aggregate/def.rs b/src/expr/core/src/aggregate/def.rs
index f71bfd454a415..964ec46c9f9c4 100644
--- a/src/expr/core/src/aggregate/def.rs
+++ b/src/expr/core/src/aggregate/def.rs
@@ -233,6 +233,9 @@ pub enum AggKind {
     PercentileDisc,
     Mode,
     Grouping,
+
+    /// Return the most recently seen one of the input values.
+    InternalLastSeenValue,
 }
 
 impl AggKind {
@@ -264,6 +267,7 @@ impl AggKind {
             PbType::PercentileDisc => Ok(AggKind::PercentileDisc),
             PbType::Mode => Ok(AggKind::Mode),
             PbType::Grouping => Ok(AggKind::Grouping),
+            PbType::InternalLastSeenValue => Ok(AggKind::InternalLastSeenValue),
             PbType::Unspecified => bail!("Unrecognized agg."),
         }
     }
@@ -294,8 +298,9 @@ impl AggKind {
             Self::VarSamp => PbType::VarSamp,
             Self::PercentileCont => PbType::PercentileCont,
             Self::PercentileDisc => PbType::PercentileDisc,
-            Self::Grouping => PbType::Grouping,
             Self::Mode => PbType::Mode,
+            Self::Grouping => PbType::Grouping,
+            Self::InternalLastSeenValue => PbType::InternalLastSeenValue,
         }
     }
 }
@@ -422,6 +427,7 @@ pub mod agg_kinds {
                 | AggKind::BoolAnd
                 | AggKind::BoolOr
                 | AggKind::ApproxCountDistinct
+                | AggKind::InternalLastSeenValue
         };
     }
     pub use single_value_state;
@@ -450,7 +456,11 @@ impl AggKind {
     /// Get the total phase agg kind from the partial phase agg kind.
     pub fn partial_to_total(self) -> Option<Self> {
         match self {
-            AggKind::BitXor | AggKind::Min | AggKind::Max | AggKind::Sum => Some(self),
+            AggKind::BitXor
+            | AggKind::Min
+            | AggKind::Max
+            | AggKind::Sum
+            | AggKind::InternalLastSeenValue => Some(self),
             AggKind::Sum0 | AggKind::Count => Some(AggKind::Sum0),
             agg_kinds::simply_cannot_two_phase!() => None,
             agg_kinds::rewritten!() => None,
diff --git a/src/expr/core/src/expr/build.rs b/src/expr/core/src/expr/build.rs
index 1ea03bd36f42a..7dffbcd42d66b 100644
--- a/src/expr/core/src/expr/build.rs
+++ b/src/expr/core/src/expr/build.rs
@@ -27,8 +27,13 @@ use super::expr_in::InExpression;
 use super::expr_some_all::SomeAllExpression;
 use super::expr_udf::UdfExpression;
 use super::expr_vnode::VnodeExpression;
-use super::wrapper::{Checked, EvalErrorReport, NonStrict};
-use crate::expr::{BoxedExpression, Expression, InputRefExpression, LiteralExpression};
+use super::wrapper::checked::Checked;
+use super::wrapper::non_strict::NonStrict;
+use super::wrapper::EvalErrorReport;
+use super::NonStrictExpression;
+use crate::expr::{
+    BoxedExpression, Expression, ExpressionBoxExt, InputRefExpression, LiteralExpression,
+};
 use crate::sig::FUNCTION_REGISTRY;
 use crate::{bail, ExprError, Result};
 
@@ -41,8 +46,10 @@ pub fn build_from_prost(prost: &ExprNode) -> Result<BoxedExpression> {
 pub fn build_non_strict_from_prost(
     prost: &ExprNode,
     error_report: impl EvalErrorReport + 'static,
-) -> Result<BoxedExpression> {
-    ExprBuilder::new_non_strict(error_report).build(prost)
+) -> Result<NonStrictExpression> {
+    ExprBuilder::new_non_strict(error_report)
+        .build(prost)
+        .map(NonStrictExpression)
 }
 
 /// Build an expression from protobuf with possibly some wrappers attached to each node.
@@ -153,7 +160,7 @@ impl<E: Build + 'static> BuildBoxed for E {
         prost: &ExprNode,
         build_child: impl Fn(&ExprNode) -> Result<BoxedExpression>,
     ) -> Result<BoxedExpression> {
-        Self::build(prost, build_child).map(Expression::boxed)
+        Self::build(prost, build_child).map(ExpressionBoxExt::boxed)
     }
 }
 
@@ -217,9 +224,9 @@ pub fn build_func_non_strict(
     ret_type: DataType,
     children: Vec<BoxedExpression>,
     error_report: impl EvalErrorReport + 'static,
-) -> Result<BoxedExpression> {
+) -> Result<NonStrictExpression> {
     let expr = build_func(func, ret_type, children)?;
-    let wrapped = ExprBuilder::new_non_strict(error_report).wrap(expr);
+    let wrapped = NonStrictExpression(ExprBuilder::new_non_strict(error_report).wrap(expr));
 
     Ok(wrapped)
 }
diff --git a/src/expr/core/src/expr/mod.rs b/src/expr/core/src/expr/mod.rs
index 37e0104371a3e..48a46f640bf7b 100644
--- a/src/expr/core/src/expr/mod.rs
+++ b/src/expr/core/src/expr/mod.rs
@@ -58,7 +58,7 @@ pub use self::build::*;
 pub use self::expr_input_ref::InputRefExpression;
 pub use self::expr_literal::LiteralExpression;
 pub use self::value::{ValueImpl, ValueRef};
-pub use self::wrapper::EvalErrorReport;
+pub use self::wrapper::*;
 pub use super::{ExprError, Result};
 
 /// Interface of an expression.
@@ -67,6 +67,7 @@ pub use super::{ExprError, Result};
 /// should be implemented. Prefer calling and implementing `eval_v2` instead of `eval` if possible,
 /// to gain the performance benefit of scalar expression.
 #[async_trait::async_trait]
+#[auto_impl::auto_impl(&, Box)]
 pub trait Expression: std::fmt::Debug + Sync + Send {
     /// Get the return data type.
     fn return_type(&self) -> DataType;
@@ -101,23 +102,77 @@ pub trait Expression: std::fmt::Debug + Sync + Send {
     fn eval_const(&self) -> Result<Datum> {
         Err(ExprError::NotConstant)
     }
+}
 
+/// An owned dynamically typed [`Expression`].
+pub type BoxedExpression = Box<dyn Expression>;
+
+/// Extension trait for boxing expressions.
+///
+/// This is not directly made into [`Expression`] trait because...
+/// - an expression does not have to be `'static`,
+/// - and for the ease of `auto_impl`.
+#[easy_ext::ext(ExpressionBoxExt)]
+impl<E: Expression + 'static> E {
     /// Wrap the expression in a Box.
-    fn boxed(self) -> BoxedExpression
-    where
-        Self: Sized + Send + 'static,
-    {
+    pub fn boxed(self) -> BoxedExpression {
         Box::new(self)
     }
 }
 
-// TODO: make this an extension, or implement it on a `NonStrict` newtype.
-impl dyn Expression {
+/// A type-safe wrapper that indicates the inner expression can be evaluated in a non-strict
+/// manner, i.e., developers can directly call `eval_infallible` and `eval_row_infallible` without
+/// checking the result.
+///
+/// This is usually created by non-strict build functions like [`crate::expr::build_non_strict_from_prost`]
+/// and [`crate::expr::build_func_non_strict`]. It can also be created directly by
+/// [`NonStrictExpression::new_topmost`], where only the evaluation of the topmost level expression
+/// node is non-strict and should be treated as a TODO.
+///
+/// Compared to [`crate::expr::wrapper::non_strict::NonStrict`], this is more like an indicator
+/// applied on the root of an expression tree, while the latter is a wrapper that can be applied on
+/// each node of the tree and actually changes the behavior. As a result, [`NonStrictExpression`]
+/// does not implement [`Expression`] trait and instead deals directly with developers.
+#[derive(Debug)]
+pub struct NonStrictExpression<E = BoxedExpression>(E);
+
+impl<E> NonStrictExpression<E>
+where
+    E: Expression,
+{
+    /// Create a non-strict expression directly wrapping the given expression.
+    ///
+    /// Should only be used in tests as evaluation may panic.
+    pub fn for_test(inner: E) -> NonStrictExpression
+    where
+        E: 'static,
+    {
+        NonStrictExpression(inner.boxed())
+    }
+
+    /// Create a non-strict expression from the given expression, where only the evaluation of the
+    /// topmost level expression node is non-strict (which is subtly different from
+    /// [`crate::expr::build_non_strict_from_prost`] where every node is non-strict).
+    ///
+    /// This should be used as a TODO.
+    pub fn new_topmost(
+        inner: E,
+        error_report: impl EvalErrorReport,
+    ) -> NonStrictExpression<impl Expression> {
+        let inner = wrapper::non_strict::NonStrict::new(inner, error_report);
+        NonStrictExpression(inner)
+    }
+
+    /// Get the return data type.
+    pub fn return_type(&self) -> DataType {
+        self.0.return_type()
+    }
+
     /// Evaluate the expression in vectorized execution and assert it succeeds. Returns an array.
     ///
     /// Use with expressions built in non-strict mode.
     pub async fn eval_infallible(&self, input: &DataChunk) -> ArrayRef {
-        self.eval(input).await.expect("evaluation failed")
+        self.0.eval(input).await.expect("evaluation failed")
     }
 
     /// Evaluate the expression in row-based execution and assert it succeeds. Returns a nullable
@@ -125,38 +180,17 @@ impl dyn Expression {
     ///
     /// Use with expressions built in non-strict mode.
     pub async fn eval_row_infallible(&self, input: &OwnedRow) -> Datum {
-        self.eval_row(input).await.expect("evaluation failed")
-    }
-}
-
-/// An owned dynamically typed [`Expression`].
-pub type BoxedExpression = Box<dyn Expression>;
-
-// TODO: avoid the overhead of extra boxing.
-#[async_trait::async_trait]
-impl Expression for BoxedExpression {
-    fn return_type(&self) -> DataType {
-        (**self).return_type()
+        self.0.eval_row(input).await.expect("evaluation failed")
     }
 
-    async fn eval(&self, input: &DataChunk) -> Result<ArrayRef> {
-        (**self).eval(input).await
-    }
-
-    async fn eval_v2(&self, input: &DataChunk) -> Result<ValueImpl> {
-        (**self).eval_v2(input).await
-    }
-
-    async fn eval_row(&self, input: &OwnedRow) -> Result<Datum> {
-        (**self).eval_row(input).await
-    }
-
-    fn eval_const(&self) -> Result<Datum> {
-        (**self).eval_const()
+    /// Unwrap the inner expression.
+    pub fn into_inner(self) -> E {
+        self.0
     }
 
-    fn boxed(self) -> BoxedExpression {
-        self
+    /// Get a reference to the inner expression.
+    pub fn inner(&self) -> &E {
+        &self.0
     }
 }
 
diff --git a/src/expr/core/src/expr/wrapper/checked.rs b/src/expr/core/src/expr/wrapper/checked.rs
index 1e049ad481010..b3b1375c4fa82 100644
--- a/src/expr/core/src/expr/wrapper/checked.rs
+++ b/src/expr/core/src/expr/wrapper/checked.rs
@@ -22,7 +22,7 @@ use crate::expr::{Expression, ValueImpl};
 
 /// A wrapper of [`Expression`] that does extra checks after evaluation.
 #[derive(Debug)]
-pub struct Checked<E>(pub E);
+pub(crate) struct Checked<E>(pub E);
 
 // TODO: avoid the overhead of extra boxing.
 #[async_trait]
diff --git a/src/expr/core/src/expr/wrapper/mod.rs b/src/expr/core/src/expr/wrapper/mod.rs
index 48241d05de45c..16988a050ad8d 100644
--- a/src/expr/core/src/expr/wrapper/mod.rs
+++ b/src/expr/core/src/expr/wrapper/mod.rs
@@ -12,8 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-mod checked;
-mod non_strict;
+pub(crate) mod checked;
+pub(crate) mod non_strict;
 
-pub use checked::Checked;
-pub use non_strict::{EvalErrorReport, NonStrict};
+pub use non_strict::{EvalErrorReport, LogReport};
diff --git a/src/expr/core/src/expr/wrapper/non_strict.rs b/src/expr/core/src/expr/wrapper/non_strict.rs
index 0859cea27aa49..782456023cdf7 100644
--- a/src/expr/core/src/expr/wrapper/non_strict.rs
+++ b/src/expr/core/src/expr/wrapper/non_strict.rs
@@ -23,7 +23,7 @@ use crate::expr::{Expression, ValueImpl};
 use crate::ExprError;
 
 /// Report an error during evaluation.
-#[auto_impl(Arc)]
+#[auto_impl(&, Arc)]
 pub trait EvalErrorReport: Clone + Send + Sync {
     /// Perform the error reporting.
     ///
@@ -42,11 +42,21 @@ impl EvalErrorReport for ! {
     }
 }
 
+/// An [`EvalErrorReport`] that reports evaluation errors by writing them to the error log.
+#[derive(Clone)]
+pub struct LogReport;
+
+impl EvalErrorReport for LogReport {
+    fn report(&self, error: ExprError) {
+        tracing::error!(%error, "failed to evaluate expression");
+    }
+}
+
 /// A wrapper of [`Expression`] that evaluates in a non-strict way. Basically...
 /// - When an error occurs during chunk-level evaluation, recompute in row-based execution and pad
 ///   with NULL for each failed row.
 /// - Report all error occurred during row-level evaluation to the [`EvalErrorReport`].
-pub struct NonStrict<E, R> {
+pub(crate) struct NonStrict<E, R> {
     inner: E,
     report: R,
 }
diff --git a/src/expr/core/src/lib.rs b/src/expr/core/src/lib.rs
index c2f46d5632274..b49c4ae161dfc 100644
--- a/src/expr/core/src/lib.rs
+++ b/src/expr/core/src/lib.rs
@@ -17,7 +17,7 @@
 #![feature(lint_reasons)]
 #![feature(iterator_try_collect)]
 #![feature(lazy_cell)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(arc_unwrap_or_clone)]
 #![feature(never_type)]
 
diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml
index 81cd685c4dc27..cc0229f83ebab 100644
--- a/src/expr/impl/Cargo.toml
+++ b/src/expr/impl/Cargo.toml
@@ -29,6 +29,7 @@ futures-async-stream = { workspace = true }
 futures-util = "0.3"
 hex = "0.4"
 itertools = "0.11"
+jsonbb = "0.1"
 md5 = "0.7"
 num-traits = "0.2"
 regex = "1"
diff --git a/src/expr/impl/benches/expr.rs b/src/expr/impl/benches/expr.rs
index 1e84d8d8e4825..010508c8de45e 100644
--- a/src/expr/impl/benches/expr.rs
+++ b/src/expr/impl/benches/expr.rs
@@ -170,10 +170,7 @@ fn bench_expr(c: &mut Criterion) {
             // 25: serial array
             SerialArray::from_iter((1..=CHUNK_SIZE).map(|i| Serial::from(i as i64))).into_ref(),
             // 26: jsonb array
-            JsonbArray::from_iter(
-                (1..=CHUNK_SIZE).map(|i| JsonbVal::from(serde_json::Value::Number(i.into()))),
-            )
-            .into_ref(),
+            JsonbArray::from_iter((1..=CHUNK_SIZE).map(|i| JsonbVal::from(i as i64))).into_ref(),
             // 27: int256 array
             Int256Array::from_iter((1..=CHUNK_SIZE).map(|_| Int256::from(1))).into_ref(),
             // 28: extract field for interval
@@ -279,16 +276,16 @@ fn bench_expr(c: &mut Criterion) {
     'sig: for sig in sigs {
         if (sig.inputs_type.iter())
             .chain([&sig.ret_type])
-            .any(|t| !t.is_exact())
+            .any(|t| !t.is_exact() || t.as_exact().is_array())
         {
-            // TODO: support struct and list
+            // TODO: support struct and array
             println!("todo: {sig:?}");
             continue;
         }
         if [
-            "date_trunc(varchar, timestamptz) -> timestamptz",
-            "to_timestamp1(varchar, varchar) -> timestamptz",
-            "to_char(timestamptz, varchar) -> varchar",
+            "date_trunc(character varying, timestamp with time zone) -> timestamp with time zone",
+            "to_timestamp1(character varying, character varying) -> timestamp with time zone",
+            "to_char(timestamp with time zone, character varying) -> character varying",
         ]
         .contains(&format!("{sig:?}").as_str())
         {
@@ -376,6 +373,13 @@ fn bench_expr(c: &mut Criterion) {
             args: match sig.inputs_type.as_slice() {
                 [] => AggArgs::None,
                 [t] => AggArgs::Unary(t.as_exact().clone(), input_index_for_type(t.as_exact())),
+                [t1, t2] => AggArgs::Binary(
+                    [t1.as_exact().clone(), t2.as_exact().clone()],
+                    [
+                        input_index_for_type(t1.as_exact()),
+                        input_index_for_type(t2.as_exact()),
+                    ],
+                ),
                 _ => {
                     println!("todo: {sig:?}");
                     continue;
@@ -393,6 +397,15 @@ fn bench_expr(c: &mut Criterion) {
                 continue;
             }
         };
+        let input = match sig.inputs_type.as_slice() {
+            [] => input.project(&[]),
+            [t] => input.project(&[input_index_for_type(t.as_exact())]),
+            [t1, t2] => input.project(&[
+                input_index_for_type(t1.as_exact()),
+                input_index_for_type(t2.as_exact()),
+            ]),
+            _ => unreachable!(),
+        };
         c.bench_function(&format!("{sig:?}"), |bencher| {
             bencher
                 .to_async(FuturesExecutor)
diff --git a/src/expr/impl/src/aggregate/general.rs b/src/expr/impl/src/aggregate/general.rs
index de1331c524063..f47c94d45f24d 100644
--- a/src/expr/impl/src/aggregate/general.rs
+++ b/src/expr/impl/src/aggregate/general.rs
@@ -62,6 +62,15 @@ fn last_value<T>(_: T, input: T) -> T {
     input
 }
 
+#[aggregate("internal_last_seen_value(*) -> auto", state = "ref")]
+fn internal_last_seen_value<T>(state: T, input: T, retract: bool) -> T {
+    if retract {
+        state
+    } else {
+        input
+    }
+}
+
 /// Note the following corner cases:
 ///
 /// ```slt
diff --git a/src/expr/impl/src/aggregate/jsonb_agg.rs b/src/expr/impl/src/aggregate/jsonb_agg.rs
index 8385e2c6a060b..96f5e50da85e3 100644
--- a/src/expr/impl/src/aggregate/jsonb_agg.rs
+++ b/src/expr/impl/src/aggregate/jsonb_agg.rs
@@ -12,22 +12,21 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use risingwave_common::types::JsonbVal;
+use risingwave_common::estimate_size::EstimateSize;
+use risingwave_common::types::{JsonbRef, JsonbVal, ScalarImpl, F32, F64};
+use risingwave_expr::aggregate::AggStateDyn;
 use risingwave_expr::{aggregate, ExprError, Result};
-use serde_json::Value;
 
 #[aggregate("jsonb_agg(boolean) -> jsonb")]
 #[aggregate("jsonb_agg(*int) -> jsonb")]
 #[aggregate("jsonb_agg(*float) -> jsonb")]
 #[aggregate("jsonb_agg(varchar) -> jsonb")]
 #[aggregate("jsonb_agg(jsonb) -> jsonb")]
-fn jsonb_agg(state: Option<JsonbVal>, input: Option<impl Into<Value>>) -> JsonbVal {
-    let mut jsonb = state.unwrap_or_else(|| Value::Array(Vec::with_capacity(1)).into());
-    match jsonb.as_serde_mut() {
-        Value::Array(a) => a.push(input.map_or(Value::Null, Into::into)),
-        _ => unreachable!("invalid jsonb state"),
-    };
-    jsonb
+fn jsonb_agg(state: &mut JsonbArrayState, input: Option<impl ToJson>) {
+    match input {
+        Some(input) => input.add_to(&mut state.0),
+        None => state.0.add_null(),
+    }
 }
 
 #[aggregate("jsonb_object_agg(varchar, boolean) -> jsonb")]
@@ -36,15 +35,130 @@ fn jsonb_agg(state: Option<JsonbVal>, input: Option<impl Into<Value>>) -> JsonbV
 #[aggregate("jsonb_object_agg(varchar, varchar) -> jsonb")]
 #[aggregate("jsonb_object_agg(varchar, jsonb) -> jsonb")]
 fn jsonb_object_agg(
-    state: Option<JsonbVal>,
+    state: &mut JsonbObjectState,
     key: Option<&str>,
-    value: Option<impl Into<Value>>,
-) -> Result<JsonbVal> {
+    value: Option<impl ToJson>,
+) -> Result<()> {
     let key = key.ok_or(ExprError::FieldNameNull)?;
-    let mut jsonb = state.unwrap_or_else(|| Value::Object(Default::default()).into());
-    match jsonb.as_serde_mut() {
-        Value::Object(map) => map.insert(key.into(), value.map_or(Value::Null, Into::into)),
-        _ => unreachable!("invalid jsonb state"),
-    };
-    Ok(jsonb)
+    state.0.add_string(key);
+    match value {
+        Some(value) => value.add_to(&mut state.0),
+        None => state.0.add_null(),
+    }
+    Ok(())
+}
+
+#[derive(Debug)]
+struct JsonbArrayState(jsonbb::Builder);
+
+impl EstimateSize for JsonbArrayState {
+    fn estimated_heap_size(&self) -> usize {
+        self.0.capacity()
+    }
+}
+
+impl AggStateDyn for JsonbArrayState {}
+
+/// Creates an initial state.
+impl Default for JsonbArrayState {
+    fn default() -> Self {
+        let mut builder = jsonbb::Builder::default();
+        builder.begin_array();
+        Self(builder)
+    }
+}
+
+/// Finishes aggregation and returns the result.
+impl From<&JsonbArrayState> for ScalarImpl {
+    fn from(builder: &JsonbArrayState) -> Self {
+        // TODO: avoid clone
+        let mut builder = builder.0.clone();
+        builder.end_array();
+        let jsonb: JsonbVal = builder.finish().into();
+        jsonb.into()
+    }
+}
+
+#[derive(Debug)]
+struct JsonbObjectState(jsonbb::Builder);
+
+impl EstimateSize for JsonbObjectState {
+    fn estimated_heap_size(&self) -> usize {
+        self.0.capacity()
+    }
+}
+
+impl AggStateDyn for JsonbObjectState {}
+
+/// Creates an initial state.
+impl Default for JsonbObjectState {
+    fn default() -> Self {
+        let mut builder = jsonbb::Builder::default();
+        builder.begin_object();
+        Self(builder)
+    }
+}
+
+/// Finishes aggregation and returns the result.
+impl From<&JsonbObjectState> for ScalarImpl {
+    fn from(builder: &JsonbObjectState) -> Self {
+        // TODO: avoid clone
+        let mut builder = builder.0.clone();
+        builder.end_object();
+        let jsonb: JsonbVal = builder.finish().into();
+        jsonb.into()
+    }
+}
+
+/// Values that can be converted to JSON.
+trait ToJson {
+    fn add_to(self, builder: &mut jsonbb::Builder);
+}
+
+impl ToJson for bool {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_bool(self);
+    }
+}
+
+impl ToJson for i16 {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_i64(self as _);
+    }
+}
+
+impl ToJson for i32 {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_i64(self as _);
+    }
+}
+
+impl ToJson for i64 {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_i64(self);
+    }
+}
+
+impl ToJson for F32 {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_f64(self.0 as f64);
+    }
+}
+
+impl ToJson for F64 {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_f64(self.0);
+    }
+}
+
+impl ToJson for &str {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_string(self);
+    }
+}
+
+impl ToJson for JsonbRef<'_> {
+    fn add_to(self, builder: &mut jsonbb::Builder) {
+        builder.add_value(self.into());
+    }
 }
diff --git a/src/expr/impl/src/lib.rs b/src/expr/impl/src/lib.rs
index a5906e4320282..6ea82d30ac5f1 100644
--- a/src/expr/impl/src/lib.rs
+++ b/src/expr/impl/src/lib.rs
@@ -28,7 +28,7 @@
 #![feature(exclusive_range_pattern)]
 #![feature(lazy_cell)]
 #![feature(round_ties_even)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(test)]
 #![feature(arc_unwrap_or_clone)]
 
diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs
index 889cc43fe6b18..c173c76c330c5 100644
--- a/src/expr/impl/src/scalar/cast.rs
+++ b/src/expr/impl/src/scalar/cast.rs
@@ -22,7 +22,9 @@ use risingwave_common::cast;
 use risingwave_common::row::OwnedRow;
 use risingwave_common::types::{DataType, Int256, IntoOrdered, JsonbRef, ToText, F64};
 use risingwave_common::util::iter_util::ZipEqFast;
-use risingwave_expr::expr::{build_func, Context, Expression, InputRefExpression};
+use risingwave_expr::expr::{
+    build_func, Context, Expression, ExpressionBoxExt, InputRefExpression,
+};
 use risingwave_expr::{function, ExprError, Result};
 use risingwave_pb::expr::expr_node::PbType;
 
diff --git a/src/expr/impl/src/scalar/jsonb_concat.rs b/src/expr/impl/src/scalar/jsonb_concat.rs
index 6277db8d5b981..db469457bb135 100644
--- a/src/expr/impl/src/scalar/jsonb_concat.rs
+++ b/src/expr/impl/src/scalar/jsonb_concat.rs
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use jsonbb::{Value, ValueRef};
 use risingwave_common::types::{JsonbRef, JsonbVal};
 use risingwave_expr::function;
-use serde_json::{json, Value};
 
 /// Concatenates the two jsonbs.
 ///
@@ -59,43 +59,35 @@ use serde_json::{json, Value};
 /// ```
 #[function("jsonb_cat(jsonb, jsonb) -> jsonb")]
 pub fn jsonb_cat(left: JsonbRef<'_>, right: JsonbRef<'_>) -> JsonbVal {
-    let left_val = left.value().clone();
-    let right_val = right.value().clone();
-    match (left_val, right_val) {
+    match (left.into(), right.into()) {
         // left and right are object based.
         // This would have left:{'a':1}, right:{'b':2} -> {'a':1,'b':2}
-        (Value::Object(mut left_map), Value::Object(right_map)) => {
-            left_map.extend(right_map);
-            JsonbVal::from(Value::Object(left_map))
+        (ValueRef::Object(left), ValueRef::Object(right)) => {
+            JsonbVal::from(Value::object(left.iter().chain(right.iter())))
         }
 
         // left and right are array-based.
         // This would merge both arrays into one array.
         // This would have left:[1,2], right:[3,4] -> [1,2,3,4]
-        (Value::Array(mut left_arr), Value::Array(right_arr)) => {
-            left_arr.extend(right_arr);
-            JsonbVal::from(Value::Array(left_arr))
+        (ValueRef::Array(left), ValueRef::Array(right)) => {
+            JsonbVal::from(Value::array(left.iter().chain(right.iter())))
         }
 
         // One operand is an array, and the other is a single element.
         // This would insert the non-array value as another element into the array
         // Eg left:[1,2] right: {'a':1} -> [1,2,{'a':1}]
-        (Value::Array(mut left_arr), single_val) => {
-            left_arr.push(single_val);
-            JsonbVal::from(Value::Array(left_arr))
-        }
+        (ValueRef::Array(left), value) => JsonbVal::from(Value::array(left.iter().chain([value]))),
 
         // One operand is an array, and the other is a single element.
         // This would insert the non-array value as another element into the array
         // Eg left:{'a':1} right:[1,2] -> [{'a':1},1,2]
-        (single_val, Value::Array(mut right_arr)) => {
-            right_arr.insert(0, single_val);
-            JsonbVal::from(Value::Array(right_arr))
+        (value, ValueRef::Array(right)) => {
+            JsonbVal::from(Value::array([value].into_iter().chain(right.iter())))
         }
 
         // Both are non-array inputs.
         // Both elements would be placed together in an array
         // Eg left:1 right: 2 -> [1,2]
-        (left, right) => JsonbVal::from(json!([left, right])),
+        (left, right) => JsonbVal::from(Value::array([left, right])),
     }
 }
diff --git a/src/expr/impl/src/table_function/generate_series.rs b/src/expr/impl/src/table_function/generate_series.rs
index 586fa60de02c2..dfa09b0e215b8 100644
--- a/src/expr/impl/src/table_function/generate_series.rs
+++ b/src/expr/impl/src/table_function/generate_series.rs
@@ -159,7 +159,7 @@ mod tests {
     use risingwave_common::array::DataChunk;
     use risingwave_common::types::test_utils::IntervalTestExt;
     use risingwave_common::types::{DataType, Decimal, Interval, ScalarImpl, Timestamp};
-    use risingwave_expr::expr::{BoxedExpression, Expression, LiteralExpression};
+    use risingwave_expr::expr::{BoxedExpression, ExpressionBoxExt, LiteralExpression};
     use risingwave_expr::table_function::build;
     use risingwave_expr::ExprError;
     use risingwave_pb::expr::table_function::PbType;
diff --git a/src/expr/macro/src/gen.rs b/src/expr/macro/src/gen.rs
index 9155853df5b7b..454d2a3169137 100644
--- a/src/expr/macro/src/gen.rs
+++ b/src/expr/macro/src/gen.rs
@@ -579,9 +579,13 @@ impl FunctionAttr {
 
     /// Generate build function for aggregate function.
     fn generate_agg_build_fn(&self, user_fn: &AggregateFnOrImpl) -> Result<TokenStream2> {
-        let state_type: TokenStream2 = match &self.state {
-            Some(state) if state == "ref" => types::ref_type(&self.ret).parse().unwrap(),
-            Some(state) if state != "ref" => types::owned_type(state).parse().unwrap(),
+        // If the first argument of the aggregate function is of type `&mut T`,
+        // we assume it is a user defined state type.
+        let custom_state = user_fn.accumulate().first_mut_ref_arg.as_ref();
+        let state_type: TokenStream2 = match (custom_state, &self.state) {
+            (Some(s), _) => s.parse().unwrap(),
+            (_, Some(state)) if state == "ref" => types::ref_type(&self.ret).parse().unwrap(),
+            (_, Some(state)) if state != "ref" => types::owned_type(state).parse().unwrap(),
             _ => types::owned_type(&self.ret).parse().unwrap(),
         };
         let let_arrays = self
@@ -603,24 +607,37 @@ impl FunctionAttr {
                 quote! { let #v = unsafe { #a.value_at_unchecked(row_id) }; }
             })
             .collect_vec();
-        let let_state = match &self.state {
-            Some(s) if s == "ref" => {
-                quote! { state0.as_ref().map(|x| x.as_scalar_ref_impl().try_into().unwrap()) }
-            }
-            _ => quote! { state0.take().map(|s| s.try_into().unwrap()) },
+        let downcast_state = if custom_state.is_some() {
+            quote! { let mut state: &mut #state_type = state0.downcast_mut(); }
+        } else if let Some(s) = &self.state && s == "ref" {
+            quote! { let mut state: Option<#state_type> = state0.as_datum_mut().as_ref().map(|x| x.as_scalar_ref_impl().try_into().unwrap()); }
+        } else {
+            quote! { let mut state: Option<#state_type> = state0.as_datum_mut().take().map(|s| s.try_into().unwrap()); }
         };
-        let assign_state = match &self.state {
-            Some(s) if s == "ref" => quote! { state.map(|x| x.to_owned_scalar().into()) },
-            _ => quote! { state.map(|s| s.into()) },
+        let restore_state = if custom_state.is_some() {
+            quote! {}
+        } else if let Some(s) = &self.state && s == "ref" {
+            quote! { *state0.as_datum_mut() = state.map(|x| x.to_owned_scalar().into()); }
+        } else {
+            quote! { *state0.as_datum_mut() = state.map(|s| s.into()); }
         };
-        let create_state = self.init_state.as_ref().map(|state| {
+        let create_state = if custom_state.is_some() {
+            quote! {
+                fn create_state(&self) -> AggregateState {
+                    AggregateState::Any(Box::<#state_type>::default())
+                }
+            }
+        } else if let Some(state) = &self.init_state {
             let state: TokenStream2 = state.parse().unwrap();
             quote! {
                 fn create_state(&self) -> AggregateState {
                     AggregateState::Datum(Some(#state.into()))
                 }
             }
-        });
+        } else {
+            // by default: `AggregateState::Datum(None)`
+            quote! {}
+        };
         let args = (0..self.args.len()).map(|i| format_ident!("v{i}"));
         let args = quote! { #(#args,)* };
         let panic_on_retract = {
@@ -703,17 +720,23 @@ impl FunctionAttr {
                 _ => todo!("multiple arguments are not supported for non-option function"),
             }
         }
-        let get_result = match user_fn {
-            AggregateFnOrImpl::Impl(impl_) if impl_.finalize.is_some() => {
-                quote! {
-                    let state = match state {
-                        Some(s) => s.as_scalar_ref_impl().try_into().unwrap(),
-                        None => return Ok(None),
-                    };
-                    Ok(Some(self.function.finalize(state).into()))
-                }
+        let update_state = if custom_state.is_some() {
+            quote! { _ = #next_state; }
+        } else {
+            quote! { state = #next_state; }
+        };
+        let get_result = if custom_state.is_some() {
+            quote! { Ok(Some(state.downcast_ref::<#state_type>().into())) }
+        } else if let AggregateFnOrImpl::Impl(impl_) = user_fn && impl_.finalize.is_some() {
+            quote! {
+                let state = match state.as_datum() {
+                    Some(s) => s.as_scalar_ref_impl().try_into().unwrap(),
+                    None => return Ok(None),
+                };
+                Ok(Some(self.function.finalize(state).into()))
             }
-            _ => quote! { Ok(state.clone()) },
+        } else {
+            quote! { Ok(state.as_datum().clone()) }
         };
         let function_field = match user_fn {
             AggregateFnOrImpl::Fn(_) => quote! {},
@@ -768,27 +791,25 @@ impl FunctionAttr {
 
                     async fn update(&self, state0: &mut AggregateState, input: &StreamChunk) -> Result<()> {
                         #(#let_arrays)*
-                        let state0 = state0.as_datum_mut();
-                        let mut state: Option<#state_type> = #let_state;
+                        #downcast_state
                         for row_id in input.visibility().iter_ones() {
                             let op = unsafe { *input.ops().get_unchecked(row_id) };
                             #(#let_values)*
-                            state = #next_state;
+                            #update_state
                         }
-                        *state0 = #assign_state;
+                        #restore_state
                         Ok(())
                     }
 
                     async fn update_range(&self, state0: &mut AggregateState, input: &StreamChunk, range: Range<usize>) -> Result<()> {
                         assert!(range.end <= input.capacity());
                         #(#let_arrays)*
-                        let state0 = state0.as_datum_mut();
-                        let mut state: Option<#state_type> = #let_state;
+                        #downcast_state
                         if input.is_compacted() {
                             for row_id in range {
                                 let op = unsafe { *input.ops().get_unchecked(row_id) };
                                 #(#let_values)*
-                                state = #next_state;
+                                #update_state
                             }
                         } else {
                             for row_id in input.visibility().iter_ones() {
@@ -799,15 +820,14 @@ impl FunctionAttr {
                                 }
                                 let op = unsafe { *input.ops().get_unchecked(row_id) };
                                 #(#let_values)*
-                                state = #next_state;
+                                #update_state
                             }
                         }
-                        *state0 = #assign_state;
+                        #restore_state
                         Ok(())
                     }
 
                     async fn get_result(&self, state: &AggregateState) -> Result<Datum> {
-                        let state = state.as_datum();
                         #get_result
                     }
                 }
diff --git a/src/expr/macro/src/lib.rs b/src/expr/macro/src/lib.rs
index 363fc958b557d..50a99cf3fda22 100644
--- a/src/expr/macro/src/lib.rs
+++ b/src/expr/macro/src/lib.rs
@@ -522,6 +522,8 @@ struct UserFunctionAttr {
     retract: bool,
     /// The argument type are `Option`s.
     arg_option: bool,
+    /// If the first argument type is `&mut T`, then `Some(T)`.
+    first_mut_ref_arg: Option<String>,
     /// The return type kind.
     return_type_kind: ReturnTypeKind,
     /// The kind of inner type `T` in `impl Iterator<Item = T>`
diff --git a/src/expr/macro/src/parse.rs b/src/expr/macro/src/parse.rs
index 24cc6942afcee..8e2e8c6d0b2f1 100644
--- a/src/expr/macro/src/parse.rs
+++ b/src/expr/macro/src/parse.rs
@@ -123,6 +123,7 @@ impl From<&syn::Signature> for UserFunctionAttr {
             context: sig.inputs.iter().any(arg_is_context),
             retract: last_arg_is_retract(sig),
             arg_option: args_contain_option(sig),
+            first_mut_ref_arg: first_mut_ref_arg(sig),
             return_type_kind,
             iterator_item_kind,
             core_return_type,
@@ -223,18 +224,15 @@ fn last_arg_is_retract(sig: &syn::Signature) -> bool {
 
 /// Check if any argument is `Option`.
 fn args_contain_option(sig: &syn::Signature) -> bool {
-    if sig.inputs.is_empty() {
-        return false;
-    }
     for arg in &sig.inputs {
         let syn::FnArg::Typed(arg) = arg else {
-            return false;
+            continue;
         };
         let syn::Type::Path(path) = arg.ty.as_ref() else {
-            return false;
+            continue;
         };
         let Some(seg) = path.path.segments.last() else {
-            return false;
+            continue;
         };
         if seg.ident == "Option" {
             return true;
@@ -243,6 +241,26 @@ fn args_contain_option(sig: &syn::Signature) -> bool {
     false
 }
 
+/// Returns `T` as a token string if the first non-`self` argument is `&mut T`, otherwise `None`.
+fn first_mut_ref_arg(sig: &syn::Signature) -> Option<String> {
+    let arg = match sig.inputs.first()? { // no arguments at all => `None`
+        syn::FnArg::Typed(arg) => arg,
+        syn::FnArg::Receiver(_) => match sig.inputs.iter().nth(1)? { // skip the `self` receiver
+            syn::FnArg::Typed(arg) => arg,
+            _ => return None,
+        },
+    };
+    let syn::Type::Reference(syn::TypeReference {
+        elem,
+        mutability: Some(_), // only `&mut` references match; `&T` yields `None`
+        ..
+    }) = arg.ty.as_ref()
+    else {
+        return None;
+    };
+    Some(elem.to_token_stream().to_string()) // the referenced type `T`, rendered from its tokens
+}
+
 /// Check the return type.
 fn check_type(ty: &syn::Type) -> (ReturnTypeKind, &syn::Type) {
     if let Some(inner) = strip_outer_type(ty, "Result") {
diff --git a/src/frontend/planner_test/tests/testdata/output/agg.yaml b/src/frontend/planner_test/tests/testdata/output/agg.yaml
index aefb4df98ef4e..baa77dc79c89b 100644
--- a/src/frontend/planner_test/tests/testdata/output/agg.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/agg.yaml
@@ -1395,20 +1395,20 @@
         sq_1.col_2;
   batch_plan: |-
     BatchExchange { order: [], dist: Single }
-    └─BatchProject { exprs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))))] }
-      └─BatchHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], aggs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))))] }
-        └─BatchExchange { order: [], dist: HashShard(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))) }
-          └─BatchHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], aggs: [max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)))] }
-            └─BatchSortAgg { group_key: [lineitem.l_orderkey], aggs: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))] }
+    └─BatchProject { exprs: [max(max(internal_last_seen_value(lineitem.l_commitdate)))] }
+      └─BatchHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate)], aggs: [max(max(internal_last_seen_value(lineitem.l_commitdate)))] }
+        └─BatchExchange { order: [], dist: HashShard(internal_last_seen_value(lineitem.l_commitdate)) }
+          └─BatchHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate)], aggs: [max(internal_last_seen_value(lineitem.l_commitdate))] }
+            └─BatchSortAgg { group_key: [lineitem.l_orderkey], aggs: [internal_last_seen_value(lineitem.l_commitdate)] }
               └─BatchScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_commitdate], distribution: UpstreamHashShard(lineitem.l_orderkey) }
   stream_plan: |-
-    StreamMaterialize { columns: [col_0, first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))(hidden)], stream_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], pk_columns: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], pk_conflict: NoCheck }
-    └─StreamProject { exprs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)))), first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))] }
-      └─StreamHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))], aggs: [max(max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)))), count] }
-        └─StreamExchange { dist: HashShard(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))) }
-          └─StreamHashAgg { group_key: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)), $expr1], aggs: [max(first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC))), count] }
-            └─StreamProject { exprs: [lineitem.l_orderkey, first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)), Vnode(lineitem.l_orderkey) as $expr1] }
-              └─StreamHashAgg { group_key: [lineitem.l_orderkey], aggs: [first_value(lineitem.l_commitdate order_by(lineitem.l_commitdate ASC)), count] }
+    StreamMaterialize { columns: [col_0, internal_last_seen_value(lineitem.l_commitdate)(hidden)], stream_key: [internal_last_seen_value(lineitem.l_commitdate)], pk_columns: [internal_last_seen_value(lineitem.l_commitdate)], pk_conflict: NoCheck }
+    └─StreamProject { exprs: [max(max(internal_last_seen_value(lineitem.l_commitdate))), internal_last_seen_value(lineitem.l_commitdate)] }
+      └─StreamHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate)], aggs: [max(max(internal_last_seen_value(lineitem.l_commitdate))), count] }
+        └─StreamExchange { dist: HashShard(internal_last_seen_value(lineitem.l_commitdate)) }
+          └─StreamHashAgg { group_key: [internal_last_seen_value(lineitem.l_commitdate), $expr1], aggs: [max(internal_last_seen_value(lineitem.l_commitdate)), count] }
+            └─StreamProject { exprs: [lineitem.l_orderkey, internal_last_seen_value(lineitem.l_commitdate), Vnode(lineitem.l_orderkey) as $expr1] }
+              └─StreamHashAgg { group_key: [lineitem.l_orderkey], aggs: [internal_last_seen_value(lineitem.l_commitdate), count] }
                 └─StreamTableScan { table: lineitem, columns: [lineitem.l_orderkey, lineitem.l_commitdate], pk: [lineitem.l_orderkey], dist: UpstreamHashShard(lineitem.l_orderkey) }
 - name: two phase agg on hop window input should use two phase agg
   sql: |
diff --git a/src/frontend/planner_test/tests/testdata/output/append_only.yaml b/src/frontend/planner_test/tests/testdata/output/append_only.yaml
index 184abd564c32b..d693d3fc942df 100644
--- a/src/frontend/planner_test/tests/testdata/output/append_only.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/append_only.yaml
@@ -14,11 +14,12 @@
     select t1.v1 as id, v2, v3 from t1 join t2 on t1.v1=t2.v1;
   stream_plan: |-
     StreamMaterialize { columns: [id, v2, v3, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, id], pk_columns: [t1._row_id, t2._row_id, id], pk_conflict: NoCheck }
-    └─StreamHashJoin [append_only] { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v3, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.v1) }
-        └─StreamTableScan { table: t2, columns: [t2.v1, t2.v3, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+      └─StreamHashJoin [append_only] { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v3, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.v1) }
+          └─StreamTableScan { table: t2, columns: [t2.v1, t2.v3, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - sql: |
     create table t1 (v1 int, v2 int) append only;
     select v1 from t1 order by v1 limit 3 offset 3;
diff --git a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml
index fde09972bb66b..ce6724dc91c37 100644
--- a/src/frontend/planner_test/tests/testdata/output/basic_query.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/basic_query.yaml
@@ -234,9 +234,10 @@
         └─BatchValues { rows: [] }
   stream_plan: |-
     StreamMaterialize { columns: [v, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v], pk_columns: [t._row_id, t._row_id#1, v], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v, t._row_id, t._row_id] }
-      ├─StreamExchange { dist: HashShard(t.v) }
-      │ └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(t.v) }
-        └─StreamFilter { predicate: false:Boolean }
-          └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.v, t._row_id, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.v = t.v, output: [t.v, t._row_id, t._row_id] }
+        ├─StreamExchange { dist: HashShard(t.v) }
+        │ └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(t.v) }
+          └─StreamFilter { predicate: false:Boolean }
+            └─StreamTableScan { table: t, columns: [t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml b/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml
index 236bc31b2503e..2d1b0951089e8 100644
--- a/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/batch_index_join.yaml
@@ -60,9 +60,9 @@
     select t2.c, t2.d, count(distinct t.a) from t join t2 on t.a = t2.c group by t2.c, t2.d;
   batch_plan: |-
     BatchExchange { order: [], dist: Single }
-    └─BatchHashAgg { group_key: [first_value(t2.c order_by(t2.c ASC))], aggs: [first_value(first_value(t2.d order_by(t2.d ASC)) order_by(first_value(t2.d order_by(t2.d ASC)) ASC)), count(t.a)] }
-      └─BatchExchange { order: [], dist: HashShard(first_value(t2.c order_by(t2.c ASC))) }
-        └─BatchHashAgg { group_key: [t.a], aggs: [first_value(t2.c order_by(t2.c ASC)), first_value(t2.d order_by(t2.d ASC))] }
+    └─BatchHashAgg { group_key: [internal_last_seen_value(t2.c)], aggs: [internal_last_seen_value(internal_last_seen_value(t2.d)), count(t.a)] }
+      └─BatchExchange { order: [], dist: HashShard(internal_last_seen_value(t2.c)) }
+        └─BatchHashAgg { group_key: [t.a], aggs: [internal_last_seen_value(t2.c), internal_last_seen_value(t2.d)] }
           └─BatchLookupJoin { type: Inner, predicate: t.a = t2.c, output: [t2.c, t2.d, t.a] }
             └─BatchExchange { order: [], dist: UpstreamHashShard(t.a) }
               └─BatchScan { table: t, columns: [t.a], distribution: SomeShard }
diff --git a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml
index 6f4f8a673c996..e7196f7cf4fea 100644
--- a/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/ch_benchmark.yaml
@@ -134,141 +134,145 @@
                                 └─BatchScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], distribution: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
   stream_plan: |-
     StreamMaterialize { columns: [s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id(hidden), stock.s_w_id(hidden), min(stock.s_quantity)(hidden), $expr2(hidden), region.r_regionkey(hidden), supplier.s_nationkey(hidden)], stream_key: [stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_columns: [n_name, s_name, i_id, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] }
-      │   └─StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] }
-      │     ├─StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] }
-      │     │ └─StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] }
-      │     │   └─StreamExchange { dist: HashShard(stock.s_i_id) }
-      │     │     └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] }
-      │     │       ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
-      │     │       │ └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] }
-      │     │       │   ├─StreamExchange { dist: HashShard(region.r_regionkey) }
-      │     │       │   │ └─StreamProject { exprs: [region.r_regionkey] }
-      │     │       │   │   └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
-      │     │       │   │     └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
-      │     │       │   └─StreamExchange { dist: HashShard(nation.n_regionkey) }
-      │     │       │     └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
-      │     │       │       ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
-      │     │       │       │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
-      │     │       │       └─StreamExchange { dist: HashShard(nation.n_nationkey) }
-      │     │       │         └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
-      │     │       └─StreamExchange { dist: HashShard($expr1) }
-      │     │         └─StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] }
-      │     │           └─StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) }
-      │     │             └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
-      │     └─StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all }
-      │       ├─StreamExchange { dist: HashShard(item.i_id) }
-      │       │ └─StreamProject { exprs: [item.i_id, item.i_name] }
-      │       │   └─StreamFilter { predicate: Like(item.i_data, '%b':Varchar) }
-      │       │     └─StreamTableScan { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
-      │       └─StreamExchange { dist: HashShard(stock.s_i_id) }
-      │         └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
-      └─StreamExchange { dist: HashShard(supplier.s_suppkey) }
-        └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] }
-          ├─StreamExchange { dist: HashShard(region.r_regionkey) }
-          │ └─StreamProject { exprs: [region.r_regionkey] }
-          │   └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
-          │     └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
-          └─StreamExchange { dist: HashShard(nation.n_regionkey) }
-            └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
-              ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
-              │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
-              └─StreamExchange { dist: HashShard(nation.n_nationkey) }
-                └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
+    └─StreamExchange { dist: HashShard(stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey) }
+      └─StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] }
+        │   └─StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] }
+        │     ├─StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] }
+        │     │ └─StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] }
+        │     │   └─StreamExchange { dist: HashShard(stock.s_i_id) }
+        │     │     └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] }
+        │     │       ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+        │     │       │ └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] }
+        │     │       │   ├─StreamExchange { dist: HashShard(region.r_regionkey) }
+        │     │       │   │ └─StreamProject { exprs: [region.r_regionkey] }
+        │     │       │   │   └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
+        │     │       │   │     └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
+        │     │       │   └─StreamExchange { dist: HashShard(nation.n_regionkey) }
+        │     │       │     └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
+        │     │       │       ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
+        │     │       │       │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+        │     │       │       └─StreamExchange { dist: HashShard(nation.n_nationkey) }
+        │     │       │         └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
+        │     │       └─StreamExchange { dist: HashShard($expr1) }
+        │     │         └─StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] }
+        │     │           └─StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) }
+        │     │             └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
+        │     └─StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all }
+        │       ├─StreamExchange { dist: HashShard(item.i_id) }
+        │       │ └─StreamProject { exprs: [item.i_id, item.i_name] }
+        │       │   └─StreamFilter { predicate: Like(item.i_data, '%b':Varchar) }
+        │       │     └─StreamTableScan { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
+        │       └─StreamExchange { dist: HashShard(stock.s_i_id) }
+        │         └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
+        └─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+          └─StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] }
+            ├─StreamExchange { dist: HashShard(region.r_regionkey) }
+            │ └─StreamProject { exprs: [region.r_regionkey] }
+            │   └─StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
+            │     └─StreamTableScan { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) }
+            └─StreamExchange { dist: HashShard(nation.n_regionkey) }
+              └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] }
+                ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
+                │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+                └─StreamExchange { dist: HashShard(nation.n_nationkey) }
+                  └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id(hidden), stock.s_w_id(hidden), min(stock.s_quantity)(hidden), $expr2(hidden), region.r_regionkey(hidden), supplier.s_nationkey(hidden)], stream_key: [stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_columns: [n_name, s_name, i_id, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), region.r_regionkey, supplier.s_nationkey, $expr2], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([2]) from 1
-        └── StreamExchange Hash([0]) from 11
+    └── StreamExchange Hash([8, 9, 10, 11, 12, 13]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: $expr2 = supplier.s_suppkey, output: [supplier.s_suppkey, supplier.s_name, nation.n_name, item.i_id, item.i_name, supplier.s_address, supplier.s_phone, supplier.s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([2]) from 2
+    └── StreamExchange Hash([0]) from 12
+
+    Fragment 2
     StreamProject { exprs: [item.i_id, item.i_name, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr2, stock.s_i_id, stock.s_w_id, min(stock.s_quantity)] }
     └── StreamHashJoin { type: Inner, predicate: stock.s_i_id = item.i_id AND min(stock.s_quantity) = stock.s_quantity AND stock.s_i_id = stock.s_i_id, output: [item.i_id, item.i_name, stock.s_i_id, stock.s_w_id, stock.s_i_id, min(stock.s_quantity)] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
         ├── StreamProject { exprs: [stock.s_i_id, min(stock.s_quantity)] }
         │   └── StreamHashAgg { group_key: [stock.s_i_id], aggs: [min(stock.s_quantity), count] } { intermediate state table: 9, state tables: [ 8 ], distinct tables: [] }
-        │       └── StreamExchange Hash([0]) from 2
+        │       └── StreamExchange Hash([0]) from 3
         └── StreamHashJoin { type: Inner, predicate: item.i_id = stock.s_i_id, output: all } { left table: 26, right table: 28, left degree table: 27, right degree table: 29 }
-            ├── StreamExchange Hash([0]) from 9
-            └── StreamExchange Hash([0]) from 10
-
-    Fragment 2
-    StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 }
-    ├── StreamExchange Hash([0]) from 3
-    └── StreamExchange Hash([2]) from 8
+            ├── StreamExchange Hash([0]) from 10
+            └── StreamExchange Hash([0]) from 11
 
     Fragment 3
-    StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
+    StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [stock.s_i_id, stock.s_quantity, region.r_regionkey, supplier.s_suppkey, supplier.s_nationkey, stock.s_w_id] } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 }
     ├── StreamExchange Hash([0]) from 4
-    └── StreamExchange Hash([1]) from 5
+    └── StreamExchange Hash([2]) from 9
 
     Fragment 4
+    StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, region.r_regionkey, supplier.s_nationkey] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
+    ├── StreamExchange Hash([0]) from 5
+    └── StreamExchange Hash([1]) from 6
+
+    Fragment 5
     StreamProject { exprs: [region.r_regionkey] }
     └── StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
         └── Chain { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } { state table: 18 }
             ├── Upstream
             └── BatchPlanNode
 
-    Fragment 5
+    Fragment 6
     StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } { left table: 19, right table: 21, left degree table: 20, right degree table: 22 }
-    ├── StreamExchange Hash([1]) from 6
-    └── StreamExchange Hash([0]) from 7
+    ├── StreamExchange Hash([1]) from 7
+    └── StreamExchange Hash([0]) from 8
 
-    Fragment 6
+    Fragment 7
     Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 23 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 7
+    Fragment 8
     Chain { table: nation, columns: [nation.n_nationkey, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 24 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 8
+    Fragment 9
     StreamProject { exprs: [stock.s_i_id, stock.s_quantity, ((stock.s_w_id * stock.s_i_id) % 10000:Int32)::Int64 as $expr1, stock.s_w_id] }
     └── StreamFilter { predicate: (stock.s_i_id = stock.s_i_id) }
         └── Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 25 }
             ├── Upstream
             └── BatchPlanNode
 
-    Fragment 9
+    Fragment 10
     StreamProject { exprs: [item.i_id, item.i_name] }
     └── StreamFilter { predicate: Like(item.i_data, '%b':Varchar) }
         └── Chain { table: item, columns: [item.i_id, item.i_name, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } { state table: 30 }
             ├── Upstream
             └── BatchPlanNode
 
-    Fragment 10
+    Fragment 11
     Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 31 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 11
+    Fragment 12
     StreamHashJoin { type: Inner, predicate: region.r_regionkey = nation.n_regionkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, region.r_regionkey, supplier.s_nationkey] } { left table: 32, right table: 34, left degree table: 33, right degree table: 35 }
-    ├── StreamExchange Hash([0]) from 12
-    └── StreamExchange Hash([6]) from 13
+    ├── StreamExchange Hash([0]) from 13
+    └── StreamExchange Hash([6]) from 14
 
-    Fragment 12
+    Fragment 13
     StreamProject { exprs: [region.r_regionkey] }
     └── StreamFilter { predicate: Like(region.r_name, 'EUROP%':Varchar) }
         └── Chain { table: region, columns: [region.r_regionkey, region.r_name], pk: [region.r_regionkey], dist: UpstreamHashShard(region.r_regionkey) } { state table: 36 }
             ├── Upstream
             └── BatchPlanNode
 
-    Fragment 13
+    Fragment 14
     StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, supplier.s_comment, nation.n_name, nation.n_regionkey, supplier.s_nationkey, nation.n_nationkey] } { left table: 37, right table: 39, left degree table: 38, right degree table: 40 }
-    ├── StreamExchange Hash([3]) from 14
-    └── StreamExchange Hash([0]) from 15
+    ├── StreamExchange Hash([3]) from 15
+    └── StreamExchange Hash([0]) from 16
 
-    Fragment 14
+    Fragment 15
     Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey, supplier.s_phone, supplier.s_comment], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 41 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 15
+    Fragment 16
     Chain { table: nation, columns: [nation.n_nationkey, nation.n_name, nation.n_regionkey], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 42 }
     ├── Upstream
     └── BatchPlanNode
@@ -359,7 +363,7 @@
 
     Table 42 { columns: [ vnode, n_nationkey, nation_backfill_finished, nation_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
 
-    Table 4294967294 { columns: [ s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey ], primary key: [ $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $12 ASC, $13 ASC, $11 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ], distribution key: [ 11 ], read pk prefix len hint: 9 }
+    Table 4294967294 { columns: [ s_suppkey, s_name, n_name, i_id, i_name, s_address, s_phone, s_comment, stock.s_i_id, stock.s_w_id, min(stock.s_quantity), $expr2, region.r_regionkey, supplier.s_nationkey ], primary key: [ $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $12 ASC, $13 ASC, $11 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 ], distribution key: [ 8, 9, 10, 11, 12, 13 ], read pk prefix len hint: 9 }
 
 - id: ch_q3
   before:
@@ -2496,59 +2500,63 @@
       └─LogicalScan { table: revenue1, columns: [revenue1.total_revenue] }
   stream_plan: |-
     StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no(hidden)], stream_key: [s_suppkey, revenue1.supplier_no, total_revenue], pk_columns: [s_suppkey, revenue1.supplier_no, total_revenue], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
-      ├─StreamExchange { dist: HashShard(revenue1.total_revenue) }
-      │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
-      │   ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
-      │   │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
-      │   └─StreamExchange { dist: HashShard($expr1) }
-      │     └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] }
-      │       └─StreamTableScan { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
-      └─StreamExchange { dist: HashShard(max(max(revenue1.total_revenue))) }
-        └─StreamProject { exprs: [max(max(revenue1.total_revenue))] }
-          └─StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] }
-            └─StreamExchange { dist: Single }
-              └─StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] }
-                └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] }
-                  └─StreamTableScan { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
+    └─StreamExchange { dist: HashShard(supplier.s_suppkey, revenue1.total_revenue, revenue1.supplier_no) }
+      └─StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
+        ├─StreamExchange { dist: HashShard(revenue1.total_revenue) }
+        │ └─StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
+        │   ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+        │   │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+        │   └─StreamExchange { dist: HashShard($expr1) }
+        │     └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] }
+        │       └─StreamTableScan { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
+        └─StreamExchange { dist: HashShard(max(max(revenue1.total_revenue))) }
+          └─StreamProject { exprs: [max(max(revenue1.total_revenue))] }
+            └─StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] }
+              └─StreamExchange { dist: Single }
+                └─StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] }
+                  └─StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] }
+                    └─StreamTableScan { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no(hidden)], stream_key: [s_suppkey, revenue1.supplier_no, total_revenue], pk_columns: [s_suppkey, revenue1.supplier_no, total_revenue], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([4]) from 1
-        └── StreamExchange Hash([0]) from 4
+    └── StreamExchange Hash([0, 4, 5]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: revenue1.total_revenue = max(max(revenue1.total_revenue)), output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([4]) from 2
+    └── StreamExchange Hash([0]) from 5
+
+    Fragment 2
     StreamHashJoin { type: Inner, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue1.total_revenue, revenue1.supplier_no] }
     ├── left table: 4
     ├── right table: 6
     ├── left degree table: 5
     ├── right degree table: 7
-    ├── StreamExchange Hash([0]) from 2
-    └── StreamExchange Hash([1]) from 3
+    ├── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([1]) from 4
 
-    Fragment 2
+    Fragment 3
     Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 8 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no::Int64 as $expr1, revenue1.supplier_no] }
     └── Chain { table: revenue1, columns: [revenue1.supplier_no, revenue1.total_revenue], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } { state table: 9 }
         ├── Upstream
         └── BatchPlanNode
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [max(max(revenue1.total_revenue))] }
     └── StreamSimpleAgg { aggs: [max(max(revenue1.total_revenue)), count] } { intermediate state table: 11, state tables: [ 10 ], distinct tables: [] }
-        └── StreamExchange Single from 5
+        └── StreamExchange Single from 6
 
-    Fragment 5
+    Fragment 6
     StreamHashAgg { group_key: [$expr2], aggs: [max(revenue1.total_revenue), count] } { intermediate state table: 13, state tables: [ 12 ], distinct tables: [] }
     └── StreamProject { exprs: [revenue1.total_revenue, revenue1.supplier_no, Vnode(revenue1.supplier_no) as $expr2] }
         └── Chain { table: revenue1, columns: [revenue1.total_revenue, revenue1.supplier_no], pk: [revenue1.supplier_no], dist: UpstreamHashShard(revenue1.supplier_no) } { state table: 14 }
@@ -2590,7 +2598,7 @@
 
     Table 14 { columns: [ vnode, supplier_no, revenue1_backfill_finished, revenue1_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
 
-    Table 4294967294 { columns: [ s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no ], primary key: [ $0 ASC, $5 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 4 ], read pk prefix len hint: 3 }
+    Table 4294967294 { columns: [ s_suppkey, s_name, s_address, s_phone, total_revenue, revenue1.supplier_no ], primary key: [ $0 ASC, $5 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 0, 4, 5 ], read pk prefix len hint: 3 }
 
 - id: ch_q16
   before:
@@ -3174,58 +3182,62 @@
                             └─BatchScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_delivery_d], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [s_name, s_address, supplier.s_suppkey(hidden), supplier.s_nationkey(hidden)], stream_key: [supplier.s_suppkey, supplier.s_nationkey], pk_columns: [s_name, supplier.s_suppkey, supplier.s_nationkey], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] }
-      ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
-      │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all }
-      │   ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
-      │   │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
-      │   └─StreamExchange { dist: HashShard(nation.n_nationkey) }
-      │     └─StreamProject { exprs: [nation.n_nationkey] }
-      │       └─StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) }
-      │         └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
-      └─StreamExchange { dist: HashShard($expr1) }
-        └─StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] }
-          └─StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) }
-            └─StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] }
-              └─StreamHashAgg { group_key: [stock.s_i_id, stock.s_w_id, stock.s_quantity], aggs: [sum(order_line.ol_quantity), count] }
-                └─StreamHashJoin { type: LeftSemi, predicate: stock.s_i_id = item.i_id, output: all }
-                  ├─StreamHashJoin { type: Inner, predicate: stock.s_i_id = order_line.ol_i_id, output: [stock.s_i_id, stock.s_w_id, stock.s_quantity, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
-                  │ ├─StreamExchange { dist: HashShard(stock.s_i_id) }
-                  │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
-                  │ └─StreamExchange { dist: HashShard(order_line.ol_i_id) }
-                  │   └─StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
-                  │     └─StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) }
-                  │       └─StreamTableScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) }
-                  └─StreamExchange { dist: HashShard(item.i_id) }
-                    └─StreamProject { exprs: [item.i_id] }
-                      └─StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) }
-                        └─StreamTableScan { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
+    └─StreamExchange { dist: HashShard(supplier.s_suppkey, supplier.s_nationkey) }
+      └─StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] }
+        ├─StreamExchange { dist: HashShard(supplier.s_suppkey) }
+        │ └─StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all }
+        │   ├─StreamExchange { dist: HashShard(supplier.s_nationkey) }
+        │   │ └─StreamTableScan { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) }
+        │   └─StreamExchange { dist: HashShard(nation.n_nationkey) }
+        │     └─StreamProject { exprs: [nation.n_nationkey] }
+        │       └─StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) }
+        │         └─StreamTableScan { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) }
+        └─StreamExchange { dist: HashShard($expr1) }
+          └─StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] }
+            └─StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) }
+              └─StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] }
+                └─StreamHashAgg { group_key: [stock.s_i_id, stock.s_w_id, stock.s_quantity], aggs: [sum(order_line.ol_quantity), count] }
+                  └─StreamHashJoin { type: LeftSemi, predicate: stock.s_i_id = item.i_id, output: all }
+                    ├─StreamHashJoin { type: Inner, predicate: stock.s_i_id = order_line.ol_i_id, output: [stock.s_i_id, stock.s_w_id, stock.s_quantity, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
+                    │ ├─StreamExchange { dist: HashShard(stock.s_i_id) }
+                    │ │ └─StreamTableScan { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) }
+                    │ └─StreamExchange { dist: HashShard(order_line.ol_i_id) }
+                    │   └─StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
+                    │     └─StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) }
+                    │       └─StreamTableScan { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) }
+                    └─StreamExchange { dist: HashShard(item.i_id) }
+                      └─StreamProject { exprs: [item.i_id] }
+                        └─StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) }
+                          └─StreamTableScan { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [s_name, s_address, supplier.s_suppkey(hidden), supplier.s_nationkey(hidden)], stream_key: [supplier.s_suppkey, supplier.s_nationkey], pk_columns: [s_name, supplier.s_suppkey, supplier.s_nationkey], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 4
+    └── StreamExchange Hash([2, 3]) from 1
 
     Fragment 1
-    StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
-    ├── StreamExchange Hash([3]) from 2
-    └── StreamExchange Hash([0]) from 3
+    StreamHashJoin { type: LeftSemi, predicate: supplier.s_suppkey = $expr1, output: [supplier.s_name, supplier.s_address, supplier.s_suppkey, supplier.s_nationkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 5
 
     Fragment 2
+    StreamHashJoin { type: Inner, predicate: supplier.s_nationkey = nation.n_nationkey, output: all } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
+    ├── StreamExchange Hash([3]) from 3
+    └── StreamExchange Hash([0]) from 4
+
+    Fragment 3
     Chain { table: supplier, columns: [supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_nationkey], pk: [supplier.s_suppkey], dist: UpstreamHashShard(supplier.s_suppkey) } { state table: 8 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [nation.n_nationkey] }
     └── StreamFilter { predicate: (nation.n_name = 'CHINA':Varchar) }
         └── Chain { table: nation, columns: [nation.n_nationkey, nation.n_name], pk: [nation.n_nationkey], dist: UpstreamHashShard(nation.n_nationkey) } { state table: 9 }
             ├── Upstream
             └── BatchPlanNode
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [((stock.s_i_id * stock.s_w_id) % 10000:Int32)::Int64 as $expr1, stock.s_i_id, stock.s_w_id, stock.s_quantity] }
     └── StreamFilter { predicate: ((2:Int32 * stock.s_quantity) > sum(order_line.ol_quantity)) }
         └── StreamProject { exprs: [stock.s_i_id, stock.s_w_id, stock.s_quantity, sum(order_line.ol_quantity)] }
@@ -3236,16 +3248,16 @@
                     │   ├── right table: 17
                     │   ├── left degree table: 16
                     │   ├── right degree table: 18
-                    │   ├── StreamExchange Hash([0]) from 5
-                    │   └── StreamExchange Hash([0]) from 6
-                    └── StreamExchange Hash([0]) from 7
+                    │   ├── StreamExchange Hash([0]) from 6
+                    │   └── StreamExchange Hash([0]) from 7
+                    └── StreamExchange Hash([0]) from 8
 
-    Fragment 5
+    Fragment 6
     Chain { table: stock, columns: [stock.s_i_id, stock.s_w_id, stock.s_quantity], pk: [stock.s_w_id, stock.s_i_id], dist: UpstreamHashShard(stock.s_i_id, stock.s_w_id) } { state table: 19 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 6
+    Fragment 7
     StreamProject { exprs: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number] }
     └── StreamFilter { predicate: (order_line.ol_delivery_d > '2010-05-23 12:00:00':Timestamp) }
         └── Chain { table: order_line, columns: [order_line.ol_i_id, order_line.ol_quantity, order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number, order_line.ol_delivery_d], pk: [order_line.ol_w_id, order_line.ol_d_id, order_line.ol_o_id, order_line.ol_number], dist: UpstreamHashShard(order_line.ol_o_id, order_line.ol_d_id, order_line.ol_w_id, order_line.ol_number) }
@@ -3253,7 +3265,7 @@
             ├── Upstream
             └── BatchPlanNode
 
-    Fragment 7
+    Fragment 8
     StreamProject { exprs: [item.i_id] }
     └── StreamFilter { predicate: Like(item.i_data, 'co%':Varchar) }
         └── Chain { table: item, columns: [item.i_id, item.i_data], pk: [item.i_id], dist: UpstreamHashShard(item.i_id) } { state table: 21 }
@@ -3304,7 +3316,7 @@
 
     Table 21 { columns: [ vnode, i_id, item_backfill_finished, item_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
 
-    Table 4294967294 { columns: [ s_name, s_address, supplier.s_suppkey, supplier.s_nationkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 2 ], read pk prefix len hint: 3 }
+    Table 4294967294 { columns: [ s_name, s_address, supplier.s_suppkey, supplier.s_nationkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 2, 3 ], read pk prefix len hint: 3 }
 
 - id: ch_q21
   before:
diff --git a/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml b/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml
index 9c961429276a3..dece27002b19b 100644
--- a/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/common_table_expressions.yaml
@@ -23,11 +23,12 @@
           └─LogicalScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id] }
   stream_plan: |-
     StreamMaterialize { columns: [v3, v4, v1, t2._row_id(hidden), t1._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, v3], pk_columns: [t2._row_id, t1._row_id, v3], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t2._row_id, t1._row_id] }
-      ├─StreamExchange { dist: HashShard(t2.v3) }
-      │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-      └─StreamExchange { dist: HashShard(t1.v1) }
-        └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+    └─StreamExchange { dist: HashShard(t2.v3, t2._row_id, t1._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t2._row_id, t1._row_id] }
+        ├─StreamExchange { dist: HashShard(t2.v3) }
+        │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+        └─StreamExchange { dist: HashShard(t1.v1) }
+          └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
 - sql: |
     create table t1 (v1 int, v2 int);
     create table t2 (v3 int, v4 int);
@@ -79,8 +80,9 @@
         └─LogicalValues { rows: [['cn':Varchar, 'China':Varchar], ['us':Varchar, 'United States':Varchar]], schema: Schema { fields: [*VALUES*_0.column_0:Varchar, *VALUES*_0.column_1:Varchar] } }
   stream_plan: |-
     StreamMaterialize { columns: [v, c, abbr, real, t._row_id(hidden), _row_id(hidden)], stream_key: [t._row_id, _row_id, c], pk_columns: [t._row_id, _row_id, c], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.c = *VALUES*_0.column_0, output: [t.v, t.c, *VALUES*_0.column_0, *VALUES*_0.column_1, t._row_id, _row_id] }
-      ├─StreamExchange { dist: HashShard(t.c) }
-      │ └─StreamTableScan { table: t, columns: [t.v, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(*VALUES*_0.column_0) }
-        └─StreamValues { rows: [['cn':Varchar, 'China':Varchar, 0:Int64], ['us':Varchar, 'United States':Varchar, 1:Int64]] }
+    └─StreamExchange { dist: HashShard(t.c, t._row_id, _row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.c = *VALUES*_0.column_0, output: [t.v, t.c, *VALUES*_0.column_0, *VALUES*_0.column_1, t._row_id, _row_id] }
+        ├─StreamExchange { dist: HashShard(t.c) }
+        │ └─StreamTableScan { table: t, columns: [t.v, t.c, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(*VALUES*_0.column_0) }
+          └─StreamValues { rows: [['cn':Varchar, 'China':Varchar, 0:Int64], ['us':Varchar, 'United States':Varchar, 1:Int64]] }
diff --git a/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml b/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml
index 85d76188f3e76..818fd88b30a20 100644
--- a/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/distribution_derive.yaml
@@ -963,31 +963,35 @@
           └─BatchScan { table: a, columns: [a.k1], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), a.k1(hidden)], stream_key: [ak1.a._row_id, ak1.k1], pk_columns: [ak1.a._row_id, ak1.k1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
-      ├─StreamExchange { dist: HashShard(ak1.k1) }
-      │ └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
-      └─StreamHashAgg { group_key: [a.k1], aggs: [count] }
-        └─StreamExchange { dist: HashShard(a.k1) }
-          └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
+    └─StreamExchange { dist: HashShard(ak1.a._row_id, ak1.k1) }
+      └─StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
+        ├─StreamExchange { dist: HashShard(ak1.k1) }
+        │ └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
+        └─StreamHashAgg { group_key: [a.k1], aggs: [count] }
+          └─StreamExchange { dist: HashShard(a.k1) }
+            └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [v, bv, ak1.a._row_id(hidden), ak1.k1(hidden), a.k1(hidden)], stream_key: [ak1.a._row_id, ak1.k1], pk_columns: [ak1.a._row_id, ak1.k1], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
-            └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([2, 3]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: ak1.k1 = a.k1, output: [ak1.v, count, ak1.a._row_id, ak1.k1, a.k1] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+        └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } { state table: 4 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 6 }
     ├── Upstream
     └── BatchPlanNode
@@ -1022,7 +1026,7 @@
     ├── columns: [ v, bv, ak1.a._row_id, ak1.k1, a.k1 ]
     ├── primary key: [ $2 ASC, $3 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4 ]
-    ├── distribution key: [ 3 ]
+    ├── distribution key: [ 2, 3 ]
     └── read pk prefix len hint: 2
 
 - id: aggk1_join_Ak1_onk1
@@ -1054,31 +1058,35 @@
             └─BatchScan { table: a, columns: [a.k1], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v, bv, a.k1(hidden), ak1.a._row_id(hidden)], stream_key: [a.k1, ak1.a._row_id], pk_columns: [a.k1, ak1.a._row_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
-      ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
-      │ └─StreamExchange { dist: HashShard(a.k1) }
-      │   └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
-      └─StreamExchange { dist: HashShard(ak1.k1) }
-        └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
+    └─StreamExchange { dist: HashShard(a.k1, ak1.a._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
+        ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
+        │ └─StreamExchange { dist: HashShard(a.k1) }
+        │   └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
+        └─StreamExchange { dist: HashShard(ak1.k1) }
+          └─StreamTableScan { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [v, bv, a.k1(hidden), ak1.a._row_id(hidden)], stream_key: [a.k1, ak1.a._row_id], pk_columns: [a.k1, ak1.a._row_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
-        │   └── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([2, 3]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: a.k1 = ak1.k1, output: [ak1.v, count, a.k1, ak1.a._row_id] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
+    │   └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 5 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     Chain { table: ak1, columns: [ak1.k1, ak1.v, ak1.a._row_id], pk: [ak1.a._row_id], dist: UpstreamHashShard(ak1.k1) } { state table: 6 }
     ├── Upstream
     └── BatchPlanNode
@@ -1113,7 +1121,7 @@
     ├── columns: [ v, bv, a.k1, ak1.a._row_id ]
     ├── primary key: [ $2 ASC, $3 ASC ]
     ├── value indices: [ 0, 1, 2, 3 ]
-    ├── distribution key: [ 2 ]
+    ├── distribution key: [ 2, 3 ]
     └── read pk prefix len hint: 2
 
 - id: aggk1_join_aggk1_onk1
@@ -1156,33 +1164,37 @@
           └─BatchScan { table: b, columns: [b.k1], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [num, bv, a.k1(hidden), b.k1(hidden)], stream_key: [a.k1], pk_columns: [a.k1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
-      ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
-      │ └─StreamExchange { dist: HashShard(a.k1) }
-      │   └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
-      └─StreamHashAgg { group_key: [b.k1], aggs: [count] }
-        └─StreamExchange { dist: HashShard(b.k1) }
-          └─StreamTableScan { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) }
+    └─StreamExchange { dist: HashShard(a.k1) }
+      └─StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
+        ├─StreamHashAgg { group_key: [a.k1], aggs: [count] }
+        │ └─StreamExchange { dist: HashShard(a.k1) }
+        │   └─StreamTableScan { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) }
+        └─StreamHashAgg { group_key: [b.k1], aggs: [count] }
+          └─StreamExchange { dist: HashShard(b.k1) }
+            └─StreamTableScan { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [num, bv, a.k1(hidden), b.k1(hidden)], stream_key: [a.k1], pk_columns: [a.k1], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
-        │   └── StreamExchange Hash([0]) from 1
-        └── StreamHashAgg { group_key: [b.k1], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
-            └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([2]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: a.k1 = b.k1, output: [count, count, a.k1, b.k1] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamHashAgg { group_key: [a.k1], aggs: [count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
+    │   └── StreamExchange Hash([0]) from 2
+    └── StreamHashAgg { group_key: [b.k1], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+        └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     Chain { table: a, columns: [a.k1, a._row_id], pk: [a._row_id], dist: UpstreamHashShard(a._row_id) } { state table: 5 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     Chain { table: b, columns: [b.k1, b._row_id], pk: [b._row_id], dist: UpstreamHashShard(b._row_id) } { state table: 7 }
     ├── Upstream
     └── BatchPlanNode
diff --git a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml
index a340014298c47..ab282ebe3858a 100644
--- a/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/dynamic_filter.yaml
@@ -124,16 +124,17 @@
       └─LogicalScan { table: t2, columns: [t2.v2] }
   stream_plan: |-
     StreamMaterialize { columns: [v1, max, t1._row_id(hidden)], stream_key: [t1._row_id, v1], pk_columns: [t1._row_id, v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v1 = max(max(t2.v2)), output: [t1.v1, max(max(t2.v2)), t1._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(max(max(t2.v2))) }
-        └─StreamProject { exprs: [max(max(t2.v2))] }
-          └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] }
-            └─StreamExchange { dist: Single }
-              └─StreamHashAgg { group_key: [$expr1], aggs: [max(t2.v2), count] }
-                └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
-                  └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v1 = max(max(t2.v2)), output: [t1.v1, max(max(t2.v2)), t1._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(max(max(t2.v2))) }
+          └─StreamProject { exprs: [max(max(t2.v2))] }
+            └─StreamSimpleAgg { aggs: [max(max(t2.v2)), count] }
+              └─StreamExchange { dist: Single }
+                └─StreamHashAgg { group_key: [$expr1], aggs: [max(t2.v2), count] }
+                  └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
+                    └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: Dynamic filter join on unequal types
   sql: |
     create table t1 (v1 int);
diff --git a/src/frontend/planner_test/tests/testdata/output/except.yaml b/src/frontend/planner_test/tests/testdata/output/except.yaml
index 204a1814b8db7..1e27a7b74c0f0 100644
--- a/src/frontend/planner_test/tests/testdata/output/except.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/except.yaml
@@ -108,20 +108,20 @@
     create table t2 (a int, b numeric, c bigint, primary key(a));
     select * from t1 except select * from t2;
   optimized_logical_plan_for_batch: |-
-    LogicalAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+    LogicalAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
     └─LogicalJoin { type: LeftAnti, on: IsNotDistinctFrom(t1.a, t2.a) AND IsNotDistinctFrom(t1.b, t2.b) AND IsNotDistinctFrom(t1.c, t2.c), output: all }
       ├─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.c] }
       └─LogicalScan { table: t2, columns: [t2.a, t2.b, t2.c] }
   batch_plan: |-
     BatchExchange { order: [], dist: Single }
-    └─BatchHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+    └─BatchHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
       └─BatchLookupJoin { type: LeftAnti, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
         └─BatchExchange { order: [], dist: UpstreamHashShard(t1.a) }
           └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c], distribution: UpstreamHashShard(t1.a) }
   stream_plan: |-
     StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
-    └─StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
-      └─StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
+    └─StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+      └─StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
         └─StreamExchange { dist: HashShard(t1.a) }
           └─StreamHashJoin { type: LeftAnti, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
             ├─StreamExchange { dist: HashShard(t1.a, t1.b, t1.c) }
@@ -130,83 +130,70 @@
               └─StreamTableScan { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) }
   stream_dist_plan: |+
     Fragment 0
-    StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck } { materialized table: 4294967294 }
-    └── StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
-        └── StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
-            ├── intermediate state table: 2
-            ├── state tables: [ 0, 1 ]
+    StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
+    ├── materialized table: 4294967294
+    └── StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+        └── StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
+            ├── intermediate state table: 0
+            ├── state tables: []
             ├── distinct tables: []
             └── StreamExchange Hash([0]) from 1
 
     Fragment 1
     StreamHashJoin { type: LeftAnti, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
-    ├── left table: 3
-    ├── right table: 5
-    ├── left degree table: 4
-    ├── right degree table: 6
+    ├── left table: 1
+    ├── right table: 3
+    ├── left degree table: 2
+    ├── right degree table: 4
     ├── StreamExchange Hash([0, 1, 2]) from 2
     └── StreamExchange Hash([0, 1, 2]) from 3
 
     Fragment 2
-    Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 7 }
+    Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 5 }
     ├── Upstream
     └── BatchPlanNode
 
     Fragment 3
-    Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 8 }
+    Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 6 }
     ├── Upstream
     └── BatchPlanNode
 
     Table 0
-    ├── columns: [ t1_a, t1_b, t1_c ]
-    ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
-    ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
-    └── read pk prefix len hint: 1
-
-    Table 1
-    ├── columns: [ t1_a, t1_c, t1_b ]
-    ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
-    ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
-    └── read pk prefix len hint: 1
-
-    Table 2
-    ├── columns: [ t1_a, first_value(t1_b order_by(t1_b ASC)), first_value(t1_c order_by(t1_c ASC)), count ]
+    ├── columns: [ t1_a, internal_last_seen_value(t1_b), internal_last_seen_value(t1_c), count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
     ├── distribution key: [ 0 ]
     └── read pk prefix len hint: 1
 
-    Table 3
+    Table 1
     ├── columns: [ t1_a, t1_b, t1_c ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 0, 1, 2 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 4
+    Table 2
     ├── columns: [ t1_a, t1_b, t1_c, _degree ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 3 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 5
+    Table 3
     ├── columns: [ t2_a, t2_b, t2_c ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 0, 1, 2 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 6
+    Table 4
     ├── columns: [ t2_a, t2_b, t2_c, _degree ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 3 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 7
+    Table 5
     ├── columns: [ vnode, a, t1_backfill_finished, t1_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
@@ -214,7 +201,7 @@
     ├── read pk prefix len hint: 1
     └── vnode column idx: 0
 
-    Table 8
+    Table 6
     ├── columns: [ vnode, a, t2_backfill_finished, t2_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
diff --git a/src/frontend/planner_test/tests/testdata/output/intersect.yaml b/src/frontend/planner_test/tests/testdata/output/intersect.yaml
index c203f1f953814..91839346824ec 100644
--- a/src/frontend/planner_test/tests/testdata/output/intersect.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/intersect.yaml
@@ -108,20 +108,20 @@
     create table t2 (a int, b numeric, c bigint, primary key(a));
     select * from t1 intersect select * from t2;
   optimized_logical_plan_for_batch: |-
-    LogicalAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+    LogicalAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
     └─LogicalJoin { type: LeftSemi, on: IsNotDistinctFrom(t1.a, t2.a) AND IsNotDistinctFrom(t1.b, t2.b) AND IsNotDistinctFrom(t1.c, t2.c), output: all }
       ├─LogicalScan { table: t1, columns: [t1.a, t1.b, t1.c] }
       └─LogicalScan { table: t2, columns: [t2.a, t2.b, t2.c] }
   batch_plan: |-
     BatchExchange { order: [], dist: Single }
-    └─BatchHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
+    └─BatchHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
       └─BatchLookupJoin { type: LeftSemi, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
         └─BatchExchange { order: [], dist: UpstreamHashShard(t1.a) }
           └─BatchScan { table: t1, columns: [t1.a, t1.b, t1.c], distribution: UpstreamHashShard(t1.a) }
   stream_plan: |-
     StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
-    └─StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
-      └─StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
+    └─StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+      └─StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
         └─StreamExchange { dist: HashShard(t1.a) }
           └─StreamHashJoin { type: LeftSemi, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
             ├─StreamExchange { dist: HashShard(t1.a, t1.b, t1.c) }
@@ -130,83 +130,70 @@
               └─StreamTableScan { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) }
   stream_dist_plan: |+
     Fragment 0
-    StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck } { materialized table: 4294967294 }
-    └── StreamProject { exprs: [t1.a, first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC))] }
-        └── StreamHashAgg { group_key: [t1.a], aggs: [first_value(t1.b order_by(t1.b ASC)), first_value(t1.c order_by(t1.c ASC)), count] }
-            ├── intermediate state table: 2
-            ├── state tables: [ 0, 1 ]
+    StreamMaterialize { columns: [a, b, c], stream_key: [a], pk_columns: [a], pk_conflict: NoCheck }
+    ├── materialized table: 4294967294
+    └── StreamProject { exprs: [t1.a, internal_last_seen_value(t1.b), internal_last_seen_value(t1.c)] }
+        └── StreamHashAgg { group_key: [t1.a], aggs: [internal_last_seen_value(t1.b), internal_last_seen_value(t1.c), count] }
+            ├── intermediate state table: 0
+            ├── state tables: []
             ├── distinct tables: []
             └── StreamExchange Hash([0]) from 1
 
     Fragment 1
     StreamHashJoin { type: LeftSemi, predicate: t1.a IS NOT DISTINCT FROM t2.a AND t1.b IS NOT DISTINCT FROM t2.b AND t1.c IS NOT DISTINCT FROM t2.c, output: all }
-    ├── left table: 3
-    ├── right table: 5
-    ├── left degree table: 4
-    ├── right degree table: 6
+    ├── left table: 1
+    ├── right table: 3
+    ├── left degree table: 2
+    ├── right degree table: 4
     ├── StreamExchange Hash([0, 1, 2]) from 2
     └── StreamExchange Hash([0, 1, 2]) from 3
 
     Fragment 2
-    Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 7 }
+    Chain { table: t1, columns: [t1.a, t1.b, t1.c], pk: [t1.a], dist: UpstreamHashShard(t1.a) } { state table: 5 }
     ├── Upstream
     └── BatchPlanNode
 
     Fragment 3
-    Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 8 }
+    Chain { table: t2, columns: [t2.a, t2.b, t2.c], pk: [t2.a], dist: UpstreamHashShard(t2.a) } { state table: 6 }
     ├── Upstream
     └── BatchPlanNode
 
     Table 0
-    ├── columns: [ t1_a, t1_b, t1_c ]
-    ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
-    ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
-    └── read pk prefix len hint: 1
-
-    Table 1
-    ├── columns: [ t1_a, t1_c, t1_b ]
-    ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
-    ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
-    └── read pk prefix len hint: 1
-
-    Table 2
-    ├── columns: [ t1_a, first_value(t1_b order_by(t1_b ASC)), first_value(t1_c order_by(t1_c ASC)), count ]
+    ├── columns: [ t1_a, internal_last_seen_value(t1_b), internal_last_seen_value(t1_c), count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
     ├── distribution key: [ 0 ]
     └── read pk prefix len hint: 1
 
-    Table 3
+    Table 1
     ├── columns: [ t1_a, t1_b, t1_c ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 0, 1, 2 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 4
+    Table 2
     ├── columns: [ t1_a, t1_b, t1_c, _degree ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 3 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 5
+    Table 3
     ├── columns: [ t2_a, t2_b, t2_c ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 0, 1, 2 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 6
+    Table 4
     ├── columns: [ t2_a, t2_b, t2_c, _degree ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
     ├── value indices: [ 3 ]
     ├── distribution key: [ 0, 1, 2 ]
     └── read pk prefix len hint: 3
 
-    Table 7
+    Table 5
     ├── columns: [ vnode, a, t1_backfill_finished, t1_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
@@ -214,7 +201,7 @@
     ├── read pk prefix len hint: 1
     └── vnode column idx: 0
 
-    Table 8
+    Table 6
     ├── columns: [ vnode, a, t2_backfill_finished, t2_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
diff --git a/src/frontend/planner_test/tests/testdata/output/join.yaml b/src/frontend/planner_test/tests/testdata/output/join.yaml
index 4ef01cc84ab47..a61d2a0d73327 100644
--- a/src/frontend/planner_test/tests/testdata/output/join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/join.yaml
@@ -14,14 +14,15 @@
         └─LogicalScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id] }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v1 = t3.v5, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
-      ├─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
-      │ ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      │ └─StreamExchange { dist: HashShard(t2.v3) }
-      │   └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-      └─StreamExchange { dist: HashShard(t3.v5) }
-        └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id, t3._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v1 = t3.v5, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
+        ├─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
+        │ ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        │ └─StreamExchange { dist: HashShard(t2.v3) }
+        │   └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+        └─StreamExchange { dist: HashShard(t3.v5) }
+          └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
 - name: self join
   sql: |
     create table t (v1 int, v2 int);
@@ -33,11 +34,12 @@
       └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] }
   stream_plan: |-
     StreamMaterialize { columns: [t1v1, t2v1, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, t1v1], pk_columns: [t._row_id, t._row_id#1, t1v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v1, t._row_id, t._row_id] }
-      ├─StreamExchange { dist: HashShard(t.v1) }
-      │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(t.v1) }
-        └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v1, t._row_id, t._row_id] }
+        ├─StreamExchange { dist: HashShard(t.v1) }
+        │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(t.v1) }
+          └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - sql: |
     create table t1 (v1 int, v2 int);
     create table t2 (v1 int, v2 int);
@@ -65,15 +67,16 @@
       └─BatchScan { table: t3, columns: [t3.v1, t3.v2], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [t1_v1, t1_v2, t2_v1, t2_v2, t3_v1, t3_v2, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1, t3._row_id, t2_v2], pk_columns: [t1._row_id, t2._row_id, t1_v1, t3._row_id, t2_v2], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t2.v2 = t3.v2, output: [t1.v1, t1.v2, t2.v1, t2.v2, t3.v1, t3.v2, t1._row_id, t2._row_id, t3._row_id] }
-      ├─StreamExchange { dist: HashShard(t2.v2) }
-      │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
-      │   ├─StreamExchange { dist: HashShard(t1.v1) }
-      │   │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      │   └─StreamExchange { dist: HashShard(t2.v1) }
-      │     └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-      └─StreamExchange { dist: HashShard(t3.v2) }
-        └─StreamTableScan { table: t3, columns: [t3.v1, t3.v2, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t2.v2, t1._row_id, t2._row_id, t3._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t2.v2 = t3.v2, output: [t1.v1, t1.v2, t2.v1, t2.v2, t3.v1, t3.v2, t1._row_id, t2._row_id, t3._row_id] }
+        ├─StreamExchange { dist: HashShard(t2.v2) }
+        │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+        │   ├─StreamExchange { dist: HashShard(t1.v1) }
+        │   │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        │   └─StreamExchange { dist: HashShard(t2.v1) }
+        │     └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+        └─StreamExchange { dist: HashShard(t3.v2) }
+          └─StreamTableScan { table: t3, columns: [t3.v1, t3.v2, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
 - sql: |
     create table t1 (v1 int, v2 int);
     create table t2 (v1 int, v2 int);
@@ -93,11 +96,12 @@
       └─BatchScan { table: t2, columns: [t2.v1, t2.v2], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [t1_v2, t2_v2, t1._row_id(hidden), t1.v1(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1.v1], pk_columns: [t1._row_id, t2._row_id, t1.v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v2, t2.v2, t1._row_id, t1.v1, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.v1) }
-        └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1._row_id, t1.v1, t2._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v1, output: [t1.v2, t2.v2, t1._row_id, t1.v1, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.v1) }
+          └─StreamTableScan { table: t2, columns: [t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - sql: |
     create table t1 (v1 int, v2 int);
     create table t2 (v1 int, v2 int);
@@ -154,11 +158,12 @@
       └─BatchScan { table: i, columns: [i.x], distribution: UpstreamHashShard(i.x) }
   stream_plan: |-
     StreamMaterialize { columns: [ix, iix, i.t._row_id(hidden), i.t._row_id#1(hidden)], stream_key: [i.t._row_id, i.t._row_id#1, ix], pk_columns: [i.t._row_id, i.t._row_id#1, ix], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id] }
-      ├─StreamExchange { dist: HashShard(i.x) }
-      │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
-      └─StreamExchange { dist: HashShard(i.x) }
-        └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
+    └─StreamExchange { dist: HashShard(i.x, i.t._row_id, i.t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: i.x = i.x, output: [i.x, i.x, i.t._row_id, i.t._row_id] }
+        ├─StreamExchange { dist: HashShard(i.x) }
+        │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
+        └─StreamExchange { dist: HashShard(i.x) }
+          └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
 - name: Left & right has same SomeShard distribution. There should still be exchanges below hash join
   sql: |
     create table t(x int);
@@ -170,11 +175,12 @@
       └─BatchScan { table: i, columns: [i.x], distribution: UpstreamHashShard(i.x) }
   stream_plan: |-
     StreamMaterialize { columns: [ix, tx, i.t._row_id(hidden), t._row_id(hidden)], stream_key: [i.t._row_id, t._row_id, ix], pk_columns: [i.t._row_id, t._row_id, ix], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: i.x = t.x, output: [i.x, t.x, i.t._row_id, t._row_id] }
-      ├─StreamExchange { dist: HashShard(i.x) }
-      │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
-      └─StreamExchange { dist: HashShard(t.x) }
-        └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(i.x, i.t._row_id, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: i.x = t.x, output: [i.x, t.x, i.t._row_id, t._row_id] }
+        ├─StreamExchange { dist: HashShard(i.x) }
+        │ └─StreamTableScan { table: i, columns: [i.x, i.t._row_id], pk: [i.t._row_id], dist: UpstreamHashShard(i.x) }
+        └─StreamExchange { dist: HashShard(t.x) }
+          └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - name: Left & right has same HashShard distribution. There should be no exchange below hash join
   sql: |
     create table t(x int);
@@ -628,12 +634,13 @@
         └─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), $expr1(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, $expr1], pk_columns: [t1._row_id, t2._row_id, $expr1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: $expr1 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr1, t2._row_id] }
-      ├─StreamExchange { dist: HashShard($expr1) }
-      │ └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] }
-      │   └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.v2) }
-        └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1._row_id, $expr1, t2._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: $expr1 IS NOT DISTINCT FROM t2.v2, output: [t1.v1, t2.v2, t1._row_id, $expr1, t2._row_id] }
+        ├─StreamExchange { dist: HashShard($expr1) }
+        │ └─StreamProject { exprs: [t1.v1, t1.v1::Int64 as $expr1, t1._row_id] }
+        │   └─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.v2) }
+          └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: Repeated columns in project should not interfere with join result (https://github.com/risingwavelabs/risingwave/issues/8216)
   sql: |
     create table t(x int);
@@ -652,39 +659,43 @@
     select t1.src p1, t1.dst p2, t2.dst p3 from t t1, t t2, t t3 where t1.dst = t2.src and t2.src = t3.dst and t3.dst = t1.src;
   stream_plan: |-
     StreamMaterialize { columns: [p1, p2, p3, t._row_id(hidden), t._row_id#1(hidden), t.src(hidden), t._row_id#2(hidden)], stream_key: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_columns: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] }
-      ├─StreamExchange { dist: HashShard(t.src) }
-      │ └─StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] }
-      │   ├─StreamExchange { dist: HashShard(t.dst) }
-      │   │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      │   └─StreamExchange { dist: HashShard(t.src) }
-      │     └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(t.dst) }
-        └─StreamTableScan { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.src, t.dst, t._row_id, t._row_id, t.src, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] }
+        ├─StreamExchange { dist: HashShard(t.src) }
+        │ └─StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] }
+        │   ├─StreamExchange { dist: HashShard(t.dst) }
+        │   │ └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        │   └─StreamExchange { dist: HashShard(t.src) }
+        │     └─StreamTableScan { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(t.dst) }
+          └─StreamTableScan { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [p1, p2, p3, t._row_id(hidden), t._row_id#1(hidden), t.src(hidden), t._row_id#2(hidden)], stream_key: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_columns: [t._row_id, t._row_id#1, p2, t._row_id#2, t.src, p1], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 4
+    └── StreamExchange Hash([0, 1, 3, 4, 5, 6]) from 1
 
     Fragment 1
-    StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
-    ├── StreamExchange Hash([1]) from 2
-    └── StreamExchange Hash([0]) from 3
+    StreamHashJoin { type: Inner, predicate: t.src = t.dst AND t.src = t.dst, output: [t.src, t.dst, t.dst, t._row_id, t._row_id, t.src, t._row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 5
 
     Fragment 2
+    StreamHashJoin { type: Inner, predicate: t.dst = t.src, output: [t.src, t.dst, t.src, t.dst, t._row_id, t._row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
+    ├── StreamExchange Hash([1]) from 3
+    └── StreamExchange Hash([0]) from 4
+
+    Fragment 3
     Chain { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 8 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 3
+    Fragment 4
     Chain { table: t, columns: [t.src, t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 9 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 4
+    Fragment 5
     Chain { table: t, columns: [t.dst, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) } { state table: 10 }
     ├── Upstream
     └── BatchPlanNode
@@ -711,5 +722,5 @@
 
     Table 10 { columns: [ vnode, _row_id, t_backfill_finished, t_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
 
-    Table 4294967294 { columns: [ p1, p2, p3, t._row_id, t._row_id#1, t.src, t._row_id#2 ], primary key: [ $3 ASC, $4 ASC, $1 ASC, $6 ASC, $5 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 6 }
+    Table 4294967294 { columns: [ p1, p2, p3, t._row_id, t._row_id#1, t.src, t._row_id#2 ], primary key: [ $3 ASC, $4 ASC, $1 ASC, $6 ASC, $5 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 1, 3, 4, 5, 6 ], read pk prefix len hint: 6 }
 
diff --git a/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml b/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml
index b24fc18c6b513..31c53d02a9a18 100644
--- a/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/join_ordering.yaml
@@ -34,19 +34,20 @@
         └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id, v2, t4._row_id, v5], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id, v2, t4._row_id, v5], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
-      ├─StreamExchange { dist: HashShard(t3.v5) }
-      │ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
-      │   ├─StreamExchange { dist: HashShard(t1.v2) }
-      │   │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
-      │   │   ├─StreamExchange { dist: HashShard(t1.v1) }
-      │   │   │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      │   │   └─StreamExchange { dist: HashShard(t2.v3) }
-      │   │     └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-      │   └─StreamExchange { dist: HashShard(t3.v6) }
-      │     └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
-      └─StreamExchange { dist: HashShard(t4.v7) }
-        └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1.v2, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
+        ├─StreamExchange { dist: HashShard(t3.v5) }
+        │ └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t1._row_id, t2._row_id, t3._row_id] }
+        │   ├─StreamExchange { dist: HashShard(t1.v2) }
+        │   │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
+        │   │   ├─StreamExchange { dist: HashShard(t1.v1) }
+        │   │   │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        │   │   └─StreamExchange { dist: HashShard(t2.v3) }
+        │   │     └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+        │   └─StreamExchange { dist: HashShard(t3.v6) }
+        │     └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+        └─StreamExchange { dist: HashShard(t4.v7) }
+          └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
 - name: bushy tree join ordering
   sql: |
     create table t1 (v1 int, v2 int);
@@ -81,19 +82,20 @@
         └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t2._row_id(hidden), t1._row_id(hidden), t4._row_id(hidden), t3._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, v3, t4._row_id, t3._row_id, v7, v2], pk_columns: [t2._row_id, t1._row_id, v3, t4._row_id, t3._row_id, v7, v2], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t1._row_id, t4._row_id, t3._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v2) }
-      │ └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t1.v2, t2._row_id, t1._row_id] }
-      │   ├─StreamExchange { dist: HashShard(t2.v3) }
-      │   │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-      │   └─StreamExchange { dist: HashShard(t1.v1) }
-      │     └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t3.v6) }
-        └─StreamHashJoin { type: Inner, predicate: t4.v7 = t3.v5, output: [t4.v7, t4.v8, t3.v5, t3.v6, t4._row_id, t3._row_id] }
-          ├─StreamExchange { dist: HashShard(t4.v7) }
-          │ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
-          └─StreamExchange { dist: HashShard(t3.v5) }
-            └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v2, t2.v3, t4.v7, t2._row_id, t1._row_id, t4._row_id, t3._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t1._row_id, t4._row_id, t3._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v2) }
+        │ └─StreamHashJoin { type: Inner, predicate: t2.v3 = t1.v1, output: [t2.v3, t2.v4, t1.v1, t1.v2, t2._row_id, t1._row_id] }
+        │   ├─StreamExchange { dist: HashShard(t2.v3) }
+        │   │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+        │   └─StreamExchange { dist: HashShard(t1.v1) }
+        │     └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t3.v6) }
+          └─StreamHashJoin { type: Inner, predicate: t4.v7 = t3.v5, output: [t4.v7, t4.v8, t3.v5, t3.v6, t4._row_id, t3._row_id] }
+            ├─StreamExchange { dist: HashShard(t4.v7) }
+            │ └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+            └─StreamExchange { dist: HashShard(t3.v5) }
+              └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
 - name: bushy tree join ordering manually
   sql: |
     set rw_enable_join_ordering = false;
@@ -128,19 +130,20 @@
             └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1, t3._row_id, t4._row_id, v5, v2], pk_columns: [t1._row_id, t2._row_id, v1, t3._row_id, t4._row_id, v5, v2], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v2) }
-      │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
-      │   ├─StreamExchange { dist: HashShard(t1.v1) }
-      │   │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      │   └─StreamExchange { dist: HashShard(t2.v3) }
-      │     └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-      └─StreamExchange { dist: HashShard(t3.v6) }
-        └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
-          ├─StreamExchange { dist: HashShard(t3.v5) }
-          │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
-          └─StreamExchange { dist: HashShard(t4.v7) }
-            └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1.v2, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v2 = t3.v6, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v2) }
+        │ └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t1._row_id, t2._row_id] }
+        │   ├─StreamExchange { dist: HashShard(t1.v1) }
+        │   │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        │   └─StreamExchange { dist: HashShard(t2.v3) }
+        │     └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+        └─StreamExchange { dist: HashShard(t3.v6) }
+          └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
+            ├─StreamExchange { dist: HashShard(t3.v5) }
+            │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+            └─StreamExchange { dist: HashShard(t4.v7) }
+              └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
 - name: right deep tree join ordering manually
   sql: |
     set rw_enable_join_ordering = false;
@@ -175,16 +178,17 @@
                 └─BatchScan { table: t4, columns: [t4.v7, t4.v8], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, v3, v4, v5, v6, v7, v8, t1._row_id(hidden), t2._row_id(hidden), t3._row_id(hidden), t4._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t3._row_id, t4._row_id, v5, v4, v1], pk_columns: [t1._row_id, t2._row_id, t3._row_id, t4._row_id, v5, v4, v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.v3) }
-        └─StreamHashJoin { type: Inner, predicate: t2.v4 = t3.v6, output: [t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t3._row_id, t4._row_id] }
-          ├─StreamExchange { dist: HashShard(t2.v4) }
-          │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-          └─StreamExchange { dist: HashShard(t3.v6) }
-            └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
-              ├─StreamExchange { dist: HashShard(t3.v5) }
-              │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
-              └─StreamExchange { dist: HashShard(t4.v7) }
-                └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t2.v4, t3.v5, t1._row_id, t2._row_id, t3._row_id, t4._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v3, output: [t1.v1, t1.v2, t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t1._row_id, t2._row_id, t3._row_id, t4._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.v3) }
+          └─StreamHashJoin { type: Inner, predicate: t2.v4 = t3.v6, output: [t2.v3, t2.v4, t3.v5, t3.v6, t4.v7, t4.v8, t2._row_id, t3._row_id, t4._row_id] }
+            ├─StreamExchange { dist: HashShard(t2.v4) }
+            │ └─StreamTableScan { table: t2, columns: [t2.v3, t2.v4, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+            └─StreamExchange { dist: HashShard(t3.v6) }
+              └─StreamHashJoin { type: Inner, predicate: t3.v5 = t4.v7, output: [t3.v5, t3.v6, t4.v7, t4.v8, t3._row_id, t4._row_id] }
+                ├─StreamExchange { dist: HashShard(t3.v5) }
+                │ └─StreamTableScan { table: t3, columns: [t3.v5, t3.v6, t3._row_id], pk: [t3._row_id], dist: UpstreamHashShard(t3._row_id) }
+                └─StreamExchange { dist: HashShard(t4.v7) }
+                  └─StreamTableScan { table: t4, columns: [t4.v7, t4.v8, t4._row_id], pk: [t4._row_id], dist: UpstreamHashShard(t4._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml
index 8e63beb9798c1..85bfb1a6cda36 100644
--- a/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/lateral_subquery.yaml
@@ -42,22 +42,23 @@
               └─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), all_sales._row_id(hidden), salesperson.id(hidden), all_sales.amount(hidden), salesperson.id#1(hidden)], stream_key: [salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount], pk_columns: [salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM salesperson.id AND all_sales.amount = max(all_sales.amount), output: [salesperson.name, max(all_sales.amount), all_sales.customer_name, salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount, salesperson.id] }
-      ├─StreamHashJoin { type: Inner, predicate: salesperson.id = all_sales.salesperson_id, output: [salesperson.id, salesperson.name, all_sales.customer_name, all_sales.amount, salesperson._row_id, all_sales._row_id] }
-      │ ├─StreamExchange { dist: HashShard(salesperson.id) }
-      │ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
-      │ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
-      │   └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
-      └─StreamProject { exprs: [salesperson.id, max(all_sales.amount)] }
-        └─StreamHashAgg { group_key: [salesperson.id], aggs: [max(all_sales.amount), count] }
-          └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.id, all_sales.amount, all_sales._row_id] }
-            ├─StreamProject { exprs: [salesperson.id] }
-            │ └─StreamHashAgg { group_key: [salesperson.id], aggs: [count] }
-            │   └─StreamExchange { dist: HashShard(salesperson.id) }
-            │     └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
-            └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
-              └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
-                └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+    └─StreamExchange { dist: HashShard(salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount) }
+      └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM salesperson.id AND all_sales.amount = max(all_sales.amount), output: [salesperson.name, max(all_sales.amount), all_sales.customer_name, salesperson._row_id, all_sales._row_id, salesperson.id, all_sales.amount, salesperson.id] }
+        ├─StreamHashJoin { type: Inner, predicate: salesperson.id = all_sales.salesperson_id, output: [salesperson.id, salesperson.name, all_sales.customer_name, all_sales.amount, salesperson._row_id, all_sales._row_id] }
+        │ ├─StreamExchange { dist: HashShard(salesperson.id) }
+        │ │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+        │ └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+        │   └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+        └─StreamProject { exprs: [salesperson.id, max(all_sales.amount)] }
+          └─StreamHashAgg { group_key: [salesperson.id], aggs: [max(all_sales.amount), count] }
+            └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.id, all_sales.amount, all_sales._row_id] }
+              ├─StreamProject { exprs: [salesperson.id] }
+              │ └─StreamHashAgg { group_key: [salesperson.id], aggs: [count] }
+              │   └─StreamExchange { dist: HashShard(salesperson.id) }
+              │     └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+              └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+                └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
+                  └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
 - name: lateral join 2
   sql: |
     create table all_sales (salesperson_id int, customer_name varchar, amount int );
@@ -87,14 +88,15 @@
               └─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), salesperson.id(hidden), all_sales.salesperson_id(hidden)], stream_key: [salesperson._row_id, salesperson.id], pk_columns: [salesperson._row_id, salesperson.id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
-      ├─StreamExchange { dist: HashShard(salesperson.id) }
-      │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
-      └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
-        └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
-          └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
-            └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
-              └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+    └─StreamExchange { dist: HashShard(salesperson._row_id, salesperson.id) }
+      └─StreamHashJoin { type: Inner, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
+        ├─StreamExchange { dist: HashShard(salesperson.id) }
+        │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+        └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
+          └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+            └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
+              └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
+                └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
 - name: lateral join 2 (left join)
   sql: |
     create table all_sales (salesperson_id int, customer_name varchar, amount int );
@@ -124,14 +126,15 @@
               └─BatchScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [name, amount, customer_name, salesperson._row_id(hidden), salesperson.id(hidden), all_sales.salesperson_id(hidden)], stream_key: [salesperson._row_id, salesperson.id], pk_columns: [salesperson._row_id, salesperson.id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
-      ├─StreamExchange { dist: HashShard(salesperson.id) }
-      │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
-      └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
-        └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
-          └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
-            └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
-              └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
+    └─StreamExchange { dist: HashShard(salesperson._row_id, salesperson.id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: salesperson.id IS NOT DISTINCT FROM all_sales.salesperson_id, output: [salesperson.name, all_sales.amount, all_sales.customer_name, salesperson._row_id, salesperson.id, all_sales.salesperson_id] }
+        ├─StreamExchange { dist: HashShard(salesperson.id) }
+        │ └─StreamTableScan { table: salesperson, columns: [salesperson.id, salesperson.name, salesperson._row_id], pk: [salesperson._row_id], dist: UpstreamHashShard(salesperson._row_id) }
+        └─StreamGroupTopN { order: [all_sales.amount DESC], limit: 1, offset: 0, group_key: [all_sales.salesperson_id] }
+          └─StreamExchange { dist: HashShard(all_sales.salesperson_id) }
+            └─StreamProject { exprs: [all_sales.salesperson_id, all_sales.amount, all_sales.customer_name, all_sales._row_id] }
+              └─StreamFilter { predicate: IsNotNull(all_sales.salesperson_id) }
+                └─StreamTableScan { table: all_sales, columns: [all_sales.salesperson_id, all_sales.customer_name, all_sales.amount, all_sales._row_id], pk: [all_sales._row_id], dist: UpstreamHashShard(all_sales._row_id) }
 - name: lateral join 2 (right join) should throw an error
   sql: |
     create table all_sales (salesperson_id int, customer_name varchar, amount int );
@@ -165,14 +168,15 @@
             └─BatchScan { table: t, columns: [t.arr], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [x, arr, unnest, t._row_id(hidden), t.arr(hidden), projected_row_id(hidden)], stream_key: [t._row_id, projected_row_id, arr], pk_columns: [t._row_id, projected_row_id, arr], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.arr IS NOT DISTINCT FROM t.arr, output: [t.x, t.arr, Unnest($0), t._row_id, t.arr, projected_row_id] }
-      ├─StreamExchange { dist: HashShard(t.arr) }
-      │ └─StreamTableScan { table: t, columns: [t.x, t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamProjectSet { select_list: [$0, Unnest($0)] }
-        └─StreamProject { exprs: [t.arr] }
-          └─StreamHashAgg { group_key: [t.arr], aggs: [count] }
-            └─StreamExchange { dist: HashShard(t.arr) }
-              └─StreamTableScan { table: t, columns: [t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.arr, t._row_id, projected_row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.arr IS NOT DISTINCT FROM t.arr, output: [t.x, t.arr, Unnest($0), t._row_id, t.arr, projected_row_id] }
+        ├─StreamExchange { dist: HashShard(t.arr) }
+        │ └─StreamTableScan { table: t, columns: [t.x, t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamProjectSet { select_list: [$0, Unnest($0)] }
+          └─StreamProject { exprs: [t.arr] }
+            └─StreamHashAgg { group_key: [t.arr], aggs: [count] }
+              └─StreamExchange { dist: HashShard(t.arr) }
+                └─StreamTableScan { table: t, columns: [t.arr, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - name: https://github.com/risingwavelabs/risingwave/issues/12298
   sql: |
     create table t1(c varchar, n varchar, id varchar, d varchar);
diff --git a/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml b/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml
index 2f7d9e5e75b3b..6838ddb331939 100644
--- a/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/mv_on_mv.yaml
@@ -12,8 +12,9 @@
     select m1.v1 as m1v1, m1.v2 as m1v2, m2.v1 as m2v1, m2.v2 as m2v2 from m1 join m2 on m1.v1 = m2.v1;
   stream_plan: |-
     StreamMaterialize { columns: [m1v1, m1v2, m2v1, m2v2, m1.t1._row_id(hidden), m2.t1._row_id(hidden)], stream_key: [m1.t1._row_id, m2.t1._row_id, m1v1], pk_columns: [m1.t1._row_id, m2.t1._row_id, m1v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: m1.v1 = m2.v1, output: [m1.v1, m1.v2, m2.v1, m2.v2, m1.t1._row_id, m2.t1._row_id] }
-      ├─StreamExchange { dist: HashShard(m1.v1) }
-      │ └─StreamTableScan { table: m1, columns: [m1.v1, m1.v2, m1.t1._row_id], pk: [m1.t1._row_id], dist: UpstreamHashShard(m1.t1._row_id) }
-      └─StreamExchange { dist: HashShard(m2.v1) }
-        └─StreamTableScan { table: m2, columns: [m2.v1, m2.v2, m2.t1._row_id], pk: [m2.t1._row_id], dist: UpstreamHashShard(m2.t1._row_id) }
+    └─StreamExchange { dist: HashShard(m1.v1, m1.t1._row_id, m2.t1._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: m1.v1 = m2.v1, output: [m1.v1, m1.v2, m2.v1, m2.v2, m1.t1._row_id, m2.t1._row_id] }
+        ├─StreamExchange { dist: HashShard(m1.v1) }
+        │ └─StreamTableScan { table: m1, columns: [m1.v1, m1.v2, m1.t1._row_id], pk: [m1.t1._row_id], dist: UpstreamHashShard(m1.t1._row_id) }
+        └─StreamExchange { dist: HashShard(m2.v1) }
+          └─StreamTableScan { table: m2, columns: [m2.v1, m2.v2, m2.t1._row_id], pk: [m2.t1._row_id], dist: UpstreamHashShard(m2.t1._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml
index f4b9e28ce0775..8d452bf45bc36 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark.yaml
@@ -181,34 +181,38 @@
             └─BatchScan { table: auction, columns: [auction.id, auction.seller, auction.category], distribution: UpstreamHashShard(auction.id) }
   stream_plan: |-
     StreamMaterialize { columns: [name, city, state, id, auction.seller(hidden), person.id(hidden)], stream_key: [id, auction.seller], pk_columns: [id, auction.seller], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
-      ├─StreamExchange { dist: HashShard(auction.seller) }
-      │ └─StreamProject { exprs: [auction.id, auction.seller] }
-      │   └─StreamFilter { predicate: (auction.category = 10:Int32) }
-      │     └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
-      └─StreamExchange { dist: HashShard(person.id) }
-        └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) }
-          └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) }
+    └─StreamExchange { dist: HashShard(auction.id, auction.seller) }
+      └─StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
+        ├─StreamExchange { dist: HashShard(auction.seller) }
+        │ └─StreamProject { exprs: [auction.id, auction.seller] }
+        │   └─StreamFilter { predicate: (auction.category = 10:Int32) }
+        │     └─StreamTableScan { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+        └─StreamExchange { dist: HashShard(person.id) }
+          └─StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) }
+            └─StreamTableScan { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [name, city, state, id, auction.seller(hidden), person.id(hidden)], stream_key: [id, auction.seller], pk_columns: [id, auction.seller], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([1]) from 1
-        └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([3, 4]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: auction.seller = person.id, output: [person.name, person.city, person.state, auction.id, auction.seller, person.id] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([1]) from 2
+    └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     StreamProject { exprs: [auction.id, auction.seller] }
     └── StreamFilter { predicate: (auction.category = 10:Int32) }
         └── Chain { table: auction, columns: [auction.id, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 4 }
             ├── Upstream
             └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     StreamFilter { predicate: (((person.state = 'or':Varchar) OR (person.state = 'id':Varchar)) OR (person.state = 'ca':Varchar)) }
     └── Chain { table: person, columns: [person.id, person.name, person.city, person.state], pk: [person.id], dist: UpstreamHashShard(person.id) } { state table: 5 }
         ├── Upstream
@@ -242,7 +246,7 @@
     ├── columns: [ name, city, state, id, auction.seller, person.id ]
     ├── primary key: [ $3 ASC, $4 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4, 5 ]
-    ├── distribution key: [ 4 ]
+    ├── distribution key: [ 3, 4 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q4
@@ -834,9 +838,9 @@
       AND P.endtime = A.endtime;
   batch_plan: |-
     BatchExchange { order: [], dist: Single }
-    └─BatchHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, first_value(person.name order_by(person.name ASC)), $expr1] }
+    └─BatchHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1] }
       ├─BatchExchange { order: [], dist: HashShard(person.id, $expr1, $expr2) }
-      │ └─BatchHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [first_value(person.name order_by(person.name ASC))] }
+      │ └─BatchHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name)] }
       │   └─BatchProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
       │     └─BatchProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
       │       └─BatchScan { table: person, columns: [person.id, person.name, person.date_time], distribution: UpstreamHashShard(person.id) }
@@ -847,50 +851,54 @@
               └─BatchScan { table: auction, columns: [auction.date_time, auction.seller], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), auction.seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, starttime, $expr2], pk_columns: [id, starttime, $expr2], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, first_value(person.name order_by(person.name ASC)), $expr1, $expr2, auction.seller, $expr3, $expr4] }
-      ├─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) }
-      │ └─StreamProject { exprs: [person.id, $expr1, $expr2, first_value(person.name order_by(person.name ASC))] }
-      │   └─StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [first_value(person.name order_by(person.name ASC)), count] }
-      │     └─StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
-      │       └─StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
-      │         └─StreamTableScan { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) }
-      └─StreamProject { exprs: [auction.seller, $expr3, $expr4] }
-        └─StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] }
-          └─StreamExchange { dist: HashShard(auction.seller, $expr3, $expr4) }
-            └─StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] }
-              └─StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] }
-                └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+    └─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) }
+      └─StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] }
+        ├─StreamExchange { dist: HashShard(person.id, $expr1, $expr2) }
+        │ └─StreamProject { exprs: [person.id, $expr1, $expr2, internal_last_seen_value(person.name)] }
+        │   └─StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name), count] }
+        │     └─StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
+        │       └─StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
+        │         └─StreamTableScan { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) }
+        └─StreamProject { exprs: [auction.seller, $expr3, $expr4] }
+          └─StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] }
+            └─StreamExchange { dist: HashShard(auction.seller, $expr3, $expr4) }
+              └─StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] }
+                └─StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] }
+                  └─StreamTableScan { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), auction.seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, starttime, $expr2], pk_columns: [id, starttime, $expr2], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, first_value(person.name order_by(person.name ASC)), $expr1, $expr2, auction.seller, $expr3, $expr4] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0, 1, 2]) from 1
-        └── StreamProject { exprs: [auction.seller, $expr3, $expr4] }
-            └── StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } { intermediate state table: 7, state tables: [], distinct tables: [] }
-                └── StreamExchange Hash([0, 1, 2]) from 2
+    └── StreamExchange Hash([0, 2, 3]) from 1
 
     Fragment 1
-    StreamProject { exprs: [person.id, $expr1, $expr2, first_value(person.name order_by(person.name ASC))] }
-    └── StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [first_value(person.name order_by(person.name ASC)), count] } { intermediate state table: 5, state tables: [ 4 ], distinct tables: [] }
+    StreamHashJoin { type: Inner, predicate: person.id = auction.seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: [person.id, internal_last_seen_value(person.name), $expr1, $expr2, auction.seller, $expr3, $expr4] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0, 1, 2]) from 2
+    └── StreamProject { exprs: [auction.seller, $expr3, $expr4] }
+        └── StreamHashAgg { group_key: [auction.seller, $expr3, $expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+            └── StreamExchange Hash([0, 1, 2]) from 3
+
+    Fragment 2
+    StreamProject { exprs: [person.id, $expr1, $expr2, internal_last_seen_value(person.name)] }
+    └── StreamHashAgg { group_key: [person.id, $expr1, $expr2], aggs: [internal_last_seen_value(person.name), count] } { intermediate state table: 4, state tables: [], distinct tables: [] }
         └── StreamProject { exprs: [person.id, person.name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
             └── StreamProject { exprs: [person.id, person.name, person.date_time, TumbleStart(person.date_time, '00:00:10':Interval) as $expr1] }
-                └── Chain { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } { state table: 6 }
+                └── Chain { table: person, columns: [person.id, person.name, person.date_time], pk: [person.id], dist: UpstreamHashShard(person.id) } { state table: 5 }
                     ├── Upstream
                     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [auction.seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4, auction.id] }
     └── StreamProject { exprs: [auction.date_time, auction.seller, TumbleStart(auction.date_time, '00:00:10':Interval) as $expr3, auction.id] }
-        └── Chain { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 8 }
+        └── Chain { table: auction, columns: [auction.date_time, auction.seller, auction.id], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 7 }
             ├── Upstream
             └── BatchPlanNode
 
-    Table 0 { columns: [ person_id, $expr1, $expr2, first_value(person_name order_by(person_name ASC)) ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
+    Table 0 { columns: [ person_id, $expr1, $expr2, internal_last_seen_value(person_name) ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
 
     Table 1 { columns: [ person_id, $expr1, $expr2, _degree ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
 
@@ -898,17 +906,20 @@
 
     Table 3 { columns: [ auction_seller, $expr3, $expr4, _degree ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
 
-    Table 4 { columns: [ person_id, $expr1, $expr2, person_name ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 3 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
-
-    Table 5 { columns: [ person_id, $expr1, $expr2, first_value(person_name order_by(person_name ASC)), count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3, 4 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
+    Table 4 { columns: [ person_id, $expr1, $expr2, internal_last_seen_value(person_name), count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3, 4 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
 
-    Table 6 { columns: [ vnode, id, person_backfill_finished, person_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
+    Table 5 { columns: [ vnode, id, person_backfill_finished, person_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
 
-    Table 7 { columns: [ auction_seller, $expr3, $expr4, count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
+    Table 6 { columns: [ auction_seller, $expr3, $expr4, count ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 3 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
 
-    Table 8 { columns: [ vnode, id, auction_backfill_finished, auction_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
+    Table 7 { columns: [ vnode, id, auction_backfill_finished, auction_row_count ], primary key: [ $0 ASC ], value indices: [ 1, 2, 3 ], distribution key: [ 0 ], read pk prefix len hint: 1, vnode column idx: 0 }
 
-    Table 4294967294 { columns: [ id, name, starttime, $expr2, auction.seller, $expr3, $expr4 ], primary key: [ $0 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 }
+    Table 4294967294
+    ├── columns: [ id, name, starttime, $expr2, auction.seller, $expr3, $expr4 ]
+    ├── primary key: [ $0 ASC, $2 ASC, $3 ASC ]
+    ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
+    ├── distribution key: [ 0, 2, 3 ]
+    └── read pk prefix len hint: 3
 
 - id: nexmark_q9
   before:
@@ -1130,27 +1141,31 @@
         └─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) }
   stream_plan: |-
     StreamMaterialize { columns: [auction, bidder, price, date_time, value, bid._row_id(hidden), $expr1(hidden), side_input.key(hidden)], stream_key: [bid._row_id, $expr1], pk_columns: [bid._row_id, $expr1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
-      ├─StreamExchange { dist: HashShard($expr1) }
-      │ └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] }
-      │   └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(side_input.key) }
-        └─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) }
+    └─StreamExchange { dist: HashShard(bid._row_id, $expr1) }
+      └─StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
+        ├─StreamExchange { dist: HashShard($expr1) }
+        │ └─StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] }
+        │   └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(side_input.key) }
+          └─StreamTableScan { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction, bidder, price, date_time, value, bid._row_id(hidden), $expr1(hidden), side_input.key(hidden)], stream_key: [bid._row_id, $expr1], pk_columns: [bid._row_id, $expr1], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
-        ├── StreamExchange Hash([4]) from 1
-        └── StreamExchange NoShuffle from 2
+    └── StreamExchange Hash([5, 6]) from 1
 
     Fragment 1
+    StreamTemporalJoin { type: Inner, predicate: $expr1 = side_input.key, output: [bid.auction, bid.bidder, bid.price, bid.date_time, side_input.value, bid._row_id, $expr1, side_input.key] }
+    ├── StreamExchange Hash([4]) from 2
+    └── StreamExchange NoShuffle from 3
+
+    Fragment 2
     StreamProject { exprs: [bid.auction, bid.bidder, bid.price, bid.date_time, (bid.auction % 10000:Int32) as $expr1, bid._row_id] }
     └── Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 0 }
         ├── Upstream
         └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     Chain { table: side_input, columns: [side_input.key, side_input.value], pk: [side_input.key], dist: UpstreamHashShard(side_input.key) } { state table: 1 }
     ├── Upstream
     └── BatchPlanNode
@@ -1163,7 +1178,7 @@
     ├── columns: [ auction, bidder, price, date_time, value, bid._row_id, $expr1, side_input.key ]
     ├── primary key: [ $5 ASC, $6 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7 ]
-    ├── distribution key: [ 6 ]
+    ├── distribution key: [ 5, 6 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q14
@@ -1792,30 +1807,34 @@
         └─BatchScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id(hidden), auction.id(hidden)], stream_key: [bid._row_id, auction], pk_columns: [bid._row_id, auction], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
-      ├─StreamExchange { dist: HashShard(bid.auction) }
-      │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
-      └─StreamExchange { dist: HashShard(auction.id) }
-        └─StreamFilter { predicate: (auction.category = 10:Int32) }
-          └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+    └─StreamExchange { dist: HashShard(bid.auction, bid._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
+        ├─StreamExchange { dist: HashShard(bid.auction) }
+        │ └─StreamTableScan { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+        └─StreamExchange { dist: HashShard(auction.id) }
+          └─StreamFilter { predicate: (auction.category = 10:Int32) }
+            └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id(hidden), auction.id(hidden)], stream_key: [bid._row_id, auction], pk_columns: [bid._row_id, auction], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0, 14]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: bid.auction = auction.id, output: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category, bid._row_id, auction.id] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     Chain { table: bid, columns: [bid.auction, bid.bidder, bid.price, bid.channel, bid.url, bid.date_time, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 4 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     StreamFilter { predicate: (auction.category = 10:Int32) }
     └── Chain { table: auction, columns: [auction.id, auction.item_name, auction.description, auction.initial_bid, auction.reserve, auction.date_time, auction.expires, auction.seller, auction.category], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 5 }
         ├── Upstream
@@ -1837,7 +1856,7 @@
     ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, bid._row_id, auction.id ]
     ├── primary key: [ $14 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 14 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q21
@@ -1943,33 +1962,37 @@
           └─BatchScan { table: bid, columns: [bid.auction, bid.price], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, bid.auction(hidden)], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
-      ├─StreamExchange { dist: HashShard(auction.id) }
-      │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
-      └─StreamProject { exprs: [bid.auction, max(bid.price)] }
-        └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] }
-          └─StreamExchange { dist: HashShard(bid.auction) }
-            └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+    └─StreamExchange { dist: HashShard(auction.id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
+        ├─StreamExchange { dist: HashShard(auction.id) }
+        │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+        └─StreamProject { exprs: [bid.auction, max(bid.price)] }
+          └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] }
+            └─StreamExchange { dist: HashShard(bid.auction) }
+              └─StreamTableScan { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, bid.auction(hidden)], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [bid.auction, max(bid.price)] }
-            └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
-                └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftOuter, predicate: auction.id = bid.auction, output: [auction.id, auction.item_name, max(bid.price), bid.auction] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [bid.auction, max(bid.price)] }
+        └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [max(bid.price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+            └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 4 }
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     Chain { table: bid, columns: [bid.auction, bid.price, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 }
     ├── Upstream
     └── BatchPlanNode
@@ -2026,9 +2049,9 @@
         SELECT COUNT(*) / COUNT(DISTINCT auction) FROM bid
     )
   batch_plan: |-
-    BatchNestedLoopJoin { type: Inner, predicate: (count(bid.auction) >= $expr1), output: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+    BatchNestedLoopJoin { type: Inner, predicate: (count(bid.auction) >= $expr1), output: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
     ├─BatchExchange { order: [], dist: Single }
-    │ └─BatchHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+    │ └─BatchHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction)] }
     │   └─BatchHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
     │     ├─BatchExchange { order: [], dist: HashShard(auction.id) }
     │     │ └─BatchScan { table: auction, columns: [auction.id, auction.item_name], distribution: UpstreamHashShard(auction.id) }
@@ -2043,9 +2066,9 @@
                 └─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
-    └─StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
-      ├─StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
-      │ └─StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
+    └─StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
+      ├─StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
+      │ └─StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
       │   └─StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
       │     ├─StreamExchange { dist: HashShard(auction.id) }
       │     │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
@@ -2063,50 +2086,50 @@
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+    └── StreamDynamicFilter { predicate: (count(bid.auction) >= $expr1), output: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
         ├── left table: 0
         ├── right table: 1
-        ├── StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
-        │   └── StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
-        │       ├── intermediate state table: 3
-        │       ├── state tables: [ 2 ]
+        ├── StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
+        │   └── StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
+        │       ├── intermediate state table: 2
+        │       ├── state tables: []
         │       ├── distinct tables: []
         │       └── StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
-        │           ├── left table: 4
-        │           ├── right table: 6
-        │           ├── left degree table: 5
-        │           ├── right degree table: 7
+        │           ├── left table: 3
+        │           ├── right table: 5
+        │           ├── left degree table: 4
+        │           ├── right degree table: 6
         │           ├── StreamExchange Hash([0]) from 1
         │           └── StreamExchange Hash([0]) from 2
         └── StreamExchange Broadcast from 3
 
     Fragment 1
-    Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 8 }
+    Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 7 }
     ├── Upstream
     └── BatchPlanNode
 
     Fragment 2
-    Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 9 }
+    Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 8 }
     ├── Upstream
     └── BatchPlanNode
 
     Fragment 3
     StreamProject { exprs: [(sum0(sum0(count)) / sum0(count(bid.auction))) as $expr1] }
-    └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(bid.auction)), count] } { intermediate state table: 10, state tables: [], distinct tables: [] }
+    └── StreamSimpleAgg { aggs: [sum0(sum0(count)), sum0(count(bid.auction)), count] } { intermediate state table: 9, state tables: [], distinct tables: [] }
         └── StreamExchange Single from 4
 
     Fragment 4
     StreamStatelessSimpleAgg { aggs: [sum0(count), count(bid.auction)] }
-    └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } { intermediate state table: 11, state tables: [], distinct tables: [] }
+    └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] } { intermediate state table: 10, state tables: [], distinct tables: [] }
         └── StreamExchange Hash([0]) from 5
 
     Fragment 5
-    Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 12 }
+    Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 11 }
     ├── Upstream
     └── BatchPlanNode
 
     Table 0
-    ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction) ]
+    ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction) ]
     ├── primary key: [ $2 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2 ]
     ├── distribution key: [ 0 ]
@@ -2115,28 +2138,36 @@
     Table 1 { columns: [ $expr1 ], primary key: [], value indices: [ 0 ], distribution key: [], read pk prefix len hint: 0 }
 
     Table 2
-    ├── columns: [ auction_id, auction_item_name, bid__row_id ]
-    ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
-    ├── value indices: [ 0, 1, 2 ]
+    ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), count ]
+    ├── primary key: [ $0 ASC ]
+    ├── value indices: [ 1, 2, 3 ]
     ├── distribution key: [ 0 ]
     └── read pk prefix len hint: 1
 
     Table 3
-    ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), count ]
+    ├── columns: [ auction_id, auction_item_name ]
     ├── primary key: [ $0 ASC ]
-    ├── value indices: [ 1, 2, 3 ]
+    ├── value indices: [ 0, 1 ]
     ├── distribution key: [ 0 ]
     └── read pk prefix len hint: 1
 
-    Table 4 { columns: [ auction_id, auction_item_name ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
-
-    Table 5 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+    Table 4 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 6 { columns: [ bid_auction, bid__row_id ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+    Table 5
+    ├── columns: [ bid_auction, bid__row_id ]
+    ├── primary key: [ $0 ASC, $1 ASC ]
+    ├── value indices: [ 0, 1 ]
+    ├── distribution key: [ 0 ]
+    └── read pk prefix len hint: 1
 
-    Table 7 { columns: [ bid_auction, bid__row_id, _degree ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+    Table 6
+    ├── columns: [ bid_auction, bid__row_id, _degree ]
+    ├── primary key: [ $0 ASC, $1 ASC ]
+    ├── value indices: [ 2 ]
+    ├── distribution key: [ 0 ]
+    └── read pk prefix len hint: 1
 
-    Table 8
+    Table 7
     ├── columns: [ vnode, id, auction_backfill_finished, auction_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
@@ -2144,7 +2175,7 @@
     ├── read pk prefix len hint: 1
     └── vnode column idx: 0
 
-    Table 9
+    Table 8
     ├── columns: [ vnode, _row_id, bid_backfill_finished, bid_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
@@ -2152,11 +2183,16 @@
     ├── read pk prefix len hint: 1
     └── vnode column idx: 0
 
-    Table 10 { columns: [ sum0(sum0(count)), sum0(count(bid_auction)), count ], primary key: [], value indices: [ 0, 1, 2 ], distribution key: [], read pk prefix len hint: 0 }
+    Table 9
+    ├── columns: [ sum0(sum0(count)), sum0(count(bid_auction)), count ]
+    ├── primary key: []
+    ├── value indices: [ 0, 1, 2 ]
+    ├── distribution key: []
+    └── read pk prefix len hint: 0
 
-    Table 11 { columns: [ bid_auction, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+    Table 10 { columns: [ bid_auction, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 12
+    Table 11
     ├── columns: [ vnode, _row_id, bid_backfill_finished, bid_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
@@ -2199,39 +2235,43 @@
               └─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
-      ├─StreamExchange { dist: HashShard(auction.id) }
-      │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
-      └─StreamProject { exprs: [bid.auction] }
-        └─StreamFilter { predicate: (count >= 20:Int32) }
-          └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
-            └─StreamExchange { dist: HashShard(bid.auction) }
-              └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+    └─StreamExchange { dist: HashShard(auction.id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
+        ├─StreamExchange { dist: HashShard(auction.id) }
+        │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+        └─StreamProject { exprs: [bid.auction] }
+          └─StreamFilter { predicate: (count >= 20:Int32) }
+            └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+              └─StreamExchange { dist: HashShard(bid.auction) }
+                └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [bid.auction] }
-            └── StreamFilter { predicate: (count >= 20:Int32) }
-                └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
-                    ├── intermediate state table: 5
-                    ├── state tables: []
-                    ├── distinct tables: []
-                    └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftSemi, predicate: auction.id = bid.auction, output: all }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [bid.auction] }
+        └── StreamFilter { predicate: (count >= 20:Int32) }
+            └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+                ├── intermediate state table: 5
+                ├── state tables: []
+                ├── distinct tables: []
+                └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
     ├── state table: 4
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 }
     ├── Upstream
     └── BatchPlanNode
@@ -2317,39 +2357,43 @@
               └─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
-      ├─StreamExchange { dist: HashShard(auction.id) }
-      │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
-      └─StreamProject { exprs: [bid.auction] }
-        └─StreamFilter { predicate: (count < 20:Int32) }
-          └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
-            └─StreamExchange { dist: HashShard(bid.auction) }
-              └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
+    └─StreamExchange { dist: HashShard(auction.id) }
+      └─StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
+        ├─StreamExchange { dist: HashShard(auction.id) }
+        │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
+        └─StreamProject { exprs: [bid.auction] }
+          └─StreamFilter { predicate: (count < 20:Int32) }
+            └─StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+              └─StreamExchange { dist: HashShard(bid.auction) }
+                └─StreamTableScan { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name], stream_key: [auction_id], pk_columns: [auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [bid.auction] }
-            └── StreamFilter { predicate: (count < 20:Int32) }
-                └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
-                    ├── intermediate state table: 5
-                    ├── state tables: []
-                    ├── distinct tables: []
-                    └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftAnti, predicate: auction.id = bid.auction, output: all }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [bid.auction] }
+        └── StreamFilter { predicate: (count < 20:Int32) }
+            └── StreamHashAgg [append_only] { group_key: [bid.auction], aggs: [count] }
+                ├── intermediate state table: 5
+                ├── state tables: []
+                ├── distinct tables: []
+                └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
     ├── state table: 4
     ├── Upstream
     └── BatchPlanNode
 
-    Fragment 2
+    Fragment 3
     Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 6 }
     ├── Upstream
     └── BatchPlanNode
@@ -2427,7 +2471,7 @@
     BatchTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 }
     └─BatchExchange { order: [], dist: Single }
       └─BatchTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 }
-        └─BatchHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+        └─BatchHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction)] }
           └─BatchHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
             ├─BatchExchange { order: [], dist: HashShard(auction.id) }
             │ └─BatchScan { table: auction, columns: [auction.id, auction.item_name], distribution: UpstreamHashShard(auction.id) }
@@ -2435,12 +2479,12 @@
               └─BatchScan { table: bid, columns: [bid.auction], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [bid_count, auction_id], pk_conflict: NoCheck }
-    └─StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+    └─StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
       └─StreamTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 }
         └─StreamExchange { dist: Single }
           └─StreamGroupTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0, group_key: [$expr1] }
-            └─StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), Vnode(auction.id) as $expr1] }
-              └─StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
+            └─StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction), Vnode(auction.id) as $expr1] }
+              └─StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
                 └─StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
                   ├─StreamExchange { dist: HashShard(auction.id) }
                   │ └─StreamTableScan { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) }
@@ -2450,44 +2494,44 @@
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, bid_count], stream_key: [auction_id], pk_columns: [bid_count, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction)] }
+    └── StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction)] }
         └── StreamTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0 } { state table: 0 }
             └── StreamExchange Single from 1
 
     Fragment 1
     StreamGroupTopN { order: [count(bid.auction) DESC], limit: 1000, offset: 0, group_key: [$expr1] } { state table: 1 }
-    └── StreamProject { exprs: [auction.id, first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), Vnode(auction.id) as $expr1] }
-        └── StreamHashAgg { group_key: [auction.id], aggs: [first_value(auction.item_name order_by(auction.item_name ASC)), count(bid.auction), count] }
-            ├── intermediate state table: 3
-            ├── state tables: [ 2 ]
+    └── StreamProject { exprs: [auction.id, internal_last_seen_value(auction.item_name), count(bid.auction), Vnode(auction.id) as $expr1] }
+        └── StreamHashAgg { group_key: [auction.id], aggs: [internal_last_seen_value(auction.item_name), count(bid.auction), count] }
+            ├── intermediate state table: 2
+            ├── state tables: []
             ├── distinct tables: []
             └── StreamHashJoin { type: Inner, predicate: auction.id = bid.auction, output: all }
-                ├── left table: 4
-                ├── right table: 6
-                ├── left degree table: 5
-                ├── right degree table: 7
+                ├── left table: 3
+                ├── right table: 5
+                ├── left degree table: 4
+                ├── right degree table: 6
                 ├── StreamExchange Hash([0]) from 2
                 └── StreamExchange Hash([0]) from 3
 
     Fragment 2
-    Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 8 }
+    Chain { table: auction, columns: [auction.id, auction.item_name], pk: [auction.id], dist: UpstreamHashShard(auction.id) } { state table: 7 }
     ├── Upstream
     └── BatchPlanNode
 
     Fragment 3
-    Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 9 }
+    Chain { table: bid, columns: [bid.auction, bid._row_id], pk: [bid._row_id], dist: UpstreamHashShard(bid._row_id) } { state table: 8 }
     ├── Upstream
     └── BatchPlanNode
 
     Table 0
-    ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), $expr1 ]
+    ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), $expr1 ]
     ├── primary key: [ $2 DESC, $0 ASC ]
     ├── value indices: [ 0, 1, 2, 3 ]
     ├── distribution key: []
     └── read pk prefix len hint: 0
 
     Table 1
-    ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), $expr1 ]
+    ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), $expr1 ]
     ├── primary key: [ $3 ASC, $2 DESC, $0 ASC ]
     ├── value indices: [ 0, 1, 2, 3 ]
     ├── distribution key: [ 0 ]
@@ -2495,38 +2539,31 @@
     └── vnode column idx: 3
 
     Table 2
-    ├── columns: [ auction_id, auction_item_name, bid__row_id ]
-    ├── primary key: [ $0 ASC, $1 ASC, $2 ASC ]
-    ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
-    └── read pk prefix len hint: 1
-
-    Table 3
-    ├── columns: [ auction_id, first_value(auction_item_name order_by(auction_item_name ASC)), count(bid_auction), count ]
+    ├── columns: [ auction_id, internal_last_seen_value(auction_item_name), count(bid_auction), count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
     ├── distribution key: [ 0 ]
     └── read pk prefix len hint: 1
 
-    Table 4 { columns: [ auction_id, auction_item_name ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+    Table 3 { columns: [ auction_id, auction_item_name ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 5 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
+    Table 4 { columns: [ auction_id, _degree ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 6
+    Table 5
     ├── columns: [ bid_auction, bid__row_id ]
     ├── primary key: [ $0 ASC, $1 ASC ]
     ├── value indices: [ 0, 1 ]
     ├── distribution key: [ 0 ]
     └── read pk prefix len hint: 1
 
-    Table 7
+    Table 6
     ├── columns: [ bid_auction, bid__row_id, _degree ]
     ├── primary key: [ $0 ASC, $1 ASC ]
     ├── value indices: [ 2 ]
     ├── distribution key: [ 0 ]
     └── read pk prefix len hint: 1
 
-    Table 8
+    Table 7
     ├── columns: [ vnode, id, auction_backfill_finished, auction_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
@@ -2534,7 +2571,7 @@
     ├── read pk prefix len hint: 1
     └── vnode column idx: 0
 
-    Table 9
+    Table 8
     ├── columns: [ vnode, _row_id, bid_backfill_finished, bid_row_count ]
     ├── primary key: [ $0 ASC ]
     ├── value indices: [ 1, 2, 3 ]
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml
index 7c694fad1fa67..31be64b2c480a 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark_source.yaml
@@ -158,29 +158,33 @@
           └─BatchSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, seller], pk_columns: [_row_id, _row_id#1, seller], pk_conflict: NoCheck }
-    └─StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] }
-      ├─StreamExchange { dist: HashShard(seller) }
-      │ └─StreamFilter { predicate: (category = 10:Int32) }
-      │   └─StreamRowIdGen { row_id_index: 10 }
-      │     └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
-      └─StreamExchange { dist: HashShard(id) }
-        └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) }
-          └─StreamRowIdGen { row_id_index: 8 }
-            └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
+    └─StreamExchange { dist: HashShard(_row_id, seller, _row_id) }
+      └─StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] }
+        ├─StreamExchange { dist: HashShard(seller) }
+        │ └─StreamFilter { predicate: (category = 10:Int32) }
+        │   └─StreamRowIdGen { row_id_index: 10 }
+        │     └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+        └─StreamExchange { dist: HashShard(id) }
+          └─StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) }
+            └─StreamRowIdGen { row_id_index: 8 }
+              └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), seller(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, seller], pk_columns: [_row_id, _row_id#1, seller], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([7]) from 1
-        └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([4, 5, 6]) from 1
 
     Fragment 1
+    StreamHashJoin [append_only] { type: Inner, predicate: seller = id, output: [name, city, state, id, _row_id, seller, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([7]) from 2
+    └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     StreamFilter { predicate: (category = 10:Int32) }
     └── StreamRowIdGen { row_id_index: 10 }
         └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 4 }
 
-    Fragment 2
+    Fragment 3
     StreamFilter { predicate: (((state = 'or':Varchar) OR (state = 'id':Varchar)) OR (state = 'ca':Varchar)) }
     └── StreamRowIdGen { row_id_index: 8 }
         └── StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } { source state table: 5 }
@@ -211,7 +215,7 @@
     ├── columns: [ name, city, state, id, _row_id, seller, _row_id#1 ]
     ├── primary key: [ $4 ASC, $6 ASC, $5 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
-    ├── distribution key: [ 5 ]
+    ├── distribution key: [ 4, 5, 6 ]
     └── read pk prefix len hint: 3
 
 - id: nexmark_q4
@@ -737,40 +741,44 @@
               └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, name, starttime, $expr2], pk_columns: [id, name, starttime, $expr2], pk_conflict: NoCheck }
-    └─StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all }
-      ├─StreamExchange { dist: HashShard(id, $expr1, $expr2) }
-      │ └─StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] }
-      │   └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) }
-      │     └─StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
-      │       └─StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] }
-      │         └─StreamRowIdGen { row_id_index: 8 }
-      │           └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
-      └─StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] }
-        └─StreamExchange { dist: HashShard(seller, $expr3, $expr4) }
-          └─StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] }
-            └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] }
-              └─StreamRowIdGen { row_id_index: 10 }
-                └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+    └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) }
+      └─StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all }
+        ├─StreamExchange { dist: HashShard(id, $expr1, $expr2) }
+        │ └─StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] }
+        │   └─StreamExchange { dist: HashShard(id, name, $expr1, $expr2) }
+        │     └─StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
+        │       └─StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] }
+        │         └─StreamRowIdGen { row_id_index: 8 }
+        │           └─StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] }
+        └─StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] }
+          └─StreamExchange { dist: HashShard(seller, $expr3, $expr4) }
+            └─StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] }
+              └─StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] }
+                └─StreamRowIdGen { row_id_index: 10 }
+                  └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [id, name, starttime, $expr2(hidden), seller(hidden), $expr3(hidden), $expr4(hidden)], stream_key: [id, name, starttime, $expr2], pk_columns: [id, name, starttime, $expr2], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0, 2, 3]) from 1
-        └── StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } { state table: 6 }
-            └── StreamExchange Hash([0, 1, 2]) from 3
+    └── StreamExchange Hash([0, 1, 2, 3]) from 1
 
     Fragment 1
-    StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } { state table: 4 }
-    └── StreamExchange Hash([0, 1, 2, 3]) from 2
+    StreamHashJoin [append_only] { type: Inner, predicate: id = seller AND $expr1 = $expr3 AND $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0, 2, 3]) from 2
+    └── StreamAppendOnlyDedup { dedup_cols: [seller, $expr3, $expr4] } { state table: 6 }
+        └── StreamExchange Hash([0, 1, 2]) from 4
 
     Fragment 2
+    StreamAppendOnlyDedup { dedup_cols: [id, name, $expr1, $expr2] } { state table: 4 }
+    └── StreamExchange Hash([0, 1, 2, 3]) from 3
+
+    Fragment 3
     StreamProject { exprs: [id, name, $expr1, ($expr1 + '00:00:10':Interval) as $expr2] }
     └── StreamProject { exprs: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr1] }
         └── StreamRowIdGen { row_id_index: 8 }
             └── StreamSource { source: person, columns: [id, name, email_address, credit_card, city, state, date_time, extra, _row_id] } { source state table: 5 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [seller, $expr3, ($expr3 + '00:00:10':Interval) as $expr4] }
     └── StreamProject { exprs: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id, TumbleStart(date_time, '00:00:10':Interval) as $expr3] }
         └── StreamRowIdGen { row_id_index: 10 }
@@ -796,7 +804,7 @@
     ├── columns: [ id, name, starttime, $expr2, seller, $expr3, $expr4 ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
-    ├── distribution key: [ 0, 2, 3 ]
+    ├── distribution key: [ 0, 1, 2, 3 ]
     └── read pk prefix len hint: 4
 
 - id: nexmark_q9
@@ -1629,31 +1637,31 @@
           └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
-    └─StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] }
-      ├─StreamExchange { dist: HashShard(auction) }
-      │ └─StreamRowIdGen { row_id_index: 7 }
-      │   └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
-      └─StreamExchange { dist: HashShard(id) }
-        └─StreamFilter { predicate: (category = 10:Int32) }
-          └─StreamRowIdGen { row_id_index: 10 }
-            └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+    └─StreamExchange { dist: HashShard(auction, _row_id, _row_id) }
+      └─StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] }
+        ├─StreamExchange { dist: HashShard(auction) }
+        │ └─StreamRowIdGen { row_id_index: 7 }
+        │   └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+        └─StreamExchange { dist: HashShard(id) }
+          └─StreamFilter { predicate: (category = 10:Int32) }
+            └─StreamRowIdGen { row_id_index: 10 }
+              └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0, 14, 15]) from 1
 
     Fragment 1
+    StreamHashJoin [append_only] { type: Inner, predicate: auction = id, output: [auction, bidder, price, channel, url, date_time, item_name, description, initial_bid, reserve, date_time, expires, seller, category, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     StreamRowIdGen { row_id_index: 7 }
     └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 4 }
 
-    Fragment 2
+    Fragment 3
     StreamFilter { predicate: (category = 10:Int32) }
     └── StreamRowIdGen { row_id_index: 10 }
         └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 5 }
@@ -1674,7 +1682,7 @@
     ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ]
     ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 14, 15 ]
     └── read pk prefix len hint: 3
 
 - id: nexmark_q21
@@ -1775,30 +1783,34 @@
           └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] }
-      ├─StreamExchange { dist: HashShard(id) }
-      │ └─StreamRowIdGen { row_id_index: 10 }
-      │   └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
-      └─StreamProject { exprs: [auction, max(price)] }
-        └─StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] }
-          └─StreamExchange { dist: HashShard(auction) }
-            └─StreamRowIdGen { row_id_index: 7 }
-              └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+    └─StreamExchange { dist: HashShard(id, _row_id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] }
+        ├─StreamExchange { dist: HashShard(id) }
+        │ └─StreamRowIdGen { row_id_index: 10 }
+        │   └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+        └─StreamProject { exprs: [auction, max(price)] }
+          └─StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] }
+            └─StreamExchange { dist: HashShard(auction) }
+              └─StreamRowIdGen { row_id_index: 7 }
+                └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), auction(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [auction, max(price)] }
-            └── StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
-                └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0, 3]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftOuter, predicate: id = auction, output: [id, item_name, max(price), _row_id, auction] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [auction, max(price)] }
+        └── StreamHashAgg [append_only] { group_key: [auction], aggs: [max(price), count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+            └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     StreamRowIdGen { row_id_index: 10 }
     └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] } { source state table: 4 }
 
-    Fragment 2
+    Fragment 3
     StreamRowIdGen { row_id_index: 7 }
     └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 }
 
@@ -1825,7 +1837,7 @@
     ├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, auction ]
     ├── primary key: [ $3 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 3 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q102
@@ -1992,37 +2004,41 @@
               └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
-      ├─StreamExchange { dist: HashShard(id) }
-      │ └─StreamRowIdGen { row_id_index: 10 }
-      │   └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
-      └─StreamProject { exprs: [auction] }
-        └─StreamFilter { predicate: (count >= 20:Int32) }
-          └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
-            └─StreamExchange { dist: HashShard(auction) }
-              └─StreamRowIdGen { row_id_index: 7 }
-                └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+    └─StreamExchange { dist: HashShard(id, _row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
+        ├─StreamExchange { dist: HashShard(id) }
+        │ └─StreamRowIdGen { row_id_index: 10 }
+        │   └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+        └─StreamProject { exprs: [auction] }
+          └─StreamFilter { predicate: (count >= 20:Int32) }
+            └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
+              └─StreamExchange { dist: HashShard(auction) }
+                └─StreamRowIdGen { row_id_index: 7 }
+                  └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [auction] }
-            └── StreamFilter { predicate: (count >= 20:Int32) }
-                └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
-                    └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0, 2]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftSemi, predicate: id = auction, output: [id, item_name, _row_id] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [auction] }
+        └── StreamFilter { predicate: (count >= 20:Int32) }
+            └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+                └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     StreamRowIdGen { row_id_index: 10 }
     └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
         └── source state table: 4
 
-    Fragment 2
+    Fragment 3
     StreamRowIdGen { row_id_index: 7 }
     └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 }
 
@@ -2049,7 +2065,7 @@
     ├── columns: [ auction_id, auction_item_name, _row_id ]
     ├── primary key: [ $2 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 2 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q104
@@ -2080,37 +2096,41 @@
               └─BatchSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
-      ├─StreamExchange { dist: HashShard(id) }
-      │ └─StreamRowIdGen { row_id_index: 10 }
-      │   └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
-      └─StreamProject { exprs: [auction] }
-        └─StreamFilter { predicate: (count < 20:Int32) }
-          └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
-            └─StreamExchange { dist: HashShard(auction) }
-              └─StreamRowIdGen { row_id_index: 7 }
-                └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
+    └─StreamExchange { dist: HashShard(id, _row_id) }
+      └─StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
+        ├─StreamExchange { dist: HashShard(id) }
+        │ └─StreamRowIdGen { row_id_index: 10 }
+        │   └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+        └─StreamProject { exprs: [auction] }
+          └─StreamFilter { predicate: (count < 20:Int32) }
+            └─StreamHashAgg [append_only] { group_key: [auction], aggs: [count] }
+              └─StreamExchange { dist: HashShard(auction) }
+                └─StreamRowIdGen { row_id_index: 7 }
+                  └─StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [auction] }
-            └── StreamFilter { predicate: (count < 20:Int32) }
-                └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
-                    └── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0, 2]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftAnti, predicate: id = auction, output: [id, item_name, _row_id] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [auction] }
+        └── StreamFilter { predicate: (count < 20:Int32) }
+            └── StreamHashAgg [append_only] { group_key: [auction], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+                └── StreamExchange Hash([0]) from 3
+
+    Fragment 2
     StreamRowIdGen { row_id_index: 10 }
     └── StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
         └── source state table: 4
 
-    Fragment 2
+    Fragment 3
     StreamRowIdGen { row_id_index: 7 }
     └── StreamSource { source: bid, columns: [auction, bidder, price, channel, url, date_time, extra, _row_id] } { source state table: 6 }
 
@@ -2137,7 +2157,7 @@
     ├── columns: [ auction_id, auction_item_name, _row_id ]
     ├── primary key: [ $2 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 2 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q105
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml
index ccdde39e76764..c6c3ffd4f5ad6 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark_temporal_filter.yaml
@@ -717,65 +717,69 @@
       AND P.endtime = A.endtime;
   stream_plan: |-
     StreamMaterialize { columns: [id, name, starttime, $expr6(hidden), $expr8(hidden), $expr9(hidden), $expr10(hidden)], stream_key: [id, name, starttime, $expr6], pk_columns: [id, name, starttime, $expr6], pk_conflict: NoCheck }
-    └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all }
-      ├─StreamExchange { dist: HashShard($expr2, $expr5, $expr6) }
-      │ └─StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] }
-      │   └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) }
-      │     └─StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] }
-      │       └─StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] }
-      │         └─StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] }
-      │           └─StreamFilter { predicate: (event_type = 0:Int32) }
-      │             └─StreamShare { id: 5 }
-      │               └─StreamProject { exprs: [event_type, person, auction, _row_id] }
-      │                 └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
-      │                   └─StreamRowIdGen { row_id_index: 5 }
-      │                     └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-      │                       └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] }
-        └─StreamExchange { dist: HashShard($expr8, $expr9, $expr10) }
-          └─StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] }
-            └─StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] }
-              └─StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] }
-                └─StreamFilter { predicate: (event_type = 1:Int32) }
-                  └─StreamShare { id: 5 }
-                    └─StreamProject { exprs: [event_type, person, auction, _row_id] }
-                      └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
-                        └─StreamRowIdGen { row_id_index: 5 }
-                          └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-                            └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) }
+      └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all }
+        ├─StreamExchange { dist: HashShard($expr2, $expr5, $expr6) }
+        │ └─StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] }
+        │   └─StreamExchange { dist: HashShard($expr2, $expr3, $expr5, $expr6) }
+        │     └─StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] }
+        │       └─StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] }
+        │         └─StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] }
+        │           └─StreamFilter { predicate: (event_type = 0:Int32) }
+        │             └─StreamShare { id: 5 }
+        │               └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+        │                 └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+        │                   └─StreamRowIdGen { row_id_index: 5 }
+        │                     └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+        │                       └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] }
+          └─StreamExchange { dist: HashShard($expr8, $expr9, $expr10) }
+            └─StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] }
+              └─StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] }
+                └─StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] }
+                  └─StreamFilter { predicate: (event_type = 1:Int32) }
+                    └─StreamShare { id: 5 }
+                      └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+                        └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+                          └─StreamRowIdGen { row_id_index: 5 }
+                            └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+                              └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [id, name, starttime, $expr6(hidden), $expr8(hidden), $expr9(hidden), $expr10(hidden)], stream_key: [id, name, starttime, $expr6], pk_columns: [id, name, starttime, $expr6], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0, 2, 3]) from 1
-        └── StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } { state table: 6 }
-            └── StreamExchange Hash([0, 1, 2]) from 4
+    └── StreamExchange Hash([0, 1, 2, 3]) from 1
 
     Fragment 1
-    StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } { state table: 4 }
-    └── StreamExchange Hash([0, 1, 2, 3]) from 2
+    StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr8 AND $expr5 = $expr9 AND $expr6 = $expr10, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0, 2, 3]) from 2
+    └── StreamAppendOnlyDedup { dedup_cols: [$expr8, $expr9, $expr10] } { state table: 6 }
+        └── StreamExchange Hash([0, 1, 2]) from 5
 
     Fragment 2
+    StreamAppendOnlyDedup { dedup_cols: [$expr2, $expr3, $expr5, $expr6] } { state table: 4 }
+    └── StreamExchange Hash([0, 1, 2, 3]) from 3
+
+    Fragment 3
     StreamProject { exprs: [$expr2, $expr3, $expr5, ($expr5 + '00:00:10':Interval) as $expr6] }
     └── StreamProject { exprs: [$expr2, $expr3, $expr4, TumbleStart($expr4, '00:00:10':Interval) as $expr5, _row_id] }
         └── StreamProject { exprs: [Field(person, 0:Int32) as $expr2, Field(person, 1:Int32) as $expr3, Field(person, 6:Int32) as $expr4, _row_id] }
             └── StreamFilter { predicate: (event_type = 0:Int32) }
-                └── StreamExchange NoShuffle from 3
+                └── StreamExchange NoShuffle from 4
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [event_type, person, auction, _row_id] }
     └── StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
             └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
                 └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [$expr8, $expr9, ($expr9 + '00:00:10':Interval) as $expr10] }
     └── StreamProject { exprs: [$expr7, $expr8, TumbleStart($expr7, '00:00:10':Interval) as $expr9, _row_id] }
         └── StreamProject { exprs: [Field(auction, 5:Int32) as $expr7, Field(auction, 7:Int32) as $expr8, _row_id] }
             └── StreamFilter { predicate: (event_type = 1:Int32) }
-                └── StreamExchange NoShuffle from 3
+                └── StreamExchange NoShuffle from 4
 
     Table 0 { columns: [ $expr2, $expr3, $expr5, $expr6 ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 }
 
@@ -795,7 +799,7 @@
     ├── columns: [ id, name, starttime, $expr6, $expr8, $expr9, $expr10 ]
     ├── primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4, 5, 6 ]
-    ├── distribution key: [ 0, 2, 3 ]
+    ├── distribution key: [ 0, 1, 2, 3 ]
     └── read pk prefix len hint: 4
 
 - id: nexmark_q9
@@ -1180,59 +1184,63 @@
     WHERE A.category = 10;
   stream_plan: |-
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] }
-      ├─StreamExchange { dist: HashShard($expr3) }
-      │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] }
-      │   └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
-      │     ├─StreamFilter { predicate: (event_type = 2:Int32) }
-      │     │ └─StreamShare { id: 5 }
-      │     │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-      │     │     └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
-      │     │       └─StreamRowIdGen { row_id_index: 5 }
-      │     │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-      │     │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      │     └─StreamExchange { dist: Broadcast }
-      │       └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
-      │         └─StreamNow { output: [now] }
-      └─StreamExchange { dist: HashShard($expr9) }
-        └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] }
-          └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
-            └─StreamShare { id: 5 }
-              └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-                └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
-                  └─StreamRowIdGen { row_id_index: 5 }
-                    └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-                      └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr3, _row_id, _row_id) }
+      └─StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] }
+        ├─StreamExchange { dist: HashShard($expr3) }
+        │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] }
+        │   └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+        │     ├─StreamFilter { predicate: (event_type = 2:Int32) }
+        │     │ └─StreamShare { id: 5 }
+        │     │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+        │     │     └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+        │     │       └─StreamRowIdGen { row_id_index: 5 }
+        │     │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+        │     │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        │     └─StreamExchange { dist: Broadcast }
+        │       └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+        │         └─StreamNow { output: [now] }
+        └─StreamExchange { dist: HashShard($expr9) }
+          └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] }
+            └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
+              └─StreamShare { id: 5 }
+                └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+                  └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+                    └─StreamRowIdGen { row_id_index: 5 }
+                      └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+                        └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 4
+    └── StreamExchange Hash([0, 14, 15]) from 1
 
     Fragment 1
+    StreamHashJoin { type: Inner, predicate: $expr3 = $expr9, output: [$expr3, $expr4, $expr5, $expr6, $expr7, $expr8, $expr10, $expr11, $expr12, $expr13, $expr14, $expr15, $expr16, $expr17, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 5
+
+    Fragment 2
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr3, Field(bid, 1:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, Field(bid, 3:Int32) as $expr6, Field(bid, 4:Int32) as $expr7, Field(bid, 5:Int32) as $expr8, _row_id] }
     └── StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { left table: 4, right table: 5 }
         ├── StreamFilter { predicate: (event_type = 2:Int32) }
-        │   └── StreamExchange NoShuffle from 2
-        └── StreamExchange Broadcast from 3
+        │   └── StreamExchange NoShuffle from 3
+        └── StreamExchange Broadcast from 4
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
         └── StreamRowIdGen { row_id_index: 5 }
             └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
                 └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 6 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
     └── StreamNow { output: [now] } { state table: 7 }
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr9, Field(auction, 1:Int32) as $expr10, Field(auction, 2:Int32) as $expr11, Field(auction, 3:Int32) as $expr12, Field(auction, 4:Int32) as $expr13, Field(auction, 5:Int32) as $expr14, Field(auction, 6:Int32) as $expr15, Field(auction, 7:Int32) as $expr16, Field(auction, 8:Int32) as $expr17, _row_id] }
     └── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
     Table 0 { columns: [ $expr3, $expr4, $expr5, $expr6, $expr7, $expr8, _row_id ], primary key: [ $0 ASC, $6 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
@@ -1250,7 +1258,12 @@
 
     Table 7 { columns: [ now ], primary key: [], value indices: [ 0 ], distribution key: [], read pk prefix len hint: 0 }
 
-    Table 4294967294 { columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ], primary key: [ $14 ASC, $15 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
+    Table 4294967294
+    ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ]
+    ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ]
+    ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
+    ├── distribution key: [ 0, 14, 15 ]
+    └── read pk prefix len hint: 3
 
 - id: nexmark_q21
   before:
@@ -1375,61 +1388,65 @@
     ) b ON a.id = b.auction;
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
-      │   └─StreamFilter { predicate: (event_type = 1:Int32) }
-      │     └─StreamShare { id: 5 }
-      │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-      │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-      │               └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamProject { exprs: [$expr5, max($expr6)] }
-        └─StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] }
-          └─StreamExchange { dist: HashShard($expr5) }
-            └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] }
-              └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
-                ├─StreamFilter { predicate: (event_type = 2:Int32) }
-                │ └─StreamShare { id: 5 }
-                │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-                │     └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-                │       └─StreamRowIdGen { row_id_index: 5 }
-                │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-                │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-                └─StreamExchange { dist: Broadcast }
-                  └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
-                    └─StreamNow { output: [now] }
+    └─StreamExchange { dist: HashShard($expr2, _row_id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+        │   └─StreamFilter { predicate: (event_type = 1:Int32) }
+        │     └─StreamShare { id: 5 }
+        │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+        │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+        │               └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamProject { exprs: [$expr5, max($expr6)] }
+          └─StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] }
+            └─StreamExchange { dist: HashShard($expr5) }
+              └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] }
+                └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+                  ├─StreamFilter { predicate: (event_type = 2:Int32) }
+                  │ └─StreamShare { id: 5 }
+                  │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+                  │     └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+                  │       └─StreamRowIdGen { row_id_index: 5 }
+                  │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+                  │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+                  └─StreamExchange { dist: Broadcast }
+                    └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
+                      └─StreamNow { output: [now] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [$expr5, max($expr6)] }
-            └── StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } { intermediate state table: 6, state tables: [ 5 ], distinct tables: [] }
-                └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([0, 3]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr5, output: [$expr2, $expr3, max($expr6), _row_id, $expr5] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [$expr5, max($expr6)] }
+        └── StreamHashAgg { group_key: [$expr5], aggs: [max($expr6), count] } { intermediate state table: 6, state tables: [ 5 ], distinct tables: [] }
+            └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
     └── StreamFilter { predicate: (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
             └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
                 └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, Field(bid, 2:Int32) as $expr6, _row_id] }
     └── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true } { left table: 7, right table: 8 }
         ├── StreamFilter { predicate: (event_type = 2:Int32) }
-        │   └── StreamExchange NoShuffle from 2
-        └── StreamExchange Broadcast from 4
+        │   └── StreamExchange NoShuffle from 3
+        └── StreamExchange Broadcast from 5
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
     └── StreamNow { output: [now] } { state table: 9 }
 
@@ -1457,7 +1474,7 @@
     ├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, $expr5 ]
     ├── primary key: [ $3 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 3 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q102
@@ -1642,65 +1659,69 @@
     );
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
-      │   └─StreamFilter { predicate: (event_type = 1:Int32) }
-      │     └─StreamShare { id: 5 }
-      │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-      │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-      │               └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamProject { exprs: [$expr5] }
-        └─StreamFilter { predicate: (count >= 20:Int32) }
-          └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
-            └─StreamExchange { dist: HashShard($expr5) }
-              └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
-                └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
-                  ├─StreamFilter { predicate: (event_type = 2:Int32) }
-                  │ └─StreamShare { id: 5 }
-                  │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-                  │     └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-                  │       └─StreamRowIdGen { row_id_index: 5 }
-                  │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-                  │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-                  └─StreamExchange { dist: Broadcast }
-                    └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
-                      └─StreamNow { output: [now] }
+    └─StreamExchange { dist: HashShard($expr2, _row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+        │   └─StreamFilter { predicate: (event_type = 1:Int32) }
+        │     └─StreamShare { id: 5 }
+        │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+        │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+        │               └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamProject { exprs: [$expr5] }
+          └─StreamFilter { predicate: (count >= 20:Int32) }
+            └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
+              └─StreamExchange { dist: HashShard($expr5) }
+                └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
+                  └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+                    ├─StreamFilter { predicate: (event_type = 2:Int32) }
+                    │ └─StreamShare { id: 5 }
+                    │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+                    │     └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+                    │       └─StreamRowIdGen { row_id_index: 5 }
+                    │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+                    │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+                    └─StreamExchange { dist: Broadcast }
+                      └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
+                        └─StreamNow { output: [now] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [$expr5] }
-            └── StreamFilter { predicate: (count >= 20:Int32) }
-                └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
-                    └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([0, 2]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [$expr5] }
+        └── StreamFilter { predicate: (count >= 20:Int32) }
+            └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+                └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
     └── StreamFilter { predicate: (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
             └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
                 └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
     └── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
         ├── left table: 6
         ├── right table: 7
         ├── StreamFilter { predicate: (event_type = 2:Int32) }
-        │   └── StreamExchange NoShuffle from 2
-        └── StreamExchange Broadcast from 4
+        │   └── StreamExchange NoShuffle from 3
+        └── StreamExchange Broadcast from 5
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
     └── StreamNow { output: [now] } { state table: 8 }
 
@@ -1731,7 +1752,7 @@
     ├── columns: [ auction_id, auction_item_name, _row_id ]
     ├── primary key: [ $2 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 2 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q104
@@ -1752,65 +1773,69 @@
     );
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
-      │   └─StreamFilter { predicate: (event_type = 1:Int32) }
-      │     └─StreamShare { id: 5 }
-      │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-      │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-      │               └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamProject { exprs: [$expr5] }
-        └─StreamFilter { predicate: (count < 20:Int32) }
-          └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
-            └─StreamExchange { dist: HashShard($expr5) }
-              └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
-                └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
-                  ├─StreamFilter { predicate: (event_type = 2:Int32) }
-                  │ └─StreamShare { id: 5 }
-                  │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-                  │     └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-                  │       └─StreamRowIdGen { row_id_index: 5 }
-                  │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
-                  │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-                  └─StreamExchange { dist: Broadcast }
-                    └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
-                      └─StreamNow { output: [now] }
+    └─StreamExchange { dist: HashShard($expr2, _row_id) }
+      └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+        │   └─StreamFilter { predicate: (event_type = 1:Int32) }
+        │     └─StreamShare { id: 5 }
+        │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+        │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+        │               └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamProject { exprs: [$expr5] }
+          └─StreamFilter { predicate: (count < 20:Int32) }
+            └─StreamHashAgg { group_key: [$expr5], aggs: [count] }
+              └─StreamExchange { dist: HashShard($expr5) }
+                └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
+                  └─StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
+                    ├─StreamFilter { predicate: (event_type = 2:Int32) }
+                    │ └─StreamShare { id: 5 }
+                    │   └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+                    │     └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+                    │       └─StreamRowIdGen { row_id_index: 5 }
+                    │         └─StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
+                    │           └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+                    └─StreamExchange { dist: Broadcast }
+                      └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
+                        └─StreamNow { output: [now] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [$expr5] }
-            └── StreamFilter { predicate: (count < 20:Int32) }
-                └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
-                    └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([0, 2]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr5, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [$expr5] }
+        └── StreamFilter { predicate: (count < 20:Int32) }
+            └── StreamHashAgg { group_key: [$expr5], aggs: [count] } { intermediate state table: 5, state tables: [], distinct tables: [] }
+                └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
     └── StreamFilter { predicate: (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
             └── StreamProject { exprs: [event_type, person, auction, bid, Proctime as $expr1, _row_id], output_watermarks: [$expr1] }
                 └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 4 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr5, _row_id] }
     └── StreamDynamicFilter { predicate: ($expr1 > $expr4), output_watermarks: [$expr1], output: [event_type, auction, bid, $expr1, _row_id], cleaned_by_watermark: true }
         ├── left table: 6
         ├── right table: 7
         ├── StreamFilter { predicate: (event_type = 2:Int32) }
-        │   └── StreamExchange NoShuffle from 2
-        └── StreamExchange Broadcast from 4
+        │   └── StreamExchange NoShuffle from 3
+        └── StreamExchange Broadcast from 5
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [SubtractWithTimeZone(now, '00:05:00':Interval, 'UTC':Varchar) as $expr4], output_watermarks: [$expr4] }
     └── StreamNow { output: [now] } { state table: 8 }
 
@@ -1841,7 +1866,7 @@
     ├── columns: [ auction_id, auction_item_name, _row_id ]
     ├── primary key: [ $2 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 2 ]
     └── read pk prefix len hint: 2
 
 - id: nexmark_q105
diff --git a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml
index 3554e31d281ec..39adc39a16653 100644
--- a/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/nexmark_watermark.yaml
@@ -131,45 +131,49 @@
               └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), $expr3(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, $expr3], pk_columns: [_row_id, _row_id#1, $expr3], pk_conflict: NoCheck }
-    └─StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
-      ├─StreamExchange { dist: HashShard($expr3) }
-      │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] }
-      │   └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
-      │     └─StreamShare { id: 6 }
-      │       └─StreamProject { exprs: [event_type, person, auction, _row_id] }
-      │         └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-      │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-      │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamExchange { dist: HashShard($expr4) }
-        └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] }
-          └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) }
-            └─StreamShare { id: 6 }
-              └─StreamProject { exprs: [event_type, person, auction, _row_id] }
-                └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
-                  └─StreamRowIdGen { row_id_index: 5 }
-                    └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-                      └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-                        └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard(_row_id, $expr3, _row_id) }
+      └─StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
+        ├─StreamExchange { dist: HashShard($expr3) }
+        │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] }
+        │   └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
+        │     └─StreamShare { id: 6 }
+        │       └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+        │         └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+        │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+        │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamExchange { dist: HashShard($expr4) }
+          └─StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] }
+            └─StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) }
+              └─StreamShare { id: 6 }
+                └─StreamProject { exprs: [event_type, person, auction, _row_id] }
+                  └─StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
+                    └─StreamRowIdGen { row_id_index: 5 }
+                      └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+                        └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+                          └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [name, city, state, id, _row_id(hidden), $expr3(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, $expr3], pk_columns: [_row_id, _row_id#1, $expr3], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([1]) from 1
-        └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([4, 5, 6]) from 1
 
     Fragment 1
+    StreamHashJoin [append_only] { type: Inner, predicate: $expr3 = $expr4, output: [$expr5, $expr6, $expr7, $expr2, _row_id, $expr3, _row_id] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([1]) from 2
+    └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 7:Int32) as $expr3, _row_id] }
     └── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, person, auction, _row_id] }
     └── StreamFilter { predicate: (((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32)) OR ((((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32))) }
         └── StreamRowIdGen { row_id_index: 5 }
@@ -177,10 +181,10 @@
                 └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
                     └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(person, 0:Int32) as $expr4, Field(person, 1:Int32) as $expr5, Field(person, 4:Int32) as $expr6, Field(person, 5:Int32) as $expr7, _row_id] }
     └── StreamFilter { predicate: (((Field(person, 5:Int32) = 'or':Varchar) OR (Field(person, 5:Int32) = 'id':Varchar)) OR (Field(person, 5:Int32) = 'ca':Varchar)) AND (event_type = 0:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
     Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 1 ], read pk prefix len hint: 1 }
 
@@ -194,7 +198,7 @@
 
     Table 5 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 }
 
-    Table 4294967294 { columns: [ name, city, state, id, _row_id, $expr3, _row_id#1 ], primary key: [ $4 ASC, $6 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 5 ], read pk prefix len hint: 3 }
+    Table 4294967294 { columns: [ name, city, state, id, _row_id, $expr3, _row_id#1 ], primary key: [ $4 ASC, $6 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 4, 5, 6 ], read pk prefix len hint: 3 }
 
   eowc_stream_error: |-
     Not supported: The query cannot be executed in Emit-On-Window-Close mode.
@@ -696,43 +700,48 @@
                     └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] }
-    └─StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
-      ├─StreamExchange { dist: HashShard($expr4) }
-      │ └─StreamShare { id: 6 }
-      │   └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
-      │     └─StreamFilter { predicate: (event_type = 2:Int32) }
-      │       └─StreamRowIdGen { row_id_index: 5 }
-      │         └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-      │           └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-      │             └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamExchange { dist: HashShard(max($expr4)) }
-        └─StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] }
-          └─StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] }
-            └─StreamExchange { dist: HashShard($expr5) }
-              └─StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] }
-                └─StreamShare { id: 6 }
-                  └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
-                    └─StreamFilter { predicate: (event_type = 2:Int32) }
-                      └─StreamRowIdGen { row_id_index: 5 }
-                        └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-                          └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-                            └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr4, _row_id, $expr5) }
+      └─StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
+        ├─StreamExchange { dist: HashShard($expr4) }
+        │ └─StreamShare { id: 6 }
+        │   └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
+        │     └─StreamFilter { predicate: (event_type = 2:Int32) }
+        │       └─StreamRowIdGen { row_id_index: 5 }
+        │         └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+        │           └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+        │             └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamExchange { dist: HashShard(max($expr4)) }
+          └─StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] }
+            └─StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] }
+              └─StreamExchange { dist: HashShard($expr5) }
+                └─StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] }
+                  └─StreamShare { id: 6 }
+                    └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
+                      └─StreamFilter { predicate: (event_type = 2:Int32) }
+                        └─StreamRowIdGen { row_id_index: 5 }
+                          └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+                            └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+                              └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
-    StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] } { materialized table: 4294967294 }
-    └── StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
-        ├── left table: 0
-        ├── right table: 2
-        ├── left degree table: 1
-        ├── right degree table: 3
-        ├── StreamExchange Hash([2]) from 1
-        └── StreamExchange Hash([1]) from 3
+    StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time, $expr5(hidden)] }
+    ├── materialized table: 4294967294
+    └── StreamExchange Hash([1, 4, 5]) from 1
 
     Fragment 1
-    StreamNoOp
-    └── StreamExchange NoShuffle from 2
+    StreamHashJoin [interval] { type: Inner, predicate: $expr4 = max($expr4) AND ($expr1 >= $expr6) AND ($expr1 <= $expr5), conditions_to_clean_left_state_table: ($expr1 >= $expr6), conditions_to_clean_right_state_table: ($expr1 <= $expr5), output_watermarks: [$expr1, $expr5], output: [$expr2, $expr4, $expr3, $expr1, _row_id, $expr5] }
+    ├── left table: 0
+    ├── right table: 2
+    ├── left degree table: 1
+    ├── right degree table: 3
+    ├── StreamExchange Hash([2]) from 2
+    └── StreamExchange Hash([1]) from 4
 
     Fragment 2
+    StreamNoOp
+    └── StreamExchange NoShuffle from 3
+
+    Fragment 3
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: (event_type = 2:Int32) }
         └── StreamRowIdGen { row_id_index: 5 }
@@ -740,14 +749,14 @@
                 └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
                     └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [$expr5, max($expr4), ($expr5 - '00:00:10':Interval) as $expr6], output_watermarks: [$expr5, $expr6] }
     └── StreamHashAgg [append_only] { group_key: [$expr5], aggs: [max($expr4), count], output_watermarks: [$expr5] } { intermediate state table: 6, state tables: [], distinct tables: [] }
-        └── StreamExchange Hash([0]) from 4
+        └── StreamExchange Hash([0]) from 5
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [(TumbleStart($expr1, '00:00:10':Interval) + '00:00:10':Interval) as $expr5, $expr4, _row_id], output_watermarks: [$expr5] }
-    └── StreamExchange NoShuffle from 2
+    └── StreamExchange NoShuffle from 3
 
     Table 0 { columns: [ $expr2, $expr3, $expr4, $expr1, _row_id ], primary key: [ $2 ASC, $4 ASC ], value indices: [ 0, 1, 2, 3, 4 ], distribution key: [ 2 ], read pk prefix len hint: 1 }
 
@@ -763,7 +772,7 @@
 
     Table 6 { columns: [ $expr5, max($expr4), count ], primary key: [ $0 ASC ], value indices: [ 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 4294967294 { columns: [ auction, price, bidder, date_time, _row_id, $expr5 ], primary key: [ $4 ASC, $5 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 1 ], read pk prefix len hint: 3 }
+    Table 4294967294 { columns: [ auction, price, bidder, date_time, _row_id, $expr5 ], primary key: [ $4 ASC, $5 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 1, 4, 5 ], read pk prefix len hint: 3 }
 
   eowc_stream_plan: |-
     StreamMaterialize { columns: [auction, price, bidder, date_time, _row_id(hidden), $expr5(hidden)], stream_key: [_row_id, $expr5, price], pk_columns: [_row_id, $expr5, price], pk_conflict: NoCheck, watermark_columns: [date_time] }
@@ -845,52 +854,56 @@
                   └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime, $expr5(hidden), $expr6(hidden), $expr8(hidden)] }
-    └─StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all }
-      ├─StreamExchange { dist: HashShard($expr3, $expr2, $expr5) }
-      │ └─StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] }
-      │   └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) }
-      │     └─StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] }
-      │       └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] }
-      │         └─StreamFilter { predicate: (event_type = 0:Int32) }
-      │           └─StreamShare { id: 6 }
-      │             └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
-      │               └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
-      │                 └─StreamRowIdGen { row_id_index: 5 }
-      │                   └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-      │                     └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-      │                       └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] }
-        └─StreamExchange { dist: HashShard($expr7, $expr6, $expr8) }
-          └─StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] }
-            └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] }
-              └─StreamFilter { predicate: (event_type = 1:Int32) }
-                └─StreamShare { id: 6 }
-                  └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
-                    └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
-                      └─StreamRowIdGen { row_id_index: 5 }
-                        └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-                          └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-                            └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) }
+      └─StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all }
+        ├─StreamExchange { dist: HashShard($expr3, $expr2, $expr5) }
+        │ └─StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] }
+        │   └─StreamExchange { dist: HashShard($expr3, $expr4, $expr2, $expr5) }
+        │     └─StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] }
+        │       └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] }
+        │         └─StreamFilter { predicate: (event_type = 0:Int32) }
+        │           └─StreamShare { id: 6 }
+        │             └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
+        │               └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+        │                 └─StreamRowIdGen { row_id_index: 5 }
+        │                   └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+        │                     └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+        │                       └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] }
+          └─StreamExchange { dist: HashShard($expr7, $expr6, $expr8) }
+            └─StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] }
+              └─StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] }
+                └─StreamFilter { predicate: (event_type = 1:Int32) }
+                  └─StreamShare { id: 6 }
+                    └─StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
+                      └─StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
+                        └─StreamRowIdGen { row_id_index: 5 }
+                          └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+                            └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+                              └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime, $expr5(hidden), $expr6(hidden), $expr8(hidden)] }
     ├── materialized table: 4294967294
-    └── StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0, 2, 3]) from 1
-        └── StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } { state table: 7 }
-            └── StreamExchange Hash([0, 1, 2]) from 4
+    └── StreamExchange Hash([0, 1, 2, 3]) from 1
 
     Fragment 1
-    StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } { state table: 4 }
-    └── StreamExchange Hash([0, 1, 2, 3]) from 2
+    StreamHashJoin [window, append_only] { type: Inner, predicate: $expr2 = $expr6 AND $expr5 = $expr8 AND $expr3 = $expr7, output_watermarks: [$expr2, $expr5, $expr6, $expr8], output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0, 2, 3]) from 2
+    └── StreamAppendOnlyDedup { dedup_cols: [$expr7, $expr6, $expr8] } { state table: 7 }
+        └── StreamExchange Hash([0, 1, 2]) from 5
 
     Fragment 2
+    StreamAppendOnlyDedup { dedup_cols: [$expr3, $expr4, $expr2, $expr5] } { state table: 4 }
+    └── StreamExchange Hash([0, 1, 2, 3]) from 3
+
+    Fragment 3
     StreamProject { exprs: [Field(person, 0:Int32) as $expr3, Field(person, 1:Int32) as $expr4, $expr2, ($expr2 + '00:00:10':Interval) as $expr5], output_watermarks: [$expr2, $expr5] }
     └── StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr2, _row_id], output_watermarks: [$expr1, $expr2] }
         └── StreamFilter { predicate: (event_type = 0:Int32) }
-            └── StreamExchange NoShuffle from 3
+            └── StreamExchange NoShuffle from 4
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [event_type, person, auction, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: ((event_type = 0:Int32) OR (event_type = 1:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
@@ -898,11 +911,11 @@
                 └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
                     └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 6 }
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [Field(auction, 7:Int32) as $expr7, $expr6, ($expr6 + '00:00:10':Interval) as $expr8], output_watermarks: [$expr6, $expr8] }
     └── StreamProject { exprs: [event_type, person, auction, $expr1, TumbleStart($expr1, '00:00:10':Interval) as $expr6, _row_id], output_watermarks: [$expr1, $expr6] }
         └── StreamFilter { predicate: (event_type = 1:Int32) }
-            └── StreamExchange NoShuffle from 3
+            └── StreamExchange NoShuffle from 4
 
     Table 0 { columns: [ $expr3, $expr4, $expr2, $expr5 ], primary key: [ $2 ASC, $3 ASC, $0 ASC, $1 ASC ], value indices: [ 0, 1, 2, 3 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 3 }
 
@@ -920,7 +933,7 @@
 
     Table 7 { columns: [ $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 1, 2 ], read pk prefix len hint: 3 }
 
-    Table 4294967294 { columns: [ id, name, starttime, $expr5, $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 2, 3 ], read pk prefix len hint: 4 }
+    Table 4294967294 { columns: [ id, name, starttime, $expr5, $expr7, $expr6, $expr8 ], primary key: [ $0 ASC, $1 ASC, $2 ASC, $3 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0, 1, 2, 3 ], read pk prefix len hint: 4 }
 
   eowc_stream_plan: |-
     StreamMaterialize { columns: [id, name, starttime, $expr5(hidden), $expr7(hidden), $expr6(hidden), $expr8(hidden)], stream_key: [id, name, starttime, $expr5], pk_columns: [id, name, starttime, $expr5], pk_conflict: NoCheck, watermark_columns: [starttime] }
@@ -1715,41 +1728,45 @@
               └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
-    └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] }
-      │   └─StreamFilter { predicate: (event_type = 2:Int32) }
-      │     └─StreamShare { id: 6 }
-      │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-      │         └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-      │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-      │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamExchange { dist: HashShard($expr7) }
-        └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] }
-          └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
-            └─StreamShare { id: 6 }
-              └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
-                └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
-                  └─StreamRowIdGen { row_id_index: 5 }
-                    └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-                      └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-                        └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr2, _row_id, _row_id) }
+      └─StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] }
+        │   └─StreamFilter { predicate: (event_type = 2:Int32) }
+        │     └─StreamShare { id: 6 }
+        │       └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+        │         └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+        │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+        │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamExchange { dist: HashShard($expr7) }
+          └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] }
+            └─StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
+              └─StreamShare { id: 6 }
+                └─StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
+                  └─StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
+                    └─StreamRowIdGen { row_id_index: 5 }
+                      └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+                        └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+                          └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, auction], pk_columns: [_row_id, _row_id#1, auction], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([0, 14, 15]) from 1
 
     Fragment 1
+    StreamHashJoin [append_only] { type: Inner, predicate: $expr2 = $expr7, output: [$expr2, $expr3, $expr4, $expr5, $expr6, $expr1, $expr8, $expr9, $expr10, $expr11, $expr1, $expr12, $expr13, $expr14, _row_id, _row_id] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr2, Field(bid, 1:Int32) as $expr3, Field(bid, 2:Int32) as $expr4, Field(bid, 3:Int32) as $expr5, Field(bid, 4:Int32) as $expr6, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: (event_type = 2:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, $expr1, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: ((event_type = 2:Int32) OR ((Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32))) }
         └── StreamRowIdGen { row_id_index: 5 }
@@ -1757,10 +1774,10 @@
                 └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
                     └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr7, Field(auction, 1:Int32) as $expr8, Field(auction, 2:Int32) as $expr9, Field(auction, 3:Int32) as $expr10, Field(auction, 4:Int32) as $expr11, $expr1, Field(auction, 6:Int32) as $expr12, Field(auction, 7:Int32) as $expr13, Field(auction, 8:Int32) as $expr14, _row_id], output_watermarks: [$expr1] }
     └── StreamFilter { predicate: (Field(auction, 8:Int32) = 10:Int32) AND (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
     Table 0 { columns: [ $expr2, $expr3, $expr4, $expr5, $expr6, $expr1, _row_id ], primary key: [ $0 ASC, $6 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
@@ -1774,7 +1791,12 @@
 
     Table 5 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 }
 
-    Table 4294967294 { columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ], primary key: [ $14 ASC, $15 ASC, $0 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ], distribution key: [ 0 ], read pk prefix len hint: 3 }
+    Table 4294967294
+    ├── columns: [ auction, bidder, price, channel, url, date_timeb, item_name, description, initial_bid, reserve, date_timea, expires, seller, category, _row_id, _row_id#1 ]
+    ├── primary key: [ $14 ASC, $15 ASC, $0 ASC ]
+    ├── value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]
+    ├── distribution key: [ 0, 14, 15 ]
+    └── read pk prefix len hint: 3
 
   eowc_stream_error: |-
     Not supported: The query cannot be executed in Emit-On-Window-Close mode.
@@ -1909,45 +1931,49 @@
                 └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr4(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
-      │   └─StreamFilter { predicate: (event_type = 1:Int32) }
-      │     └─StreamShare { id: 6 }
-      │       └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
-      │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-      │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-      │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamProject { exprs: [$expr4, max($expr5)] }
-        └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] }
-          └─StreamExchange { dist: HashShard($expr4) }
-            └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] }
-              └─StreamFilter { predicate: (event_type = 2:Int32) }
-                └─StreamShare { id: 6 }
-                  └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
-                    └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-                      └─StreamRowIdGen { row_id_index: 5 }
-                        └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-                          └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-                            └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr2, _row_id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+        │   └─StreamFilter { predicate: (event_type = 1:Int32) }
+        │     └─StreamShare { id: 6 }
+        │       └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+        │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+        │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+        │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamProject { exprs: [$expr4, max($expr5)] }
+          └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] }
+            └─StreamExchange { dist: HashShard($expr4) }
+              └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] }
+                └─StreamFilter { predicate: (event_type = 2:Int32) }
+                  └─StreamShare { id: 6 }
+                    └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+                      └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+                        └─StreamRowIdGen { row_id_index: 5 }
+                          └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+                            └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+                              └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, current_highest_bid, _row_id(hidden), $expr4(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [$expr4, max($expr5)] }
-            └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
-                └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([0, 3]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftOuter, predicate: $expr2 = $expr4, output: [$expr2, $expr3, max($expr5), _row_id, $expr4] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [$expr4, max($expr5)] }
+        └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [max($expr5), count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+            └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
     └── StreamFilter { predicate: (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, _row_id] }
     └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
@@ -1955,10 +1981,10 @@
                 └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
                     └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, Field(bid, 2:Int32) as $expr5, _row_id] }
     └── StreamFilter { predicate: (event_type = 2:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
     Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
@@ -1978,7 +2004,7 @@
     ├── columns: [ auction_id, auction_item_name, current_highest_bid, _row_id, $expr4 ]
     ├── primary key: [ $3 ASC, $0 ASC ]
     ├── value indices: [ 0, 1, 2, 3, 4 ]
-    ├── distribution key: [ 0 ]
+    ├── distribution key: [ 0, 3 ]
     └── read pk prefix len hint: 2
 
   eowc_stream_error: |-
@@ -2184,47 +2210,51 @@
                     └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
-      │   └─StreamFilter { predicate: (event_type = 1:Int32) }
-      │     └─StreamShare { id: 6 }
-      │       └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
-      │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-      │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-      │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamProject { exprs: [$expr4] }
-        └─StreamFilter { predicate: (count >= 20:Int32) }
-          └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
-            └─StreamExchange { dist: HashShard($expr4) }
-              └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
-                └─StreamFilter { predicate: (event_type = 2:Int32) }
-                  └─StreamShare { id: 6 }
-                    └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
-                      └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-                        └─StreamRowIdGen { row_id_index: 5 }
-                          └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-                            └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-                              └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr2, _row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+        │   └─StreamFilter { predicate: (event_type = 1:Int32) }
+        │     └─StreamShare { id: 6 }
+        │       └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+        │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+        │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+        │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamProject { exprs: [$expr4] }
+          └─StreamFilter { predicate: (count >= 20:Int32) }
+            └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
+              └─StreamExchange { dist: HashShard($expr4) }
+                └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
+                  └─StreamFilter { predicate: (event_type = 2:Int32) }
+                    └─StreamShare { id: 6 }
+                      └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+                        └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+                          └─StreamRowIdGen { row_id_index: 5 }
+                            └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+                              └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+                                └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [$expr4] }
-            └── StreamFilter { predicate: (count >= 20:Int32) }
-                └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
-                    └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([0, 2]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftSemi, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [$expr4] }
+        └── StreamFilter { predicate: (count >= 20:Int32) }
+            └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+                └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
     └── StreamFilter { predicate: (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, _row_id] }
     └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
@@ -2232,10 +2262,10 @@
                 └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
                     └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
     └── StreamFilter { predicate: (event_type = 2:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
     Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
@@ -2251,7 +2281,7 @@
 
     Table 6 { columns: [ $expr4, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 2 }
+    Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 2 ], read pk prefix len hint: 2 }
 
   eowc_stream_error: |-
     Not supported: The query cannot be executed in Emit-On-Window-Close mode.
@@ -2290,47 +2320,51 @@
                     └─BatchSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all }
-      ├─StreamExchange { dist: HashShard($expr2) }
-      │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
-      │   └─StreamFilter { predicate: (event_type = 1:Int32) }
-      │     └─StreamShare { id: 6 }
-      │       └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
-      │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-      │           └─StreamRowIdGen { row_id_index: 5 }
-      │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-      │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-      │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
-      └─StreamProject { exprs: [$expr4] }
-        └─StreamFilter { predicate: (count < 20:Int32) }
-          └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
-            └─StreamExchange { dist: HashShard($expr4) }
-              └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
-                └─StreamFilter { predicate: (event_type = 2:Int32) }
-                  └─StreamShare { id: 6 }
-                    └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
-                      └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
-                        └─StreamRowIdGen { row_id_index: 5 }
-                          └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
-                            └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
-                              └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+    └─StreamExchange { dist: HashShard($expr2, _row_id) }
+      └─StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all }
+        ├─StreamExchange { dist: HashShard($expr2) }
+        │ └─StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
+        │   └─StreamFilter { predicate: (event_type = 1:Int32) }
+        │     └─StreamShare { id: 6 }
+        │       └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+        │         └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+        │           └─StreamRowIdGen { row_id_index: 5 }
+        │             └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+        │               └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+        │                 └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
+        └─StreamProject { exprs: [$expr4] }
+          └─StreamFilter { predicate: (count < 20:Int32) }
+            └─StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] }
+              └─StreamExchange { dist: HashShard($expr4) }
+                └─StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
+                  └─StreamFilter { predicate: (event_type = 2:Int32) }
+                    └─StreamShare { id: 6 }
+                      └─StreamProject { exprs: [event_type, auction, bid, _row_id] }
+                        └─StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
+                          └─StreamRowIdGen { row_id_index: 5 }
+                            └─StreamWatermarkFilter { watermark_descs: [Desc { column: $expr1, expr: ($expr1 - '00:00:04':Interval) }], output_watermarks: [$expr1] }
+                              └─StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
+                                └─StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [auction_id, auction_item_name, _row_id(hidden)], stream_key: [_row_id, auction_id], pk_columns: [_row_id, auction_id], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [$expr4] }
-            └── StreamFilter { predicate: (count < 20:Int32) }
-                └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
-                    └── StreamExchange Hash([0]) from 3
+    └── StreamExchange Hash([0, 2]) from 1
 
     Fragment 1
+    StreamHashJoin { type: LeftAnti, predicate: $expr2 = $expr4, output: all } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [$expr4] }
+        └── StreamFilter { predicate: (count < 20:Int32) }
+            └── StreamHashAgg [append_only] { group_key: [$expr4], aggs: [count] } { intermediate state table: 6, state tables: [], distinct tables: [] }
+                └── StreamExchange Hash([0]) from 4
+
+    Fragment 2
     StreamProject { exprs: [Field(auction, 0:Int32) as $expr2, Field(auction, 1:Int32) as $expr3, _row_id] }
     └── StreamFilter { predicate: (event_type = 1:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
-    Fragment 2
+    Fragment 3
     StreamProject { exprs: [event_type, auction, bid, _row_id] }
     └── StreamFilter { predicate: ((event_type = 1:Int32) OR (event_type = 2:Int32)) }
         └── StreamRowIdGen { row_id_index: 5 }
@@ -2338,10 +2372,10 @@
                 └── StreamProject { exprs: [event_type, person, auction, bid, Case((event_type = 0:Int32), Field(person, 6:Int32), (event_type = 1:Int32), Field(auction, 5:Int32), Field(bid, 5:Int32)) as $expr1, _row_id] }
                     └── StreamSource { source: nexmark, columns: [event_type, person, auction, bid, _row_id] } { source state table: 5 }
 
-    Fragment 3
+    Fragment 4
     StreamProject { exprs: [Field(bid, 0:Int32) as $expr4, _row_id] }
     └── StreamFilter { predicate: (event_type = 2:Int32) }
-        └── StreamExchange NoShuffle from 2
+        └── StreamExchange NoShuffle from 3
 
     Table 0 { columns: [ $expr2, $expr3, _row_id ], primary key: [ $0 ASC, $2 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
@@ -2357,7 +2391,7 @@
 
     Table 6 { columns: [ $expr4, count ], primary key: [ $0 ASC ], value indices: [ 1 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0 ], read pk prefix len hint: 2 }
+    Table 4294967294 { columns: [ auction_id, auction_item_name, _row_id ], primary key: [ $2 ASC, $0 ASC ], value indices: [ 0, 1, 2 ], distribution key: [ 0, 2 ], read pk prefix len hint: 2 }
 
   eowc_stream_error: |-
     Not supported: The query cannot be executed in Emit-On-Window-Close mode.
diff --git a/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml b/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml
index 5cc81578f829c..733a19f4ba05c 100644
--- a/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/over_window_function.yaml
@@ -185,15 +185,16 @@
           └─BatchScan { table: t, columns: [t.x, t.y, t.w], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [x, y, sum, max, min, t._row_id(hidden), t.y(hidden)], stream_key: [t._row_id, y], pk_columns: [t._row_id, y], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.y = t.y, output: [t.x, t.y, sum(t.x), max(t.x), min(t.w), t._row_id, t.y] }
-      ├─StreamExchange { dist: HashShard(t.y) }
-      │ └─StreamShare { id: 1 }
-      │   └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamProject { exprs: [t.y, sum(t.x), max(t.x), min(t.w)] }
-        └─StreamHashAgg { group_key: [t.y], aggs: [sum(t.x), max(t.x), min(t.w), count] }
-          └─StreamExchange { dist: HashShard(t.y) }
-            └─StreamShare { id: 1 }
-              └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.y, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.y = t.y, output: [t.x, t.y, sum(t.x), max(t.x), min(t.w), t._row_id, t.y] }
+        ├─StreamExchange { dist: HashShard(t.y) }
+        │ └─StreamShare { id: 1 }
+        │   └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamProject { exprs: [t.y, sum(t.x), max(t.x), min(t.w)] }
+          └─StreamHashAgg { group_key: [t.y], aggs: [sum(t.x), max(t.x), min(t.w), count] }
+            └─StreamExchange { dist: HashShard(t.y) }
+              └─StreamShare { id: 1 }
+                └─StreamTableScan { table: t, columns: [t.x, t.y, t.w, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - id: aggregate with over clause, rows frame definition with implicit current row, without ORDER BY
   sql: |
     create table t(x int, y int);
@@ -913,12 +914,13 @@
         └─BatchScan { table: t, columns: [t.x, t.y, t.z], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [t1x, t2x, t1z, t2y, t2z, t._row_id(hidden)], stream_key: [t1x, t._row_id], pk_columns: [t1x, t._row_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.x = t.x, output: [t.x, t.x, t.z, t.y, t.z, t._row_id] }
-      ├─StreamGroupTopN { order: [t.y ASC], limit: 1, offset: 0, group_key: [t.x] }
-      │ └─StreamExchange { dist: HashShard(t.x) }
-      │   └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(t.x) }
-        └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.x, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.x = t.x, output: [t.x, t.x, t.z, t.y, t.z, t._row_id] }
+        ├─StreamGroupTopN { order: [t.y ASC], limit: 1, offset: 0, group_key: [t.x] }
+        │ └─StreamExchange { dist: HashShard(t.x) }
+        │   └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(t.x) }
+          └─StreamTableScan { table: t, columns: [t.x, t.y, t.z, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - id: split calls with different ORDER BY or PARTITION BY
   sql: |
     create table t(x int, y int, z int);
diff --git a/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml b/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml
index 55131ed1614cd..65469e7754e6b 100644
--- a/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/pk_derive.yaml
@@ -21,15 +21,16 @@
         Tone.id = Ttwo.id;
   stream_plan: |-
     StreamMaterialize { columns: [max_v1, max_v2, t1.id(hidden), t2.id(hidden)], stream_key: [t1.id], pk_columns: [t1.id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.id = t2.id, output: [max(t1.v1), max(t2.v2), t1.id, t2.id] }
-      ├─StreamProject { exprs: [t1.id, max(t1.v1)] }
-      │ └─StreamHashAgg { group_key: [t1.id], aggs: [max(t1.v1), count] }
-      │   └─StreamExchange { dist: HashShard(t1.id) }
-      │     └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamProject { exprs: [t2.id, max(t2.v2)] }
-        └─StreamHashAgg { group_key: [t2.id], aggs: [max(t2.v2), count] }
-          └─StreamExchange { dist: HashShard(t2.id) }
-            └─StreamTableScan { table: t2, columns: [t2.id, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.id = t2.id, output: [max(t1.v1), max(t2.v2), t1.id, t2.id] }
+        ├─StreamProject { exprs: [t1.id, max(t1.v1)] }
+        │ └─StreamHashAgg { group_key: [t1.id], aggs: [max(t1.v1), count] }
+        │   └─StreamExchange { dist: HashShard(t1.id) }
+        │     └─StreamTableScan { table: t1, columns: [t1.id, t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamProject { exprs: [t2.id, max(t2.v2)] }
+          └─StreamHashAgg { group_key: [t2.id], aggs: [max(t2.v2), count] }
+            └─StreamExchange { dist: HashShard(t2.id) }
+              └─StreamTableScan { table: t2, columns: [t2.id, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - sql: |
     create table t (id int, v int);
     SELECT Tone.max_v, Ttwo.min_v
@@ -51,15 +52,16 @@
         Tone.id = Ttwo.id;
   stream_plan: |-
     StreamMaterialize { columns: [max_v, min_v, t.id(hidden), t.id#1(hidden)], stream_key: [t.id], pk_columns: [t.id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.id = t.id, output: [max(t.v), min(t.v), t.id, t.id] }
-      ├─StreamProject { exprs: [t.id, max(t.v)] }
-      │ └─StreamHashAgg { group_key: [t.id], aggs: [max(t.v), count] }
-      │   └─StreamExchange { dist: HashShard(t.id) }
-      │     └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamProject { exprs: [t.id, min(t.v)] }
-        └─StreamHashAgg { group_key: [t.id], aggs: [min(t.v), count] }
-          └─StreamExchange { dist: HashShard(t.id) }
-            └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.id) }
+      └─StreamHashJoin { type: Inner, predicate: t.id = t.id, output: [max(t.v), min(t.v), t.id, t.id] }
+        ├─StreamProject { exprs: [t.id, max(t.v)] }
+        │ └─StreamHashAgg { group_key: [t.id], aggs: [max(t.v), count] }
+        │   └─StreamExchange { dist: HashShard(t.id) }
+        │     └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamProject { exprs: [t.id, min(t.v)] }
+          └─StreamHashAgg { group_key: [t.id], aggs: [min(t.v), count] }
+            └─StreamExchange { dist: HashShard(t.id) }
+              └─StreamTableScan { table: t, columns: [t.id, t.v, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - sql: |
     create table t (v1 varchar, v2 varchar, v3 varchar);
     select
diff --git a/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml b/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml
index ae37459ef7bed..91dff73df0e6a 100644
--- a/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/predicate_pushdown.yaml
@@ -260,15 +260,16 @@
     └─LogicalScan { table: t2, columns: [t2.v2], predicate: (t2.v2 > ('2021-04-01 00:00:00+00:00':Timestamptz + '01:00:00':Interval)) }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1], pk_columns: [t1._row_id, t2._row_id, v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamDynamicFilter { predicate: (t1.v1 > $expr1), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
-      │   ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      │   └─StreamExchange { dist: Broadcast }
-      │     └─StreamProject { exprs: [AddWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
-      │       └─StreamNow { output: [now] }
-      └─StreamExchange { dist: HashShard(t2.v2) }
-        └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamDynamicFilter { predicate: (t1.v1 > $expr1), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
+        │   ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        │   └─StreamExchange { dist: Broadcast }
+        │     └─StreamProject { exprs: [AddWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+        │       └─StreamNow { output: [now] }
+        └─StreamExchange { dist: HashShard(t2.v2) }
+          └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: now() in a complex cmp expr does not get pushed down
   sql: |
     create table t1(v1 timestamp with time zone);
@@ -343,14 +344,15 @@
     └─LogicalScan { table: t2, columns: [t2.v2], predicate: (t2.v2 > '2021-04-01 00:00:00+00:00':Timestamptz) }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, v1], pk_columns: [t1._row_id, t2._row_id, v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamDynamicFilter { predicate: (t1.v1 > now), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
-      │   ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      │   └─StreamExchange { dist: Broadcast }
-      │     └─StreamNow { output: [now] }
-      └─StreamExchange { dist: HashShard(t2.v2) }
-        └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.v1 = t2.v2, output: [t1.v1, t2.v2, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamDynamicFilter { predicate: (t1.v1 > now), output_watermarks: [t1.v1], output: [t1.v1, t1._row_id], cleaned_by_watermark: true }
+        │   ├─StreamTableScan { table: t1, columns: [t1.v1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        │   └─StreamExchange { dist: Broadcast }
+        │     └─StreamNow { output: [now] }
+        └─StreamExchange { dist: HashShard(t2.v2) }
+          └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: eq-predicate derived condition is banned for mismatching types
   sql: |
     create table t1(v1 int, v2 int);
diff --git a/src/frontend/planner_test/tests/testdata/output/project_set.yaml b/src/frontend/planner_test/tests/testdata/output/project_set.yaml
index 23db668a070df..676772d99d72e 100644
--- a/src/frontend/planner_test/tests/testdata/output/project_set.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/project_set.yaml
@@ -155,17 +155,18 @@
           └─BatchScan { table: t, columns: [t.x], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [unnest, t._row_id(hidden), projected_row_id(hidden), t._row_id#1(hidden), projected_row_id#1(hidden)], stream_key: [t._row_id, projected_row_id, t._row_id#1, projected_row_id#1, unnest], pk_columns: [t._row_id, projected_row_id, t._row_id#1, projected_row_id#1, unnest], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: Unnest($0) = Unnest($0), output: [Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id] }
-      ├─StreamExchange { dist: HashShard(Unnest($0)) }
-      │ └─StreamShare { id: 3 }
-      │   └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
-      │     └─StreamProjectSet { select_list: [Unnest($0), $1] }
-      │       └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(Unnest($0)) }
-        └─StreamShare { id: 3 }
-          └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
-            └─StreamProjectSet { select_list: [Unnest($0), $1] }
-              └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id) }
+      └─StreamHashJoin { type: Inner, predicate: Unnest($0) = Unnest($0), output: [Unnest($0), t._row_id, projected_row_id, t._row_id, projected_row_id] }
+        ├─StreamExchange { dist: HashShard(Unnest($0)) }
+        │ └─StreamShare { id: 3 }
+        │   └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
+        │     └─StreamProjectSet { select_list: [Unnest($0), $1] }
+        │       └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(Unnest($0)) }
+          └─StreamShare { id: 3 }
+            └─StreamProject { exprs: [Unnest($0), t._row_id, projected_row_id] }
+              └─StreamProjectSet { select_list: [Unnest($0), $1] }
+                └─StreamTableScan { table: t, columns: [t.x, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - name: issue-10080
   sql: |
     with cte as (SELECT 1 as v1, unnest(array[1,2,3,4,5]) AS v2) select v1 from cte;
diff --git a/src/frontend/planner_test/tests/testdata/output/select_except.yaml b/src/frontend/planner_test/tests/testdata/output/select_except.yaml
index 2193524b7076f..ffd6da30b90bc 100644
--- a/src/frontend/planner_test/tests/testdata/output/select_except.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/select_except.yaml
@@ -34,11 +34,12 @@
         └─BatchScan { table: t, columns: [t.v1, t.v2], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v3, v2, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v1], pk_columns: [t._row_id, t._row_id#1, v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v3, t.v2, t._row_id, t._row_id] }
-      ├─StreamExchange { dist: HashShard(t.v1) }
-      │ └─StreamTableScan { table: t, columns: [t.v1, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(t.v1) }
-        └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v3, t.v2, t._row_id, t._row_id] }
+        ├─StreamExchange { dist: HashShard(t.v1) }
+        │ └─StreamTableScan { table: t, columns: [t.v1, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(t.v1) }
+          └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - name: qualified wildcard
   sql: |
     create table t (v1 int, v2 int, v3 int);
@@ -52,11 +53,12 @@
         └─BatchScan { table: t, columns: [t.v1, t.v2, t.v3], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, v2, v3, t._row_id(hidden), t._row_id#1(hidden)], stream_key: [t._row_id, t._row_id#1, v1], pk_columns: [t._row_id, t._row_id#1, v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v2, t.v3, t._row_id, t._row_id] }
-      ├─StreamExchange { dist: HashShard(t.v1) }
-      │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
-      └─StreamExchange { dist: HashShard(t.v1) }
-        └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+    └─StreamExchange { dist: HashShard(t.v1, t._row_id, t._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t.v1 = t.v1, output: [t.v1, t.v2, t.v3, t._row_id, t._row_id] }
+        ├─StreamExchange { dist: HashShard(t.v1) }
+        │ └─StreamTableScan { table: t, columns: [t.v1, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
+        └─StreamExchange { dist: HashShard(t.v1) }
+          └─StreamTableScan { table: t, columns: [t.v1, t.v2, t.v3, t._row_id], pk: [t._row_id], dist: UpstreamHashShard(t._row_id) }
 - name: except with unknown column
   sql: |
     create table t (v1 int, v2 int, v3 int);
diff --git a/src/frontend/planner_test/tests/testdata/output/share.yaml b/src/frontend/planner_test/tests/testdata/output/share.yaml
index 2815b00784b1d..15404d6d863ab 100644
--- a/src/frontend/planner_test/tests/testdata/output/share.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/share.yaml
@@ -343,24 +343,25 @@
           └─BatchSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [a_id, b_id, a_ts, b_ts, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, a_id], pk_columns: [_row_id, _row_id#1, a_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: id = id, output: [id, id, date_time, date_time, _row_id, _row_id] }
-      ├─StreamExchange { dist: HashShard(id) }
-      │ └─StreamProject { exprs: [id, date_time, _row_id] }
-      │   └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [id, date_time, $expr1, _row_id], cleaned_by_watermark: true }
-      │     ├─StreamProject { exprs: [id, date_time, AtTimeZone(date_time, 'UTC':Varchar) as $expr1, _row_id] }
-      │     │ └─StreamFilter { predicate: (initial_bid = 1:Int32) }
-      │     │   └─StreamShare { id: 4 }
-      │     │     └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
-      │     │       └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
-      │     │         └─StreamRowIdGen { row_id_index: 10 }
-      │     │           └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
-      │     └─StreamExchange { dist: Broadcast }
-      │       └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:00:01':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
-      │         └─StreamNow { output: [now] }
-      └─StreamExchange { dist: HashShard(id) }
-        └─StreamFilter { predicate: (initial_bid = 2:Int32) }
-          └─StreamShare { id: 4 }
-            └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
-              └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
-                └─StreamRowIdGen { row_id_index: 10 }
-                  └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+    └─StreamExchange { dist: HashShard(id, _row_id, _row_id) }
+      └─StreamHashJoin { type: Inner, predicate: id = id, output: [id, id, date_time, date_time, _row_id, _row_id] }
+        ├─StreamExchange { dist: HashShard(id) }
+        │ └─StreamProject { exprs: [id, date_time, _row_id] }
+        │   └─StreamDynamicFilter { predicate: ($expr1 > $expr2), output_watermarks: [$expr1], output: [id, date_time, $expr1, _row_id], cleaned_by_watermark: true }
+        │     ├─StreamProject { exprs: [id, date_time, AtTimeZone(date_time, 'UTC':Varchar) as $expr1, _row_id] }
+        │     │ └─StreamFilter { predicate: (initial_bid = 1:Int32) }
+        │     │   └─StreamShare { id: 4 }
+        │     │     └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
+        │     │       └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
+        │     │         └─StreamRowIdGen { row_id_index: 10 }
+        │     │           └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
+        │     └─StreamExchange { dist: Broadcast }
+        │       └─StreamProject { exprs: [SubtractWithTimeZone(now, '00:00:01':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+        │         └─StreamNow { output: [now] }
+        └─StreamExchange { dist: HashShard(id) }
+          └─StreamFilter { predicate: (initial_bid = 2:Int32) }
+            └─StreamShare { id: 4 }
+              └─StreamProject { exprs: [id, initial_bid, date_time, _row_id] }
+                └─StreamFilter { predicate: ((initial_bid = 1:Int32) OR (initial_bid = 2:Int32)) }
+                  └─StreamRowIdGen { row_id_index: 10 }
+                    └─StreamSource { source: auction, columns: [id, item_name, description, initial_bid, reserve, date_time, expires, seller, category, extra, _row_id] }
diff --git a/src/frontend/planner_test/tests/testdata/output/shared_views.yaml b/src/frontend/planner_test/tests/testdata/output/shared_views.yaml
index 775812f77b59c..3777705c97ced 100644
--- a/src/frontend/planner_test/tests/testdata/output/shared_views.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/shared_views.yaml
@@ -23,19 +23,20 @@
                       └─LogicalScan { table: t1, columns: [t1.x, t1.y, t1._row_id] }
   stream_plan: |-
     StreamMaterialize { columns: [z, a, b, t1._row_id(hidden), t1._row_id#1(hidden), t1._row_id#2(hidden), t1.x(hidden)], stream_key: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, z], pk_columns: [t1._row_id, t1._row_id#1, t1._row_id#2, t1.x, z], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: $expr1 = $expr2, output: [$expr1, $expr2, $expr3, t1._row_id, t1._row_id, t1._row_id, t1.x] }
-      ├─StreamExchange { dist: HashShard($expr1) }
-      │ └─StreamShare { id: 3 }
-      │   └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
-      │     └─StreamFilter { predicate: (t1.y > 0:Int32) }
-      │       └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard($expr2) }
-        └─StreamProject { exprs: [(t1.x * $expr1) as $expr2, (t1.y * $expr1) as $expr3, t1._row_id, t1._row_id, t1.x] }
-          └─StreamHashJoin { type: Inner, predicate: t1.x = $expr1, output: [t1.x, t1.y, $expr1, t1._row_id, t1._row_id] }
-            ├─StreamExchange { dist: HashShard(t1.x) }
-            │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-            └─StreamExchange { dist: HashShard($expr1) }
-              └─StreamShare { id: 3 }
-                └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
-                  └─StreamFilter { predicate: (t1.y > 0:Int32) }
-                    └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+    └─StreamExchange { dist: HashShard($expr1, t1._row_id, t1._row_id, t1._row_id, t1.x) }
+      └─StreamHashJoin { type: Inner, predicate: $expr1 = $expr2, output: [$expr1, $expr2, $expr3, t1._row_id, t1._row_id, t1._row_id, t1.x] }
+        ├─StreamExchange { dist: HashShard($expr1) }
+        │ └─StreamShare { id: 3 }
+        │   └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
+        │     └─StreamFilter { predicate: (t1.y > 0:Int32) }
+        │       └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard($expr2) }
+          └─StreamProject { exprs: [(t1.x * $expr1) as $expr2, (t1.y * $expr1) as $expr3, t1._row_id, t1._row_id, t1.x] }
+            └─StreamHashJoin { type: Inner, predicate: t1.x = $expr1, output: [t1.x, t1.y, $expr1, t1._row_id, t1._row_id] }
+              ├─StreamExchange { dist: HashShard(t1.x) }
+              │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+              └─StreamExchange { dist: HashShard($expr1) }
+                └─StreamShare { id: 3 }
+                  └─StreamProject { exprs: [(t1.x + t1.y) as $expr1, t1._row_id] }
+                    └─StreamFilter { predicate: (t1.y > 0:Int32) }
+                      └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/subquery.yaml b/src/frontend/planner_test/tests/testdata/output/subquery.yaml
index 1ad1d9f92c418..e07e84e040929 100644
--- a/src/frontend/planner_test/tests/testdata/output/subquery.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/subquery.yaml
@@ -346,21 +346,22 @@
                     └─BatchScan { table: auction, columns: [auction.date_time], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [date_time, window_start, window_end, auction._row_id(hidden)], stream_key: [auction._row_id, window_start, window_end, date_time], pk_columns: [auction._row_id, window_start, window_end, date_time], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: auction.date_time IS NOT DISTINCT FROM auction.date_time, output: all }
-      ├─StreamExchange { dist: HashShard(auction.date_time) }
-      │ └─StreamShare { id: 3 }
-      │   └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
-      │     └─StreamFilter { predicate: IsNotNull(auction.date_time) }
-      │       └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
-      └─StreamProject { exprs: [auction.date_time] }
-        └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
-          └─StreamProject { exprs: [auction.date_time] }
-            └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
-              └─StreamExchange { dist: HashShard(auction.date_time) }
-                └─StreamShare { id: 3 }
-                  └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
-                    └─StreamFilter { predicate: IsNotNull(auction.date_time) }
-                      └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
+    └─StreamExchange { dist: HashShard(auction.date_time, window_start, window_end, auction._row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: auction.date_time IS NOT DISTINCT FROM auction.date_time, output: all }
+        ├─StreamExchange { dist: HashShard(auction.date_time) }
+        │ └─StreamShare { id: 3 }
+        │   └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
+        │     └─StreamFilter { predicate: IsNotNull(auction.date_time) }
+        │       └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
+        └─StreamProject { exprs: [auction.date_time] }
+          └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
+            └─StreamProject { exprs: [auction.date_time] }
+              └─StreamHashAgg { group_key: [auction.date_time], aggs: [count] }
+                └─StreamExchange { dist: HashShard(auction.date_time) }
+                  └─StreamShare { id: 3 }
+                    └─StreamHopWindow { time_col: auction.date_time, slide: 00:00:01, size: 01:00:00, output: [auction.date_time, window_start, window_end, auction._row_id] }
+                      └─StreamFilter { predicate: IsNotNull(auction.date_time) }
+                        └─StreamTableScan { table: auction, columns: [auction.date_time, auction._row_id], pk: [auction._row_id], dist: UpstreamHashShard(auction._row_id) }
 - sql: |
     CREATE TABLE t (v int);
     SELECT 1 FROM t AS t_inner WHERE EXISTS ( SELECT 1 HAVING t_inner.v > 1);
@@ -535,22 +536,23 @@
                   └─BatchScan { table: t, columns: [t.x], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [x, y, k, sum_x, t.x(hidden)], stream_key: [k, x], pk_columns: [k, x], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, t.y, t.k, sum(Unnest($0)), t.x] }
-      ├─StreamExchange { dist: HashShard(t.x) }
-      │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
-      └─StreamProject { exprs: [t.x, sum(Unnest($0))] }
-        └─StreamHashAgg { group_key: [t.x], aggs: [sum(Unnest($0)), count] }
-          └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, Unnest($0), t.x, projected_row_id] }
-            ├─StreamProject { exprs: [t.x] }
-            │ └─StreamHashAgg { group_key: [t.x], aggs: [count] }
-            │   └─StreamExchange { dist: HashShard(t.x) }
-            │     └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
-            └─StreamProject { exprs: [t.x, Unnest($0), projected_row_id] }
-              └─StreamProjectSet { select_list: [$0, Unnest($0)] }
-                └─StreamProject { exprs: [t.x] }
-                  └─StreamHashAgg { group_key: [t.x], aggs: [count] }
-                    └─StreamExchange { dist: HashShard(t.x) }
-                      └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
+    └─StreamExchange { dist: HashShard(t.x, t.k) }
+      └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, t.y, t.k, sum(Unnest($0)), t.x] }
+        ├─StreamExchange { dist: HashShard(t.x) }
+        │ └─StreamTableScan { table: t, columns: [t.x, t.y, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
+        └─StreamProject { exprs: [t.x, sum(Unnest($0))] }
+          └─StreamHashAgg { group_key: [t.x], aggs: [sum(Unnest($0)), count] }
+            └─StreamHashJoin { type: LeftOuter, predicate: t.x IS NOT DISTINCT FROM t.x, output: [t.x, Unnest($0), t.x, projected_row_id] }
+              ├─StreamProject { exprs: [t.x] }
+              │ └─StreamHashAgg { group_key: [t.x], aggs: [count] }
+              │   └─StreamExchange { dist: HashShard(t.x) }
+              │     └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
+              └─StreamProject { exprs: [t.x, Unnest($0), projected_row_id] }
+                └─StreamProjectSet { select_list: [$0, Unnest($0)] }
+                  └─StreamProject { exprs: [t.x] }
+                    └─StreamHashAgg { group_key: [t.x], aggs: [count] }
+                      └─StreamExchange { dist: HashShard(t.x) }
+                        └─StreamTableScan { table: t, columns: [t.x, t.k], pk: [t.k], dist: UpstreamHashShard(t.k) }
 - name: CorrelatedInputRef in ProjectSet and apply on condition is true.
   sql: |
     create table t(x int[], y int[], k int primary key);
@@ -582,29 +584,29 @@
     create table t(x int[], y int[], k int primary key);
     select *, (select sum(i) from (select unnest(x) i, 1 c) Q where k = c ) as sum_x from t;
   optimized_logical_plan_for_batch: |-
-    LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
+    LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
     ├─LogicalScan { table: t, columns: [t.x, t.y, t.k] }
-    └─LogicalAgg { group_key: [first_value(t.x order_by(t.x ASC)), t.k], aggs: [sum(Unnest($0))] }
-      └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(first_value(t.x order_by(t.x ASC)), first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
-        ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+    └─LogicalAgg { group_key: [internal_last_seen_value(t.x), t.k], aggs: [sum(Unnest($0))] }
+      └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(internal_last_seen_value(t.x), internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
+        ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
         │ └─LogicalScan { table: t, columns: [t.x, t.k] }
-        └─LogicalProject { exprs: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
+        └─LogicalProject { exprs: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
           └─LogicalProjectSet { select_list: [$0, $1, Unnest($0)] }
-            └─LogicalJoin { type: Inner, on: true, output: [first_value(t.x order_by(t.x ASC)), t.k] }
-              ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+            └─LogicalJoin { type: Inner, on: true, output: [internal_last_seen_value(t.x), t.k] }
+              ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
               │ └─LogicalScan { table: t, columns: [t.x, t.k], predicate: (t.k = 1:Int32) }
               └─LogicalValues { rows: [[]], schema: Schema { fields: [] } }
   optimized_logical_plan_for_stream: |-
-    LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
+    LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(t.x, internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [t.x, t.y, t.k, sum(Unnest($0))] }
     ├─LogicalScan { table: t, columns: [t.x, t.y, t.k] }
-    └─LogicalAgg { group_key: [first_value(t.x order_by(t.x ASC)), t.k], aggs: [sum(Unnest($0))] }
-      └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(first_value(t.x order_by(t.x ASC)), first_value(t.x order_by(t.x ASC))) AND IsNotDistinctFrom(t.k, t.k), output: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
-        ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+    └─LogicalAgg { group_key: [internal_last_seen_value(t.x), t.k], aggs: [sum(Unnest($0))] }
+      └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(internal_last_seen_value(t.x), internal_last_seen_value(t.x)) AND IsNotDistinctFrom(t.k, t.k), output: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
+        ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
         │ └─LogicalScan { table: t, columns: [t.x, t.k] }
-        └─LogicalProject { exprs: [first_value(t.x order_by(t.x ASC)), t.k, Unnest($0)] }
+        └─LogicalProject { exprs: [internal_last_seen_value(t.x), t.k, Unnest($0)] }
           └─LogicalProjectSet { select_list: [$0, $1, Unnest($0)] }
-            └─LogicalJoin { type: Inner, on: true, output: [first_value(t.x order_by(t.x ASC)), t.k] }
-              ├─LogicalAgg { group_key: [t.k], aggs: [first_value(t.x order_by(t.x ASC))] }
+            └─LogicalJoin { type: Inner, on: true, output: [internal_last_seen_value(t.x), t.k] }
+              ├─LogicalAgg { group_key: [t.k], aggs: [internal_last_seen_value(t.x)] }
               │ └─LogicalScan { table: t, columns: [t.x, t.k], predicate: (t.k = 1:Int32) }
               └─LogicalValues { rows: [[]], schema: Schema { fields: [] } }
 - name: CorrelatedInputRef in ProjectSet and apply on condition refers to table function.
@@ -632,16 +634,17 @@
                       └─BatchScan { table: integers, columns: [integers.i], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [i, col, integers._row_id(hidden), integers.i(hidden)], stream_key: [integers._row_id, i], pk_columns: [i, integers._row_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, row_number, integers._row_id, integers.i] }
-      ├─StreamExchange { dist: HashShard(integers.i) }
-      │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
-      └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
-        └─StreamProject { exprs: [integers.i, row_number, integers._row_id] }
-          └─StreamOverWindow { window_functions: [row_number() OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
-            └─StreamExchange { dist: HashShard(integers.i) }
-              └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
-                └─StreamFilter { predicate: IsNotNull(integers.i) }
-                  └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+    └─StreamExchange { dist: HashShard(integers.i, integers._row_id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, row_number, integers._row_id, integers.i] }
+        ├─StreamExchange { dist: HashShard(integers.i) }
+        │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+        └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
+          └─StreamProject { exprs: [integers.i, row_number, integers._row_id] }
+            └─StreamOverWindow { window_functions: [row_number() OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
+              └─StreamExchange { dist: HashShard(integers.i) }
+                └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
+                  └─StreamFilter { predicate: IsNotNull(integers.i) }
+                    └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
 - name: test over window subquery 2 (with nested loop join so cannot be transformed into a stream plan)
   sql: |
     CREATE TABLE integers(i INTEGER);
@@ -690,16 +693,17 @@
                       └─BatchScan { table: integers, columns: [integers.i], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [i, col, integers._row_id(hidden), integers.i(hidden)], stream_key: [integers._row_id, i], pk_columns: [i, integers._row_id], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, sum, integers._row_id, integers.i] }
-      ├─StreamExchange { dist: HashShard(integers.i) }
-      │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
-      └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
-        └─StreamProject { exprs: [integers.i, sum, integers._row_id] }
-          └─StreamOverWindow { window_functions: [sum(integers.i) OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
-            └─StreamExchange { dist: HashShard(integers.i) }
-              └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
-                └─StreamFilter { predicate: IsNotNull(integers.i) }
-                  └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+    └─StreamExchange { dist: HashShard(integers.i, integers._row_id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: integers.i IS NOT DISTINCT FROM integers.i, output: [integers.i, sum, integers._row_id, integers.i] }
+        ├─StreamExchange { dist: HashShard(integers.i) }
+        │ └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+        └─StreamGroupTopN { order: [integers.i ASC], limit: 1, offset: 0, group_key: [integers.i] }
+          └─StreamProject { exprs: [integers.i, sum, integers._row_id] }
+            └─StreamOverWindow { window_functions: [sum(integers.i) OVER(PARTITION BY integers.i ORDER BY integers.i ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
+              └─StreamExchange { dist: HashShard(integers.i) }
+                └─StreamProject { exprs: [integers.i, integers.i, integers._row_id] }
+                  └─StreamFilter { predicate: IsNotNull(integers.i) }
+                    └─StreamTableScan { table: integers, columns: [integers.i, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
 - name: test over window subquery 4  (with nested loop join so cannot be transformed into a stream plan)
   sql: |
     CREATE TABLE integers(i INTEGER);
@@ -747,17 +751,18 @@
                     └─BatchScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [i, integers._row_id(hidden), $expr1(hidden), integers.correlated_col(hidden)], stream_key: [integers._row_id, $expr1, integers.correlated_col], pk_columns: [integers._row_id, $expr1, integers.correlated_col], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: $expr1 = sum AND integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.i, integers._row_id, $expr1, integers.correlated_col] }
-      ├─StreamExchange { dist: HashShard(integers.correlated_col, $expr1) }
-      │ └─StreamProject { exprs: [integers.i, integers.correlated_col, integers.i::Int64 as $expr1, integers._row_id] }
-      │   └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
-      └─StreamExchange { dist: HashShard(rows.correlated_col, sum) }
-        └─StreamProject { exprs: [rows.correlated_col, sum, rows._row_id, rows.k] }
-          └─StreamOverWindow { window_functions: [sum(rows.v) OVER(PARTITION BY rows.correlated_col, rows.k ORDER BY rows.v ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
-            └─StreamExchange { dist: HashShard(rows.correlated_col, rows.k) }
-              └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
-                └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
-                  └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
+    └─StreamExchange { dist: HashShard(integers._row_id, $expr1, integers.correlated_col) }
+      └─StreamHashJoin { type: LeftSemi, predicate: $expr1 = sum AND integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.i, integers._row_id, $expr1, integers.correlated_col] }
+        ├─StreamExchange { dist: HashShard(integers.correlated_col, $expr1) }
+        │ └─StreamProject { exprs: [integers.i, integers.correlated_col, integers.i::Int64 as $expr1, integers._row_id] }
+        │   └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+        └─StreamExchange { dist: HashShard(rows.correlated_col, sum) }
+          └─StreamProject { exprs: [rows.correlated_col, sum, rows._row_id, rows.k] }
+            └─StreamOverWindow { window_functions: [sum(rows.v) OVER(PARTITION BY rows.correlated_col, rows.k ORDER BY rows.v ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] }
+              └─StreamExchange { dist: HashShard(rows.correlated_col, rows.k) }
+                └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
+                  └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
+                    └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
 - name: test cardinality visitor with correlated filter
   sql: |
     CREATE TABLE t1(i INT);
@@ -818,21 +823,22 @@
                           └─BatchScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [i, correlated_col, integers._row_id(hidden), 2:Int64(hidden)], stream_key: [integers._row_id, correlated_col, 2:Int64], pk_columns: [integers._row_id, correlated_col, 2:Int64], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: integers.correlated_col IS NOT DISTINCT FROM integers.correlated_col AND 2:Int64 = $expr1, output: [integers.i, integers.correlated_col, integers._row_id, 2:Int64] }
-      ├─StreamExchange { dist: HashShard(integers.correlated_col) }
-      │ └─StreamProject { exprs: [integers.i, integers.correlated_col, 2:Int64, integers._row_id] }
-      │   └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
-      └─StreamProject { exprs: [integers.correlated_col, (count(distinct rows.k) + count(distinct rows.v)) as $expr1] }
-        └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count(distinct rows.k), count(distinct rows.v), count] }
-          └─StreamHashJoin { type: LeftOuter, predicate: integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.correlated_col, rows.k, rows.v, rows._row_id] }
-            ├─StreamProject { exprs: [integers.correlated_col] }
-            │ └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count] }
-            │   └─StreamExchange { dist: HashShard(integers.correlated_col) }
-            │     └─StreamTableScan { table: integers, columns: [integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
-            └─StreamExchange { dist: HashShard(rows.correlated_col) }
-              └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
-                └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
-                  └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
+    └─StreamExchange { dist: HashShard(integers.correlated_col, integers._row_id, 2:Int64) }
+      └─StreamHashJoin { type: LeftSemi, predicate: integers.correlated_col IS NOT DISTINCT FROM integers.correlated_col AND 2:Int64 = $expr1, output: [integers.i, integers.correlated_col, integers._row_id, 2:Int64] }
+        ├─StreamExchange { dist: HashShard(integers.correlated_col) }
+        │ └─StreamProject { exprs: [integers.i, integers.correlated_col, 2:Int64, integers._row_id] }
+        │   └─StreamTableScan { table: integers, columns: [integers.i, integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+        └─StreamProject { exprs: [integers.correlated_col, (count(distinct rows.k) + count(distinct rows.v)) as $expr1] }
+          └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count(distinct rows.k), count(distinct rows.v), count] }
+            └─StreamHashJoin { type: LeftOuter, predicate: integers.correlated_col IS NOT DISTINCT FROM rows.correlated_col, output: [integers.correlated_col, rows.k, rows.v, rows._row_id] }
+              ├─StreamProject { exprs: [integers.correlated_col] }
+              │ └─StreamHashAgg { group_key: [integers.correlated_col], aggs: [count] }
+              │   └─StreamExchange { dist: HashShard(integers.correlated_col) }
+              │     └─StreamTableScan { table: integers, columns: [integers.correlated_col, integers._row_id], pk: [integers._row_id], dist: UpstreamHashShard(integers._row_id) }
+              └─StreamExchange { dist: HashShard(rows.correlated_col) }
+                └─StreamProject { exprs: [rows.correlated_col, rows.k, rows.v, rows._row_id] }
+                  └─StreamFilter { predicate: IsNotNull(rows.correlated_col) }
+                    └─StreamTableScan { table: rows, columns: [rows.k, rows.v, rows.correlated_col, rows._row_id], pk: [rows._row_id], dist: UpstreamHashShard(rows._row_id) }
 - name: test hop window subquery 1
   sql: |
     create table t1 (k int primary key, ts timestamp);
@@ -848,12 +854,13 @@
                 └─BatchValues { rows: [[1:Int32], [2:Int32]] }
   stream_plan: |-
     StreamMaterialize { columns: [col, k, ts, window_start, window_end], stream_key: [col, window_start, window_end], pk_columns: [col, window_start, window_end], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: 1:Int32 = t1.k, output: all }
-      ├─StreamAppendOnlyDedup { dedup_cols: [1:Int32] }
-      │ └─StreamExchange { dist: HashShard(1:Int32) }
-      │   └─StreamProject { exprs: [1:Int32] }
-      │     └─StreamValues { rows: [[1:Int32, 0:Int64], [2:Int32, 1:Int64]] }
-      └─StreamExchange { dist: HashShard(t1.k) }
-        └─StreamHopWindow { time_col: t1.ts, slide: 00:10:00, size: 00:30:00, output: all }
-          └─StreamFilter { predicate: IsNotNull(t1.ts) }
-            └─StreamTableScan { table: t1, columns: [t1.k, t1.ts], pk: [t1.k], dist: UpstreamHashShard(t1.k) }
+    └─StreamExchange { dist: HashShard(1:Int32, window_start, window_end) }
+      └─StreamHashJoin { type: Inner, predicate: 1:Int32 = t1.k, output: all }
+        ├─StreamAppendOnlyDedup { dedup_cols: [1:Int32] }
+        │ └─StreamExchange { dist: HashShard(1:Int32) }
+        │   └─StreamProject { exprs: [1:Int32] }
+        │     └─StreamValues { rows: [[1:Int32, 0:Int64], [2:Int32, 1:Int64]] }
+        └─StreamExchange { dist: HashShard(t1.k) }
+          └─StreamHopWindow { time_col: t1.ts, slide: 00:10:00, size: 00:30:00, output: all }
+            └─StreamFilter { predicate: IsNotNull(t1.ts) }
+              └─StreamTableScan { table: t1, columns: [t1.k, t1.ts], pk: [t1.k], dist: UpstreamHashShard(t1.k) }
diff --git a/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml b/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml
index 6d216ad9c81c4..0d393c378ff85 100644
--- a/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/subquery_expr_correlated.yaml
@@ -466,14 +466,14 @@
                     └─LogicalScan { table: c, columns: [c.c1, c.c2, c.c3, c._row_id] }
   optimized_logical_plan_for_batch: |-
     LogicalAgg { aggs: [count] }
-    └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.a3, first_value(a.a3 order_by(a.a3 ASC))) AND IsNotDistinctFrom(b.b2, b.b2), output: [] }
+    └─LogicalJoin { type: Inner, on: IsNotDistinctFrom(a.a3, internal_last_seen_value(a.a3)) AND IsNotDistinctFrom(b.b2, b.b2), output: [] }
       ├─LogicalJoin { type: Inner, on: (a.a3 = b.b2), output: all }
       │ ├─LogicalScan { table: a, columns: [a.a3] }
       │ └─LogicalScan { table: b, columns: [b.b2] }
       └─LogicalFilter { predicate: (3:Int32 = count(1:Int32)) }
-        └─LogicalAgg { group_key: [first_value(a.a3 order_by(a.a3 ASC)), b.b2], aggs: [count(1:Int32)] }
-          └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(first_value(a.a3 order_by(a.a3 ASC)), c.c3) AND IsNotDistinctFrom(b.b2, c.c2), output: [first_value(a.a3 order_by(a.a3 ASC)), b.b2, 1:Int32] }
-            ├─LogicalAgg { group_key: [b.b2], aggs: [first_value(a.a3 order_by(a.a3 ASC))] }
+        └─LogicalAgg { group_key: [internal_last_seen_value(a.a3), b.b2], aggs: [count(1:Int32)] }
+          └─LogicalJoin { type: LeftOuter, on: IsNotDistinctFrom(internal_last_seen_value(a.a3), c.c3) AND IsNotDistinctFrom(b.b2, c.c2), output: [internal_last_seen_value(a.a3), b.b2, 1:Int32] }
+            ├─LogicalAgg { group_key: [b.b2], aggs: [internal_last_seen_value(a.a3)] }
             │ └─LogicalJoin { type: Inner, on: (a.a3 = b.b2), output: all }
             │   ├─LogicalScan { table: a, columns: [a.a3] }
             │   └─LogicalScan { table: b, columns: [b.b2] }
@@ -717,15 +717,16 @@
                 └─BatchScan { table: t2, columns: [t2.x], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [x, y, t1._row_id(hidden)], stream_key: [t1._row_id, x], pk_columns: [t1._row_id, x], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: t1.x IS NOT DISTINCT FROM t2.x, output: all }
-      ├─StreamExchange { dist: HashShard(t1.x) }
-      │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamProject { exprs: [t2.x] }
-        └─StreamGroupTopN { order: [t2.x ASC], limit: 1, offset: 0, group_key: [t2.x] }
-          └─StreamExchange { dist: HashShard(t2.x) }
-            └─StreamProject { exprs: [t2.x, t2.x, t2._row_id] }
-              └─StreamFilter { predicate: IsNotNull(t2.x) }
-                └─StreamTableScan { table: t2, columns: [t2.x, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.x, t1._row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: t1.x IS NOT DISTINCT FROM t2.x, output: all }
+        ├─StreamExchange { dist: HashShard(t1.x) }
+        │ └─StreamTableScan { table: t1, columns: [t1.x, t1.y, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamProject { exprs: [t2.x] }
+          └─StreamGroupTopN { order: [t2.x ASC], limit: 1, offset: 0, group_key: [t2.x] }
+            └─StreamExchange { dist: HashShard(t2.x) }
+              └─StreamProject { exprs: [t2.x, t2.x, t2._row_id] }
+                └─StreamFilter { predicate: IsNotNull(t2.x) }
+                  └─StreamTableScan { table: t2, columns: [t2.x, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - sql: |
     create table t1(x int, y int);
     create table t2(x int, y int);
@@ -883,14 +884,15 @@
               └─BatchScan { table: t2, columns: [t2.v2, t2.k2], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1, k1], pk_columns: [t1._row_id, v1, k1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
-      ├─StreamExchange { dist: HashShard(t1.k1) }
-      │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
-        └─StreamExchange { dist: HashShard(t2.k2) }
-          └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
-            └─StreamFilter { predicate: IsNotNull(t2.k2) }
-              └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1.k1, t1._row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
+        ├─StreamExchange { dist: HashShard(t1.k1) }
+        │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
+          └─StreamExchange { dist: HashShard(t2.k2) }
+            └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
+              └─StreamFilter { predicate: IsNotNull(t2.k2) }
+                └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: test ApplyTopNTransposeRule case 2
   sql: |
     create table t1 (v1 int, k1 int);
@@ -908,16 +910,17 @@
               └─BatchScan { table: t2, columns: [t2.v2], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1], pk_columns: [t1._row_id, v1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2, output: all }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.v2) }
-        └─StreamProject { exprs: [t2.v2, t2._row_id] }
-          └─StreamTopN { order: [t2.v2 ASC], limit: 1, offset: 0 }
-            └─StreamExchange { dist: Single }
-              └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [$expr1] }
-                └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
-                  └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2, output: all }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.v2) }
+          └─StreamProject { exprs: [t2.v2, t2._row_id] }
+            └─StreamTopN { order: [t2.v2 ASC], limit: 1, offset: 0 }
+              └─StreamExchange { dist: Single }
+                └─StreamGroupTopN { order: [t2.v2 ASC], limit: 1, offset: 0, group_key: [$expr1] }
+                  └─StreamProject { exprs: [t2.v2, t2._row_id, Vnode(t2._row_id) as $expr1] }
+                    └─StreamTableScan { table: t2, columns: [t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: test ApplyLimitTransposeRule case 1
   sql: |
     create table t1 (v1 int, k1 int);
@@ -935,11 +938,12 @@
               └─BatchScan { table: t2, columns: [t2.v2, t2.k2], distribution: SomeShard }
   stream_plan: |-
     StreamMaterialize { columns: [v1, k1, t1._row_id(hidden)], stream_key: [t1._row_id, v1, k1], pk_columns: [t1._row_id, v1, k1], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
-      ├─StreamExchange { dist: HashShard(t1.k1) }
-      │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamGroupTopN { order: [t2.k2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
-        └─StreamExchange { dist: HashShard(t2.k2) }
-          └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
-            └─StreamFilter { predicate: IsNotNull(t2.k2) }
-              └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1.k1, t1._row_id) }
+      └─StreamHashJoin { type: LeftSemi, predicate: t1.v1 = t2.v2 AND t1.k1 IS NOT DISTINCT FROM t2.k2, output: all }
+        ├─StreamExchange { dist: HashShard(t1.k1) }
+        │ └─StreamTableScan { table: t1, columns: [t1.v1, t1.k1, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamGroupTopN { order: [t2.k2 ASC], limit: 1, offset: 0, group_key: [t2.k2] }
+          └─StreamExchange { dist: HashShard(t2.k2) }
+            └─StreamProject { exprs: [t2.k2, t2.v2, t2._row_id] }
+              └─StreamFilter { predicate: IsNotNull(t2.k2) }
+                └─StreamTableScan { table: t2, columns: [t2.v2, t2.k2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml
index 6673d86fd9745..29e391853cf8a 100644
--- a/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/temporal_filter.yaml
@@ -122,19 +122,20 @@
     select * from t1 join t2 on a = b AND ta < now() - interval '1 hour' and ta >= now() - interval '2 hour';
   stream_plan: |-
     StreamMaterialize { columns: [a, ta, b, tb, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, a], pk_columns: [t1._row_id, t2._row_id, a], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.a) }
-      │ └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
-      │   ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
-      │   │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      │   │ └─StreamExchange { dist: Broadcast }
-      │   │   └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
-      │   │     └─StreamNow { output: [now] }
-      │   └─StreamExchange { dist: Broadcast }
-      │     └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
-      │       └─StreamNow { output: [now] }
-      └─StreamExchange { dist: HashShard(t2.b) }
-        └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.a, t1._row_id, t2._row_id) }
+      └─StreamHashJoin { type: Inner, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.a) }
+        │ └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
+        │   ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
+        │   │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        │   │ └─StreamExchange { dist: Broadcast }
+        │   │   └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+        │   │     └─StreamNow { output: [now] }
+        │   └─StreamExchange { dist: Broadcast }
+        │     └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+        │       └─StreamNow { output: [now] }
+        └─StreamExchange { dist: HashShard(t2.b) }
+          └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: Temporal filter in on clause for left join's left side
   sql: |
     create table t1 (a int, ta timestamp with time zone);
@@ -150,19 +151,20 @@
     select * from t1 right join t2 on a = b AND ta < now() - interval '1 hour' and ta >= now() - interval '2 hour';
   stream_plan: |-
     StreamMaterialize { columns: [a, ta, b, tb, t2._row_id(hidden), t1._row_id(hidden)], stream_key: [t2._row_id, t1._row_id, b], pk_columns: [t2._row_id, t1._row_id, b], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: t2.b = t1.a, output: [t1.a, t1.ta, t2.b, t2.tb, t2._row_id, t1._row_id] }
-      ├─StreamExchange { dist: HashShard(t2.b) }
-      │ └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-      └─StreamExchange { dist: HashShard(t1.a) }
-        └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
-          ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
-          │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-          │ └─StreamExchange { dist: Broadcast }
-          │   └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
-          │     └─StreamNow { output: [now] }
-          └─StreamExchange { dist: Broadcast }
-            └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
-              └─StreamNow { output: [now] }
+    └─StreamExchange { dist: HashShard(t2.b, t2._row_id, t1._row_id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: t2.b = t1.a, output: [t1.a, t1.ta, t2.b, t2.tb, t2._row_id, t1._row_id] }
+        ├─StreamExchange { dist: HashShard(t2.b) }
+        │ └─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+        └─StreamExchange { dist: HashShard(t1.a) }
+          └─StreamDynamicFilter { predicate: (t1.ta < $expr2), output: [t1.a, t1.ta, t1._row_id] }
+            ├─StreamDynamicFilter { predicate: (t1.ta >= $expr1), output_watermarks: [t1.ta], output: [t1.a, t1.ta, t1._row_id], cleaned_by_watermark: true }
+            │ ├─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+            │ └─StreamExchange { dist: Broadcast }
+            │   └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+            │     └─StreamNow { output: [now] }
+            └─StreamExchange { dist: Broadcast }
+              └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+                └─StreamNow { output: [now] }
 - name: Temporal filter in on clause for full join's left side
   sql: |
     create table t1 (a int, ta timestamp with time zone);
@@ -178,19 +180,20 @@
     select * from t1 left join t2 on a = b AND tb < now() - interval '1 hour' and tb >= now() - interval '2 hour';
   stream_plan: |-
     StreamMaterialize { columns: [a, ta, b, tb, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, a], pk_columns: [t1._row_id, t2._row_id, a], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftOuter, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.a) }
-      │ └─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.b) }
-        └─StreamDynamicFilter { predicate: (t2.tb < $expr2), output: [t2.b, t2.tb, t2._row_id] }
-          ├─StreamDynamicFilter { predicate: (t2.tb >= $expr1), output_watermarks: [t2.tb], output: [t2.b, t2.tb, t2._row_id], cleaned_by_watermark: true }
-          │ ├─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
-          │ └─StreamExchange { dist: Broadcast }
-          │   └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
-          │     └─StreamNow { output: [now] }
-          └─StreamExchange { dist: Broadcast }
-            └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
-              └─StreamNow { output: [now] }
+    └─StreamExchange { dist: HashShard(t1.a, t1._row_id, t2._row_id) }
+      └─StreamHashJoin { type: LeftOuter, predicate: t1.a = t2.b, output: [t1.a, t1.ta, t2.b, t2.tb, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.a) }
+        │ └─StreamTableScan { table: t1, columns: [t1.a, t1.ta, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.b) }
+          └─StreamDynamicFilter { predicate: (t2.tb < $expr2), output: [t2.b, t2.tb, t2._row_id] }
+            ├─StreamDynamicFilter { predicate: (t2.tb >= $expr1), output_watermarks: [t2.tb], output: [t2.b, t2.tb, t2._row_id], cleaned_by_watermark: true }
+            │ ├─StreamTableScan { table: t2, columns: [t2.b, t2.tb, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+            │ └─StreamExchange { dist: Broadcast }
+            │   └─StreamProject { exprs: [SubtractWithTimeZone(now, '02:00:00':Interval, 'UTC':Varchar) as $expr1], output_watermarks: [$expr1] }
+            │     └─StreamNow { output: [now] }
+            └─StreamExchange { dist: Broadcast }
+              └─StreamProject { exprs: [SubtractWithTimeZone(now, '01:00:00':Interval, 'UTC':Varchar) as $expr2], output_watermarks: [$expr2] }
+                └─StreamNow { output: [now] }
 - name: Temporal filter in on clause for right join's right side
   sql: |
     create table t1 (a int, ta timestamp with time zone);
diff --git a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml
index a1020b8d16ee5..f49a82be2dd78 100644
--- a/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/temporal_join.yaml
@@ -6,11 +6,12 @@
     select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on id1= id2
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2, output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
-      ├─StreamExchange { dist: HashShard(stream.id1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
-        └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+    └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) }
+      └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2, output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+        ├─StreamExchange { dist: HashShard(stream.id1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+          └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
   batch_error: |-
     Not supported: do not support temporal join for batch queries
     HINT: please use temporal join in streaming queries
@@ -21,11 +22,12 @@
     select id1, a1, id2, a2 from stream join version FOR SYSTEM_TIME AS OF PROCTIME() on id1 = id2 where a2 < 10;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
-      ├─StreamExchange { dist: HashShard(stream.id1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
-        └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+    └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) }
+      └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+        ├─StreamExchange { dist: HashShard(stream.id1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+          └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
 - name: implicit join with temporal tables
   sql: |
     create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -33,11 +35,12 @@
     select id1, a1, id2, a2 from stream, version FOR SYSTEM_TIME AS OF PROCTIME() where id1 = id2 AND a2 < 10;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1], pk_columns: [stream._row_id, id1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
-      ├─StreamExchange { dist: HashShard(stream.id1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
-        └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+    └─StreamExchange { dist: HashShard(stream.id1, stream._row_id) }
+      └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND (version.a2 < 10:Int32), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+        ├─StreamExchange { dist: HashShard(stream.id1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+          └─StreamTableScan { table: version, columns: [version.id2, version.a2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
 - name: Multi join key for temporal join
   sql: |
     create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -45,11 +48,12 @@
     select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and id1 = id2 where b2 != a2;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden)], stream_key: [stream._row_id, id1, a1], pk_columns: [stream._row_id, id1, a1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND stream.a1 = version.a2 AND (version.b2 <> version.a2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
-      ├─StreamExchange { dist: HashShard(stream.id1, stream.a1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2, version.a2) }
-        └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2], pk: [version.id2, version.a2], dist: UpstreamHashShard(version.id2, version.a2) }
+    └─StreamExchange { dist: HashShard(stream.id1, stream.a1, stream._row_id) }
+      └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version.id2 AND stream.a1 = version.a2 AND (version.b2 <> version.a2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id] }
+        ├─StreamExchange { dist: HashShard(stream.id1, stream.a1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2, version.a2) }
+          └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2], pk: [version.id2, version.a2], dist: UpstreamHashShard(version.id2, version.a2) }
 - name: Temporal join with Aggregation
   sql: |
     create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -101,15 +105,16 @@
     join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.k = version2.k where a1 < 10;
   stream_plan: |-
     StreamMaterialize { columns: [k, x1, x2, a1, b1, stream._row_id(hidden), version2.k(hidden)], stream_key: [stream._row_id, k], pk_columns: [stream._row_id, k], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: Inner, predicate: stream.k = version2.k, output: [stream.k, version1.x1, version2.x2, stream.a1, stream.b1, stream._row_id, version2.k] }
-      ├─StreamTemporalJoin { type: Inner, predicate: stream.k = version1.k, output: [stream.k, stream.a1, stream.b1, version1.x1, stream._row_id, version1.k] }
-      │ ├─StreamExchange { dist: HashShard(stream.k) }
-      │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
-      │ │   └─StreamTableScan { table: stream, columns: [stream.k, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.k) }
-      │   └─StreamTableScan { table: version1, columns: [version1.k, version1.x1], pk: [version1.k], dist: UpstreamHashShard(version1.k) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.k) }
-        └─StreamTableScan { table: version2, columns: [version2.k, version2.x2], pk: [version2.k], dist: UpstreamHashShard(version2.k) }
+    └─StreamExchange { dist: HashShard(stream.k, stream._row_id) }
+      └─StreamTemporalJoin { type: Inner, predicate: stream.k = version2.k, output: [stream.k, version1.x1, version2.x2, stream.a1, stream.b1, stream._row_id, version2.k] }
+        ├─StreamTemporalJoin { type: Inner, predicate: stream.k = version1.k, output: [stream.k, stream.a1, stream.b1, version1.x1, stream._row_id, version1.k] }
+        │ ├─StreamExchange { dist: HashShard(stream.k) }
+        │ │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
+        │ │   └─StreamTableScan { table: stream, columns: [stream.k, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        │ └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.k) }
+        │   └─StreamTableScan { table: version1, columns: [version1.k, version1.x1], pk: [version1.k], dist: UpstreamHashShard(version1.k) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.k) }
+          └─StreamTableScan { table: version2, columns: [version2.k, version2.x2], pk: [version2.k], dist: UpstreamHashShard(version2.k) }
 - name: multi-way temporal join with different keys
   sql: |
     create table stream(id1 int, id2 int, a1 int, b1 int) APPEND ONLY;
@@ -121,16 +126,17 @@
     join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.id2 = version2.id2 where a1 < 10;
   stream_plan: |-
     StreamMaterialize { columns: [id1, x1, id2, x2, a1, b1, stream._row_id(hidden), version2.id2(hidden)], stream_key: [stream._row_id, id1, id2], pk_columns: [stream._row_id, id1, id2], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
-      ├─StreamExchange { dist: HashShard(stream.id2) }
-      │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
-      │   ├─StreamExchange { dist: HashShard(stream.id1) }
-      │   │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
-      │   │   └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      │   └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
-      │     └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
-        └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
+    └─StreamExchange { dist: HashShard(stream.id1, stream.id2, stream._row_id) }
+      └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
+        ├─StreamExchange { dist: HashShard(stream.id2) }
+        │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
+        │   ├─StreamExchange { dist: HashShard(stream.id1) }
+        │   │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
+        │   │   └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        │   └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
+        │     └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
+          └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
 - name: multi-way temporal join with different keys
   sql: |
     create table stream(id1 int, id2 int, a1 int, b1 int) APPEND ONLY;
@@ -142,16 +148,17 @@
     join version2 FOR SYSTEM_TIME AS OF PROCTIME() on stream.id2 = version2.id2 where a1 < 10;
   stream_plan: |-
     StreamMaterialize { columns: [id1, x1, id2, x2, a1, b1, stream._row_id(hidden), version2.id2(hidden)], stream_key: [stream._row_id, id1, id2], pk_columns: [stream._row_id, id1, id2], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
-      ├─StreamExchange { dist: HashShard(stream.id2) }
-      │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
-      │   ├─StreamExchange { dist: HashShard(stream.id1) }
-      │   │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
-      │   │   └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      │   └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
-      │     └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
-        └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
+    └─StreamExchange { dist: HashShard(stream.id1, stream.id2, stream._row_id) }
+      └─StreamTemporalJoin { type: Inner, predicate: stream.id2 = version2.id2, output: [stream.id1, version1.x1, stream.id2, version2.x2, stream.a1, stream.b1, stream._row_id, version2.id2] }
+        ├─StreamExchange { dist: HashShard(stream.id2) }
+        │ └─StreamTemporalJoin { type: Inner, predicate: stream.id1 = version1.id1, output: [stream.id1, stream.id2, stream.a1, stream.b1, version1.x1, stream._row_id, version1.id1] }
+        │   ├─StreamExchange { dist: HashShard(stream.id1) }
+        │   │ └─StreamFilter { predicate: (stream.a1 < 10:Int32) }
+        │   │   └─StreamTableScan { table: stream, columns: [stream.id1, stream.id2, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        │   └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version1.id1) }
+        │     └─StreamTableScan { table: version1, columns: [version1.id1, version1.x1], pk: [version1.id1], dist: UpstreamHashShard(version1.id1) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version2.id2) }
+          └─StreamTableScan { table: version2, columns: [version2.id2, version2.x2], pk: [version2.id2], dist: UpstreamHashShard(version2.id2) }
 - name: temporal join with an index (distribution key size = 1)
   sql: |
     create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -160,11 +167,12 @@
     select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
-      ├─StreamExchange { dist: HashShard(stream.a1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
-        └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
+    └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+      └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+        ├─StreamExchange { dist: HashShard(stream.a1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
+          └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
 - name: temporal join with an index (distribution key size = 2)
   sql: |
     create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -173,11 +181,12 @@
     select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
-      ├─StreamExchange { dist: HashShard(stream.a1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
-        └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
+    └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+      └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+        ├─StreamExchange { dist: HashShard(stream.a1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
+          └─StreamTableScan { table: idx2, columns: [idx2.a2, idx2.b2, idx2.id2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
 - name: temporal join with an index (index column size = 1)
   sql: |
     create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -186,11 +195,12 @@
     select id1, a1, id2, a2 from stream left join idx2 FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, stream.b1, a1], pk_columns: [stream._row_id, id2, stream.b1, a1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: LeftOuter, predicate: stream.b1 = idx2.b2 AND (stream.a1 = idx2.a2), output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
-      ├─StreamExchange { dist: HashShard(stream.b1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.b2) }
-        └─StreamTableScan { table: idx2, columns: [idx2.b2, idx2.id2, idx2.a2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.b2) }
+    └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+      └─StreamTemporalJoin { type: LeftOuter, predicate: stream.b1 = idx2.b2 AND (stream.a1 = idx2.a2), output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+        ├─StreamExchange { dist: HashShard(stream.b1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.b2) }
+          └─StreamTableScan { table: idx2, columns: [idx2.b2, idx2.id2, idx2.a2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.b2) }
 - name: temporal join with singleton table
   sql: |
     create table t (a int) append only;
@@ -212,11 +222,12 @@
     select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx.a2 AND stream.b1 = idx.b2, output: [stream.id1, stream.a1, idx.id2, idx.a2, stream._row_id, stream.b1] }
-      ├─StreamExchange { dist: HashShard(stream.a1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx.a2) }
-        └─StreamTableScan { table: idx, columns: [idx.id2, idx.a2, idx.b2], pk: [idx.id2], dist: UpstreamHashShard(idx.a2) }
+    └─StreamExchange { dist: HashShard(stream.a1, idx.id2, stream._row_id, stream.b1) }
+      └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx.a2 AND stream.b1 = idx.b2, output: [stream.id1, stream.a1, idx.id2, idx.a2, stream._row_id, stream.b1] }
+        ├─StreamExchange { dist: HashShard(stream.a1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx.a2) }
+          └─StreamTableScan { table: idx, columns: [idx.id2, idx.a2, idx.b2], pk: [idx.id2], dist: UpstreamHashShard(idx.a2) }
 - name: index selection for temporal join (with two indexes) and should choose the index with a longer prefix..
   sql: |
     create table stream(id1 int, a1 int, b1 int) APPEND ONLY;
@@ -226,11 +237,12 @@
     select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden)], stream_key: [stream._row_id, id2, a1, stream.b1], pk_columns: [stream._row_id, id2, a1, stream.b1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
-      ├─StreamExchange { dist: HashShard(stream.a1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
-        └─StreamTableScan { table: idx2, columns: [idx2.id2, idx2.a2, idx2.b2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
+    └─StreamExchange { dist: HashShard(stream.a1, idx2.id2, stream._row_id, stream.b1) }
+      └─StreamTemporalJoin { type: LeftOuter, predicate: stream.a1 = idx2.a2 AND stream.b1 = idx2.b2, output: [stream.id1, stream.a1, idx2.id2, idx2.a2, stream._row_id, stream.b1] }
+        ├─StreamExchange { dist: HashShard(stream.a1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(idx2.a2) }
+          └─StreamTableScan { table: idx2, columns: [idx2.id2, idx2.a2, idx2.b2], pk: [idx2.id2], dist: UpstreamHashShard(idx2.a2) }
 - name: index selection for temporal join (with three indexes) and should choose primary table.
   sql: |
     create table stream(id1 int, a1 int, b1 int, c1 int) APPEND ONLY;
@@ -241,11 +253,12 @@
     select id1, a1, id2, a2 from stream left join version FOR SYSTEM_TIME AS OF PROCTIME() on a1 = a2 and b1 = b2 and c1 = c2 and id1 = id2;
   stream_plan: |-
     StreamMaterialize { columns: [id1, a1, id2, a2, stream._row_id(hidden), stream.b1(hidden), stream.c1(hidden)], stream_key: [stream._row_id, id1, a1, stream.b1, stream.c1], pk_columns: [stream._row_id, id1, a1, stream.b1, stream.c1], pk_conflict: NoCheck }
-    └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2 AND (stream.a1 = version.a2) AND (stream.b1 = version.b2) AND (stream.c1 = version.c2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id, stream.b1, stream.c1] }
-      ├─StreamExchange { dist: HashShard(stream.id1) }
-      │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream.c1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
-      └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
-        └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2, version.c2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
+    └─StreamExchange { dist: HashShard(stream.id1, stream.a1, stream._row_id, stream.b1, stream.c1) }
+      └─StreamTemporalJoin { type: LeftOuter, predicate: stream.id1 = version.id2 AND (stream.a1 = version.a2) AND (stream.b1 = version.b2) AND (stream.c1 = version.c2), output: [stream.id1, stream.a1, version.id2, version.a2, stream._row_id, stream.b1, stream.c1] }
+        ├─StreamExchange { dist: HashShard(stream.id1) }
+        │ └─StreamTableScan { table: stream, columns: [stream.id1, stream.a1, stream.b1, stream.c1, stream._row_id], pk: [stream._row_id], dist: UpstreamHashShard(stream._row_id) }
+        └─StreamExchange [no_shuffle] { dist: UpstreamHashShard(version.id2) }
+          └─StreamTableScan { table: version, columns: [version.id2, version.a2, version.b2, version.c2], pk: [version.id2], dist: UpstreamHashShard(version.id2) }
 - name: index selection for temporal join (two index) and no one matches.
   sql: |
     create table stream(id1 int, a1 int, b1 int, c1 int) APPEND ONLY;
diff --git a/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml b/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml
index eaaa1f8e5c8d5..fdf928a0c9c84 100644
--- a/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/tpch_variant.yaml
@@ -243,214 +243,218 @@
           └─BatchSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id(hidden), _row_id#1(hidden), r_regionkey(hidden), _row_id#2(hidden), _row_id#3(hidden), _row_id#4(hidden), ps_suppkey(hidden), n_nationkey(hidden), ps_supplycost(hidden), p_partkey#1(hidden)], stream_key: [_row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, p_partkey, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_columns: [s_acctbal, n_name, s_name, p_partkey, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] }
-      ├─StreamExchange { dist: HashShard(p_partkey) }
-      │ └─StreamShare { id: 26 }
-      │   └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
-      │     ├─StreamExchange { dist: HashShard(n_nationkey) }
-      │     │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
-      │     │   ├─StreamExchange { dist: HashShard(r_regionkey) }
-      │     │   │ └─StreamShare { id: 3 }
-      │     │   │   └─StreamProject { exprs: [r_regionkey, _row_id] }
-      │     │   │     └─StreamRowIdGen { row_id_index: 3 }
-      │     │   │       └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
-      │     │   └─StreamExchange { dist: HashShard(n_regionkey) }
-      │     │     └─StreamShare { id: 7 }
-      │     │       └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
-      │     │         └─StreamRowIdGen { row_id_index: 4 }
-      │     │           └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
-      │     └─StreamExchange { dist: HashShard(s_nationkey) }
-      │       └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
-      │         ├─StreamExchange { dist: HashShard(ps_suppkey) }
-      │         │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
-      │         │   ├─StreamExchange { dist: HashShard(p_partkey) }
-      │         │   │ └─StreamRowIdGen { row_id_index: 9 }
-      │         │   │   └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
-      │         │   └─StreamExchange { dist: HashShard(ps_partkey) }
-      │         │     └─StreamShare { id: 15 }
-      │         │       └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
-      │         │         └─StreamRowIdGen { row_id_index: 5 }
-      │         │           └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
-      │         └─StreamExchange { dist: HashShard(s_suppkey) }
-      │           └─StreamShare { id: 21 }
-      │             └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
-      │               └─StreamRowIdGen { row_id_index: 7 }
-      │                 └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
-      └─StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
-        └─StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] }
-          └─StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] }
-            ├─StreamAppendOnlyDedup { dedup_cols: [p_partkey] }
-            │ └─StreamExchange { dist: HashShard(p_partkey) }
-            │   └─StreamProject { exprs: [p_partkey] }
-            │     └─StreamShare { id: 26 }
-            │       └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
-            │         ├─StreamExchange { dist: HashShard(n_nationkey) }
-            │         │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
-            │         │   ├─StreamExchange { dist: HashShard(r_regionkey) }
-            │         │   │ └─StreamShare { id: 3 }
-            │         │   │   └─StreamProject { exprs: [r_regionkey, _row_id] }
-            │         │   │     └─StreamRowIdGen { row_id_index: 3 }
-            │         │   │       └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
-            │         │   └─StreamExchange { dist: HashShard(n_regionkey) }
-            │         │     └─StreamShare { id: 7 }
-            │         │       └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
-            │         │         └─StreamRowIdGen { row_id_index: 4 }
-            │         │           └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
-            │         └─StreamExchange { dist: HashShard(s_nationkey) }
-            │           └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
-            │             ├─StreamExchange { dist: HashShard(ps_suppkey) }
-            │             │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
-            │             │   ├─StreamExchange { dist: HashShard(p_partkey) }
-            │             │   │ └─StreamRowIdGen { row_id_index: 9 }
-            │             │   │   └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
-            │             │   └─StreamExchange { dist: HashShard(ps_partkey) }
-            │             │     └─StreamShare { id: 15 }
-            │             │       └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
-            │             │         └─StreamRowIdGen { row_id_index: 5 }
-            │             │           └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
-            │             └─StreamExchange { dist: HashShard(s_suppkey) }
-            │               └─StreamShare { id: 21 }
-            │                 └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
-            │                   └─StreamRowIdGen { row_id_index: 7 }
-            │                     └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
-            └─StreamExchange { dist: HashShard(ps_partkey) }
-              └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] }
-                ├─StreamExchange { dist: HashShard(s_nationkey) }
-                │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] }
-                │   ├─StreamExchange { dist: HashShard(ps_suppkey) }
-                │   │ └─StreamFilter { predicate: IsNotNull(ps_partkey) }
-                │   │   └─StreamShare { id: 15 }
-                │   │     └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
-                │   │       └─StreamRowIdGen { row_id_index: 5 }
-                │   │         └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
-                │   └─StreamExchange { dist: HashShard(s_suppkey) }
-                │     └─StreamShare { id: 21 }
-                │       └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
-                │         └─StreamRowIdGen { row_id_index: 7 }
-                │           └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
-                └─StreamExchange { dist: HashShard(n_nationkey) }
-                  └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] }
-                    ├─StreamExchange { dist: HashShard(r_regionkey) }
-                    │ └─StreamShare { id: 3 }
-                    │   └─StreamProject { exprs: [r_regionkey, _row_id] }
-                    │     └─StreamRowIdGen { row_id_index: 3 }
-                    │       └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
-                    └─StreamExchange { dist: HashShard(n_regionkey) }
-                      └─StreamShare { id: 7 }
-                        └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
-                          └─StreamRowIdGen { row_id_index: 4 }
-                            └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+    └─StreamExchange { dist: HashShard(p_partkey, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost) }
+      └─StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] }
+        ├─StreamExchange { dist: HashShard(p_partkey) }
+        │ └─StreamShare { id: 26 }
+        │   └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
+        │     ├─StreamExchange { dist: HashShard(n_nationkey) }
+        │     │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
+        │     │   ├─StreamExchange { dist: HashShard(r_regionkey) }
+        │     │   │ └─StreamShare { id: 3 }
+        │     │   │   └─StreamProject { exprs: [r_regionkey, _row_id] }
+        │     │   │     └─StreamRowIdGen { row_id_index: 3 }
+        │     │   │       └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
+        │     │   └─StreamExchange { dist: HashShard(n_regionkey) }
+        │     │     └─StreamShare { id: 7 }
+        │     │       └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
+        │     │         └─StreamRowIdGen { row_id_index: 4 }
+        │     │           └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+        │     └─StreamExchange { dist: HashShard(s_nationkey) }
+        │       └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
+        │         ├─StreamExchange { dist: HashShard(ps_suppkey) }
+        │         │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
+        │         │   ├─StreamExchange { dist: HashShard(p_partkey) }
+        │         │   │ └─StreamRowIdGen { row_id_index: 9 }
+        │         │   │   └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+        │         │   └─StreamExchange { dist: HashShard(ps_partkey) }
+        │         │     └─StreamShare { id: 15 }
+        │         │       └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
+        │         │         └─StreamRowIdGen { row_id_index: 5 }
+        │         │           └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+        │         └─StreamExchange { dist: HashShard(s_suppkey) }
+        │           └─StreamShare { id: 21 }
+        │             └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+        │               └─StreamRowIdGen { row_id_index: 7 }
+        │                 └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+        └─StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
+          └─StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] }
+            └─StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] }
+              ├─StreamAppendOnlyDedup { dedup_cols: [p_partkey] }
+              │ └─StreamExchange { dist: HashShard(p_partkey) }
+              │   └─StreamProject { exprs: [p_partkey] }
+              │     └─StreamShare { id: 26 }
+              │       └─StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] }
+              │         ├─StreamExchange { dist: HashShard(n_nationkey) }
+              │         │ └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] }
+              │         │   ├─StreamExchange { dist: HashShard(r_regionkey) }
+              │         │   │ └─StreamShare { id: 3 }
+              │         │   │   └─StreamProject { exprs: [r_regionkey, _row_id] }
+              │         │   │     └─StreamRowIdGen { row_id_index: 3 }
+              │         │   │       └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
+              │         │   └─StreamExchange { dist: HashShard(n_regionkey) }
+              │         │     └─StreamShare { id: 7 }
+              │         │       └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
+              │         │         └─StreamRowIdGen { row_id_index: 4 }
+              │         │           └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+              │         └─StreamExchange { dist: HashShard(s_nationkey) }
+              │           └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] }
+              │             ├─StreamExchange { dist: HashShard(ps_suppkey) }
+              │             │ └─StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] }
+              │             │   ├─StreamExchange { dist: HashShard(p_partkey) }
+              │             │   │ └─StreamRowIdGen { row_id_index: 9 }
+              │             │   │   └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+              │             │   └─StreamExchange { dist: HashShard(ps_partkey) }
+              │             │     └─StreamShare { id: 15 }
+              │             │       └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
+              │             │         └─StreamRowIdGen { row_id_index: 5 }
+              │             │           └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+              │             └─StreamExchange { dist: HashShard(s_suppkey) }
+              │               └─StreamShare { id: 21 }
+              │                 └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+              │                   └─StreamRowIdGen { row_id_index: 7 }
+              │                     └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+              └─StreamExchange { dist: HashShard(ps_partkey) }
+                └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] }
+                  ├─StreamExchange { dist: HashShard(s_nationkey) }
+                  │ └─StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] }
+                  │   ├─StreamExchange { dist: HashShard(ps_suppkey) }
+                  │   │ └─StreamFilter { predicate: IsNotNull(ps_partkey) }
+                  │   │   └─StreamShare { id: 15 }
+                  │   │     └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
+                  │   │       └─StreamRowIdGen { row_id_index: 5 }
+                  │   │         └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+                  │   └─StreamExchange { dist: HashShard(s_suppkey) }
+                  │     └─StreamShare { id: 21 }
+                  │       └─StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+                  │         └─StreamRowIdGen { row_id_index: 7 }
+                  │           └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+                  └─StreamExchange { dist: HashShard(n_nationkey) }
+                    └─StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] }
+                      ├─StreamExchange { dist: HashShard(r_regionkey) }
+                      │ └─StreamShare { id: 3 }
+                      │   └─StreamProject { exprs: [r_regionkey, _row_id] }
+                      │     └─StreamRowIdGen { row_id_index: 3 }
+                      │       └─StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] }
+                      └─StreamExchange { dist: HashShard(n_regionkey) }
+                        └─StreamShare { id: 7 }
+                          └─StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
+                            └─StreamRowIdGen { row_id_index: 4 }
+                              └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id(hidden), _row_id#1(hidden), r_regionkey(hidden), _row_id#2(hidden), _row_id#3(hidden), _row_id#4(hidden), ps_suppkey(hidden), n_nationkey(hidden), ps_supplycost(hidden), p_partkey#1(hidden)], stream_key: [_row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, p_partkey, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_columns: [s_acctbal, n_name, s_name, p_partkey, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
-            └── StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } { intermediate state table: 26, state tables: [ 25 ], distinct tables: [] }
-                └── StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } { left table: 27, right table: 29, left degree table: 28, right degree table: 30 }
-                    ├── StreamAppendOnlyDedup { dedup_cols: [p_partkey] } { state table: 31 }
-                    │   └── StreamExchange Hash([0]) from 15
-                    └── StreamExchange Hash([0]) from 16
+    └── StreamExchange Hash([3, 8, 9, 10, 11, 12, 13, 14, 15, 16]) from 1
 
     Fragment 1
-    StreamNoOp
-    └── StreamExchange NoShuffle from 2
+    StreamHashJoin { type: Inner, predicate: p_partkey IS NOT DISTINCT FROM p_partkey AND ps_supplycost = min(ps_supplycost), output: [s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id, r_regionkey, _row_id, _row_id, _row_id, ps_suppkey, n_nationkey, ps_supplycost, p_partkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamProject { exprs: [p_partkey, min(ps_supplycost)] }
+        └── StreamHashAgg { group_key: [p_partkey], aggs: [min(ps_supplycost), count] } { intermediate state table: 26, state tables: [ 25 ], distinct tables: [] }
+            └── StreamHashJoin { type: LeftOuter, predicate: p_partkey IS NOT DISTINCT FROM ps_partkey, output: [p_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, _row_id, _row_id, r_regionkey, s_nationkey] } { left table: 27, right table: 29, left degree table: 28, right degree table: 30 }
+                ├── StreamAppendOnlyDedup { dedup_cols: [p_partkey] } { state table: 31 }
+                │   └── StreamExchange Hash([0]) from 16
+                └── StreamExchange Hash([0]) from 17
 
     Fragment 2
-    StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
-    ├── StreamExchange Hash([0]) from 3
-    └── StreamExchange Hash([5]) from 8
+    StreamNoOp
+    └── StreamExchange NoShuffle from 3
 
     Fragment 3
-    StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } { left table: 8, right table: 10, left degree table: 9, right degree table: 11 }
+    StreamHashJoin [append_only] { type: Inner, predicate: n_nationkey = s_nationkey, output: [p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id, r_regionkey, n_nationkey, _row_id, _row_id, _row_id, ps_suppkey] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
     ├── StreamExchange Hash([0]) from 4
-    └── StreamExchange Hash([2]) from 6
+    └── StreamExchange Hash([5]) from 9
 
     Fragment 4
-    StreamNoOp
-    └── StreamExchange NoShuffle from 5
+    StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, n_name, _row_id, r_regionkey, _row_id] } { left table: 8, right table: 10, left degree table: 9, right degree table: 11 }
+    ├── StreamExchange Hash([0]) from 5
+    └── StreamExchange Hash([2]) from 7
 
     Fragment 5
+    StreamNoOp
+    └── StreamExchange NoShuffle from 6
+
+    Fragment 6
     StreamProject { exprs: [r_regionkey, _row_id] }
     └── StreamRowIdGen { row_id_index: 3 }
         └── StreamSource { source: region, columns: [r_regionkey, r_name, r_comment, _row_id] } { source state table: 12 }
 
-    Fragment 6
+    Fragment 7
     StreamNoOp
-    └── StreamExchange NoShuffle from 7
+    └── StreamExchange NoShuffle from 8
 
-    Fragment 7
+    Fragment 8
     StreamProject { exprs: [n_nationkey, n_name, n_regionkey, _row_id] }
     └── StreamRowIdGen { row_id_index: 4 }
         └── StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } { source state table: 13 }
 
-    Fragment 8
+    Fragment 9
     StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [p_partkey, p_mfgr, ps_supplycost, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id, _row_id, ps_suppkey, _row_id] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
-    ├── StreamExchange Hash([2]) from 9
-    └── StreamExchange Hash([0]) from 13
+    ├── StreamExchange Hash([2]) from 10
+    └── StreamExchange Hash([0]) from 14
 
-    Fragment 9
+    Fragment 10
     StreamHashJoin [append_only] { type: Inner, predicate: p_partkey = ps_partkey, output: [p_partkey, p_mfgr, ps_suppkey, ps_supplycost, _row_id, _row_id] } { left table: 18, right table: 20, left degree table: 19, right degree table: 21 }
-    ├── StreamExchange Hash([0]) from 10
-    └── StreamExchange Hash([0]) from 11
+    ├── StreamExchange Hash([0]) from 11
+    └── StreamExchange Hash([0]) from 12
 
-    Fragment 10
+    Fragment 11
     StreamRowIdGen { row_id_index: 9 }
     └── StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } { source state table: 22 }
 
-    Fragment 11
+    Fragment 12
     StreamNoOp
-    └── StreamExchange NoShuffle from 12
+    └── StreamExchange NoShuffle from 13
 
-    Fragment 12
+    Fragment 13
     StreamProject { exprs: [ps_partkey, ps_suppkey, ps_supplycost, _row_id] }
     └── StreamRowIdGen { row_id_index: 5 }
         └── StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } { source state table: 23 }
 
-    Fragment 13
+    Fragment 14
     StreamNoOp
-    └── StreamExchange NoShuffle from 14
+    └── StreamExchange NoShuffle from 15
 
-    Fragment 14
+    Fragment 15
     StreamProject { exprs: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
     └── StreamRowIdGen { row_id_index: 7 }
         └── StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } { source state table: 24 }
 
-    Fragment 15
+    Fragment 16
     StreamProject { exprs: [p_partkey] }
-    └── StreamExchange NoShuffle from 2
+    └── StreamExchange NoShuffle from 3
 
-    Fragment 16
+    Fragment 17
     StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [ps_partkey, ps_supplycost, _row_id, _row_id, ps_suppkey, s_nationkey, _row_id, _row_id, r_regionkey] } { left table: 32, right table: 34, left degree table: 33, right degree table: 35 }
-    ├── StreamExchange Hash([2]) from 17
-    └── StreamExchange Hash([0]) from 20
+    ├── StreamExchange Hash([2]) from 18
+    └── StreamExchange Hash([0]) from 21
 
-    Fragment 17
+    Fragment 18
     StreamHashJoin [append_only] { type: Inner, predicate: ps_suppkey = s_suppkey, output: [ps_partkey, ps_supplycost, s_nationkey, _row_id, ps_suppkey, _row_id] } { left table: 36, right table: 38, left degree table: 37, right degree table: 39 }
-    ├── StreamExchange Hash([1]) from 18
-    └── StreamExchange Hash([0]) from 19
+    ├── StreamExchange Hash([1]) from 19
+    └── StreamExchange Hash([0]) from 20
 
-    Fragment 18
+    Fragment 19
     StreamFilter { predicate: IsNotNull(ps_partkey) }
-    └── StreamExchange NoShuffle from 12
+    └── StreamExchange NoShuffle from 13
 
-    Fragment 19
+    Fragment 20
     StreamNoOp
-    └── StreamExchange NoShuffle from 14
+    └── StreamExchange NoShuffle from 15
 
-    Fragment 20
+    Fragment 21
     StreamHashJoin [append_only] { type: Inner, predicate: r_regionkey = n_regionkey, output: [n_nationkey, _row_id, r_regionkey, _row_id] } { left table: 40, right table: 42, left degree table: 41, right degree table: 43 }
-    ├── StreamExchange Hash([0]) from 21
-    └── StreamExchange Hash([2]) from 22
+    ├── StreamExchange Hash([0]) from 22
+    └── StreamExchange Hash([2]) from 23
 
-    Fragment 21
+    Fragment 22
     StreamNoOp
-    └── StreamExchange NoShuffle from 5
+    └── StreamExchange NoShuffle from 6
 
-    Fragment 22
+    Fragment 23
     StreamNoOp
-    └── StreamExchange NoShuffle from 7
+    └── StreamExchange NoShuffle from 8
 
     Table 0 { columns: [ p_partkey, p_mfgr, s_name, s_address, s_phone, s_acctbal, s_comment, ps_supplycost, n_name, _row_id, _row_id_0, r_regionkey, n_nationkey, _row_id_1, _row_id_2, _row_id_3, ps_suppkey ], primary key: [ $0 ASC, $7 ASC, $9 ASC, $10 ASC, $11 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC, $12 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], distribution key: [ 0 ], read pk prefix len hint: 2 }
 
@@ -540,7 +544,7 @@
 
     Table 43 { columns: [ n_regionkey, _row_id, _degree ], primary key: [ $0 ASC, $1 ASC ], value indices: [ 2 ], distribution key: [ 0 ], read pk prefix len hint: 1 }
 
-    Table 4294967294 { columns: [ s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost, p_partkey#1 ], primary key: [ $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ], distribution key: [ 3 ], read pk prefix len hint: 13 }
+    Table 4294967294 { columns: [ s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment, _row_id, _row_id#1, r_regionkey, _row_id#2, _row_id#3, _row_id#4, ps_suppkey, n_nationkey, ps_supplycost, p_partkey#1 ], primary key: [ $0 DESC, $2 ASC, $1 ASC, $3 ASC, $8 ASC, $9 ASC, $10 ASC, $11 ASC, $12 ASC, $13 ASC, $14 ASC, $15 ASC, $16 ASC ], value indices: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ], distribution key: [ 3, 8, 9, 10, 11, 12, 13, 14, 15, 16 ], read pk prefix len hint: 13 }
 
 - id: tpch_q5
   before:
@@ -1797,76 +1801,80 @@
                         └─BatchSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id], filter: (None, None) }
   stream_plan: |-
     StreamMaterialize { columns: [s_name, s_address, _row_id(hidden), _row_id#1(hidden), s_nationkey(hidden), s_suppkey(hidden)], stream_key: [_row_id, _row_id#1, s_nationkey, s_suppkey], pk_columns: [s_name, _row_id, _row_id#1, s_nationkey, s_suppkey], pk_conflict: NoCheck }
-    └─StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] }
-      ├─StreamExchange { dist: HashShard(s_suppkey) }
-      │ └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] }
-      │   ├─StreamExchange { dist: HashShard(s_nationkey) }
-      │   │ └─StreamRowIdGen { row_id_index: 7 }
-      │   │   └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
-      │   └─StreamExchange { dist: HashShard(n_nationkey) }
-      │     └─StreamRowIdGen { row_id_index: 4 }
-      │       └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
-      └─StreamExchange { dist: HashShard(ps_suppkey) }
-        └─StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] }
-          └─StreamFilter { predicate: ($expr1 > $expr2) }
-            └─StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all }
-              ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
-              │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] }
-              │   └─StreamShare { id: 13 }
-              │     └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
-              │       ├─StreamExchange { dist: HashShard(ps_partkey) }
-              │       │ └─StreamRowIdGen { row_id_index: 5 }
-              │       │   └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
-              │       └─StreamExchange { dist: HashShard(p_partkey) }
-              │         └─StreamProject { exprs: [p_partkey, _row_id] }
-              │           └─StreamRowIdGen { row_id_index: 9 }
-              │             └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
-              └─StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] }
-                └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] }
-                  └─StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] }
-                    ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
-                    │ └─StreamProject { exprs: [ps_partkey, ps_suppkey] }
-                    │   └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] }
-                    │     └─StreamShare { id: 13 }
-                    │       └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
-                    │         ├─StreamExchange { dist: HashShard(ps_partkey) }
-                    │         │ └─StreamRowIdGen { row_id_index: 5 }
-                    │         │   └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
-                    │         └─StreamExchange { dist: HashShard(p_partkey) }
-                    │           └─StreamProject { exprs: [p_partkey, _row_id] }
-                    │             └─StreamRowIdGen { row_id_index: 9 }
-                    │               └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
-                    └─StreamExchange { dist: HashShard(l_partkey, l_suppkey) }
-                      └─StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] }
-                        └─StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) }
-                          └─StreamRowIdGen { row_id_index: 16 }
-                            └─StreamSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id] }
+    └─StreamExchange { dist: HashShard(_row_id, _row_id, s_nationkey, s_suppkey) }
+      └─StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] }
+        ├─StreamExchange { dist: HashShard(s_suppkey) }
+        │ └─StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] }
+        │   ├─StreamExchange { dist: HashShard(s_nationkey) }
+        │   │ └─StreamRowIdGen { row_id_index: 7 }
+        │   │   └─StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] }
+        │   └─StreamExchange { dist: HashShard(n_nationkey) }
+        │     └─StreamRowIdGen { row_id_index: 4 }
+        │       └─StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] }
+        └─StreamExchange { dist: HashShard(ps_suppkey) }
+          └─StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] }
+            └─StreamFilter { predicate: ($expr1 > $expr2) }
+              └─StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all }
+                ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
+                │ └─StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] }
+                │   └─StreamShare { id: 13 }
+                │     └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
+                │       ├─StreamExchange { dist: HashShard(ps_partkey) }
+                │       │ └─StreamRowIdGen { row_id_index: 5 }
+                │       │   └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+                │       └─StreamExchange { dist: HashShard(p_partkey) }
+                │         └─StreamProject { exprs: [p_partkey, _row_id] }
+                │           └─StreamRowIdGen { row_id_index: 9 }
+                │             └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+                └─StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] }
+                  └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] }
+                    └─StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] }
+                      ├─StreamExchange { dist: HashShard(ps_partkey, ps_suppkey) }
+                      │ └─StreamProject { exprs: [ps_partkey, ps_suppkey] }
+                      │   └─StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] }
+                      │     └─StreamShare { id: 13 }
+                      │       └─StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] }
+                      │         ├─StreamExchange { dist: HashShard(ps_partkey) }
+                      │         │ └─StreamRowIdGen { row_id_index: 5 }
+                      │         │   └─StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] }
+                      │         └─StreamExchange { dist: HashShard(p_partkey) }
+                      │           └─StreamProject { exprs: [p_partkey, _row_id] }
+                      │             └─StreamRowIdGen { row_id_index: 9 }
+                      │               └─StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] }
+                      └─StreamExchange { dist: HashShard(l_partkey, l_suppkey) }
+                        └─StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] }
+                          └─StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) }
+                            └─StreamRowIdGen { row_id_index: 16 }
+                              └─StreamSource { source: lineitem, columns: [l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment, _row_id] }
   stream_dist_plan: |+
     Fragment 0
     StreamMaterialize { columns: [s_name, s_address, _row_id(hidden), _row_id#1(hidden), s_nationkey(hidden), s_suppkey(hidden)], stream_key: [_row_id, _row_id#1, s_nationkey, s_suppkey], pk_columns: [s_name, _row_id, _row_id#1, s_nationkey, s_suppkey], pk_conflict: NoCheck }
     ├── materialized table: 4294967294
-    └── StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
-        ├── StreamExchange Hash([0]) from 1
-        └── StreamExchange Hash([0]) from 4
+    └── StreamExchange Hash([2, 3, 4, 5]) from 1
 
     Fragment 1
-    StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
-    ├── StreamExchange Hash([3]) from 2
-    └── StreamExchange Hash([0]) from 3
+    StreamHashJoin { type: LeftSemi, predicate: s_suppkey = ps_suppkey, output: [s_name, s_address, _row_id, _row_id, s_nationkey, s_suppkey] } { left table: 0, right table: 2, left degree table: 1, right degree table: 3 }
+    ├── StreamExchange Hash([0]) from 2
+    └── StreamExchange Hash([0]) from 5
 
     Fragment 2
+    StreamHashJoin [append_only] { type: Inner, predicate: s_nationkey = n_nationkey, output: [s_suppkey, s_name, s_address, _row_id, s_nationkey, _row_id] } { left table: 4, right table: 6, left degree table: 5, right degree table: 7 }
+    ├── StreamExchange Hash([3]) from 3
+    └── StreamExchange Hash([0]) from 4
+
+    Fragment 3
     StreamRowIdGen { row_id_index: 7 }
     └── StreamSource { source: supplier, columns: [s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, _row_id] } { source state table: 8 }
 
-    Fragment 3
+    Fragment 4
     StreamRowIdGen { row_id_index: 4 }
     └── StreamSource { source: nation, columns: [n_nationkey, n_name, n_regionkey, n_comment, _row_id] } { source state table: 9 }
 
-    Fragment 4
+    Fragment 5
     StreamProject { exprs: [ps_suppkey, _row_id, ps_partkey, ps_partkey, ps_suppkey] }
     └── StreamFilter { predicate: ($expr1 > $expr2) }
         └── StreamHashJoin { type: Inner, predicate: ps_partkey IS NOT DISTINCT FROM ps_partkey AND ps_suppkey IS NOT DISTINCT FROM ps_suppkey, output: all } { left table: 10, right table: 12, left degree table: 11, right degree table: 13 }
-            ├── StreamExchange Hash([0, 1]) from 5
+            ├── StreamExchange Hash([0, 1]) from 6
             └── StreamProject { exprs: [ps_partkey, ps_suppkey, (0.5:Decimal * sum(l_quantity)) as $expr2] }
                 └── StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [sum(l_quantity), count] } { intermediate state table: 20, state tables: [], distinct tables: [] }
                     └── StreamHashJoin { type: LeftOuter, predicate: ps_partkey IS NOT DISTINCT FROM l_partkey AND ps_suppkey IS NOT DISTINCT FROM l_suppkey, output: [ps_partkey, ps_suppkey, l_quantity, _row_id] }
@@ -1874,33 +1882,33 @@
                         ├── right table: 23
                         ├── left degree table: 22
                         ├── right degree table: 24
-                        ├── StreamExchange Hash([0, 1]) from 9
-                        └── StreamExchange Hash([0, 1]) from 10
+                        ├── StreamExchange Hash([0, 1]) from 10
+                        └── StreamExchange Hash([0, 1]) from 11
 
-    Fragment 5
+    Fragment 6
     StreamProject { exprs: [ps_partkey, ps_suppkey, ps_availqty::Decimal as $expr1, _row_id] }
-    └── StreamExchange NoShuffle from 6
+    └── StreamExchange NoShuffle from 7
 
-    Fragment 6
+    Fragment 7
     StreamHashJoin { type: LeftSemi, predicate: ps_partkey = p_partkey, output: [ps_partkey, ps_suppkey, ps_availqty, _row_id] } { left table: 14, right table: 16, left degree table: 15, right degree table: 17 }
-    ├── StreamExchange Hash([0]) from 7
-    └── StreamExchange Hash([0]) from 8
+    ├── StreamExchange Hash([0]) from 8
+    └── StreamExchange Hash([0]) from 9
 
-    Fragment 7
+    Fragment 8
     StreamRowIdGen { row_id_index: 5 }
     └── StreamSource { source: partsupp, columns: [ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, _row_id] } { source state table: 18 }
 
-    Fragment 8
+    Fragment 9
     StreamProject { exprs: [p_partkey, _row_id] }
     └── StreamRowIdGen { row_id_index: 9 }
         └── StreamSource { source: part, columns: [p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, _row_id] } { source state table: 19 }
 
-    Fragment 9
+    Fragment 10
     StreamProject { exprs: [ps_partkey, ps_suppkey] }
     └── StreamHashAgg { group_key: [ps_partkey, ps_suppkey], aggs: [count] } { intermediate state table: 25, state tables: [], distinct tables: [] }
-        └── StreamExchange NoShuffle from 6
+        └── StreamExchange NoShuffle from 7
 
-    Fragment 10
+    Fragment 11
     StreamProject { exprs: [l_partkey, l_suppkey, l_quantity, _row_id] }
     └── StreamFilter { predicate: IsNotNull(l_partkey) AND IsNotNull(l_suppkey) }
         └── StreamRowIdGen { row_id_index: 16 }
@@ -1961,7 +1969,7 @@
 
     Table 26 { columns: [ partition_id, offset_info ], primary key: [ $0 ASC ], value indices: [ 0, 1 ], distribution key: [], read pk prefix len hint: 1 }
 
-    Table 4294967294 { columns: [ s_name, s_address, _row_id, _row_id#1, s_nationkey, s_suppkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 5 ], read pk prefix len hint: 5 }
+    Table 4294967294 { columns: [ s_name, s_address, _row_id, _row_id#1, s_nationkey, s_suppkey ], primary key: [ $0 ASC, $2 ASC, $3 ASC, $4 ASC, $5 ASC ], value indices: [ 0, 1, 2, 3, 4, 5 ], distribution key: [ 2, 3, 4, 5 ], read pk prefix len hint: 5 }
 
 - id: tpch_q21
   before:
diff --git a/src/frontend/planner_test/tests/testdata/output/watermark.yaml b/src/frontend/planner_test/tests/testdata/output/watermark.yaml
index e4ef42b121528..d57d41fa76bc3 100644
--- a/src/frontend/planner_test/tests/testdata/output/watermark.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/watermark.yaml
@@ -79,11 +79,12 @@
     select t1.ts as t1_ts, t2.ts as ts2, t1.v1 as t1_v1, t1.v2 as t1_v2, t2.v1 as t2_v1, t2.v2 as t2_v2 from t1, t2 where t1.ts = t2.ts;
   stream_plan: |-
     StreamMaterialize { columns: [t1_ts, ts2, t1_v1, t1_v2, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_ts], pk_columns: [t1._row_id, t2._row_id, t1_ts], pk_conflict: NoCheck, watermark_columns: [t1_ts, ts2] }
-    └─StreamHashJoin [window, append_only] { type: Inner, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t2.ts, t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.ts) }
-      │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.ts) }
-        └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.ts, t1._row_id, t2._row_id) }
+      └─StreamHashJoin [window, append_only] { type: Inner, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t2.ts, t1.v1, t1.v2, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.ts) }
+        │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.ts) }
+          └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: left semi window join
   sql: |
     create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
@@ -91,11 +92,12 @@
     select t1.ts as t1_ts, t1.v1 as t1_v1, t1.v2 as t1_v2 from t1 where exists (select * from t2 where t1.ts = t2.ts);
   stream_plan: |-
     StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t1._row_id(hidden)], stream_key: [t1._row_id, t1_ts], pk_columns: [t1._row_id, t1_ts], pk_conflict: NoCheck, watermark_columns: [t1_ts] }
-    └─StreamHashJoin [window] { type: LeftSemi, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts], output: all }
-      ├─StreamExchange { dist: HashShard(t1.ts) }
-      │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.ts) }
-        └─StreamTableScan { table: t2, columns: [t2.ts, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.ts, t1._row_id) }
+      └─StreamHashJoin [window] { type: LeftSemi, predicate: t1.ts = t2.ts, output_watermarks: [t1.ts], output: all }
+        ├─StreamExchange { dist: HashShard(t1.ts) }
+        │ └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.ts) }
+          └─StreamTableScan { table: t2, columns: [t2.ts, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: interval join(left outer join)
   sql: |
     create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
@@ -108,13 +110,14 @@
       └─LogicalScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id] }
   stream_plan: |-
     StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t2_ts, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1], pk_columns: [t1._row_id, t2._row_id, t1_v1], pk_conflict: NoCheck, watermark_columns: [t1_ts, t2_ts] }
-    └─StreamHashJoin [interval] { type: LeftOuter, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
-      │   └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.v1) }
-        └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
-          └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+      └─StreamHashJoin [interval] { type: LeftOuter, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
+        │   └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.v1) }
+          └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
+            └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: interval join (inner join)
   sql: |
     create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
@@ -127,13 +130,14 @@
       └─LogicalScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id] }
   stream_plan: |-
     StreamMaterialize { columns: [t1_ts, t1_v1, t1_v2, t2_ts, t2_v1, t2_v2, t1._row_id(hidden), t2._row_id(hidden)], stream_key: [t1._row_id, t2._row_id, t1_v1], pk_columns: [t1._row_id, t2._row_id, t1_v1], pk_conflict: NoCheck, watermark_columns: [t1_ts, t2_ts] }
-    └─StreamHashJoin [interval, append_only] { type: Inner, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
-      ├─StreamExchange { dist: HashShard(t1.v1) }
-      │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
-      │   └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
-      └─StreamExchange { dist: HashShard(t2.v1) }
-        └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
-          └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
+    └─StreamExchange { dist: HashShard(t1.v1, t1._row_id, t2._row_id) }
+      └─StreamHashJoin [interval, append_only] { type: Inner, predicate: t1.v1 = t2.v1 AND (t1.ts >= $expr2) AND ($expr1 <= t2.ts), conditions_to_clean_left_state_table: (t1.ts >= $expr2), conditions_to_clean_right_state_table: ($expr1 <= t2.ts), output_watermarks: [t1.ts, t2.ts], output: [t1.ts, t1.v1, t1.v2, t2.ts, t2.v1, t2.v2, t1._row_id, t2._row_id] }
+        ├─StreamExchange { dist: HashShard(t1.v1) }
+        │ └─StreamProject { exprs: [t1.ts, t1.v1, t1.v2, AddWithTimeZone(t1.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr1, t1._row_id], output_watermarks: [t1.ts, $expr1] }
+        │   └─StreamTableScan { table: t1, columns: [t1.ts, t1.v1, t1.v2, t1._row_id], pk: [t1._row_id], dist: UpstreamHashShard(t1._row_id) }
+        └─StreamExchange { dist: HashShard(t2.v1) }
+          └─StreamProject { exprs: [t2.ts, t2.v1, t2.v2, AddWithTimeZone(t2.ts, '00:00:01':Interval, 'UTC':Varchar) as $expr2, t2._row_id], output_watermarks: [t2.ts, $expr2] }
+            └─StreamTableScan { table: t2, columns: [t2.ts, t2.v1, t2.v2, t2._row_id], pk: [t2._row_id], dist: UpstreamHashShard(t2._row_id) }
 - name: union all
   sql: |
     create table t1 (ts timestamp with time zone, v1 int, v2 int, watermark for ts as ts - INTERVAL '1' SECOND) append only;
diff --git a/src/frontend/planner_test/tests/testdata/output/window_join.yaml b/src/frontend/planner_test/tests/testdata/output/window_join.yaml
index 4113a6021e866..17c5e76f6e806 100644
--- a/src/frontend/planner_test/tests/testdata/output/window_join.yaml
+++ b/src/frontend/planner_test/tests/testdata/output/window_join.yaml
@@ -12,15 +12,16 @@
     select * from t1, t2 where ts1 = ts2 and a1 = a2;
   stream_plan: |-
     StreamMaterialize { columns: [ts1, a1, b1, ts2, a2, b2, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, ts1, a1], pk_columns: [_row_id, _row_id#1, ts1, a1], pk_conflict: NoCheck, watermark_columns: [ts1, ts2] }
-    └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
-      ├─StreamExchange { dist: HashShard(ts1, a1) }
-      │ └─StreamRowIdGen { row_id_index: 3 }
-      │   └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
-      │     └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
-      └─StreamExchange { dist: HashShard(ts2, a2) }
-        └─StreamRowIdGen { row_id_index: 3 }
-          └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
-            └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
+    └─StreamExchange { dist: HashShard(ts1, a1, _row_id, _row_id) }
+      └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
+        ├─StreamExchange { dist: HashShard(ts1, a1) }
+        │ └─StreamRowIdGen { row_id_index: 3 }
+        │   └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
+        │     └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
+        └─StreamExchange { dist: HashShard(ts2, a2) }
+          └─StreamRowIdGen { row_id_index: 3 }
+            └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
+              └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
 - name: Window join expression reorder
   sql: |
     create source t1 (ts1 timestamp with time zone, a1 int, b1 int, watermark for ts1 as ts1 - INTERVAL '1' SECOND) with (
@@ -34,12 +35,13 @@
     select * from t1, t2 where a1 = a2 and ts1 = ts2;
   stream_plan: |-
     StreamMaterialize { columns: [ts1, a1, b1, ts2, a2, b2, _row_id(hidden), _row_id#1(hidden)], stream_key: [_row_id, _row_id#1, a1, ts1], pk_columns: [_row_id, _row_id#1, a1, ts1], pk_conflict: NoCheck, watermark_columns: [ts1, ts2] }
-    └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
-      ├─StreamExchange { dist: HashShard(ts1, a1) }
-      │ └─StreamRowIdGen { row_id_index: 3 }
-      │   └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
-      │     └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
-      └─StreamExchange { dist: HashShard(ts2, a2) }
-        └─StreamRowIdGen { row_id_index: 3 }
-          └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
-            └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
+    └─StreamExchange { dist: HashShard(ts1, a1, _row_id, _row_id) }
+      └─StreamHashJoin [window, append_only] { type: Inner, predicate: ts1 = ts2 AND a1 = a2, output_watermarks: [ts1, ts2], output: [ts1, a1, b1, ts2, a2, b2, _row_id, _row_id] }
+        ├─StreamExchange { dist: HashShard(ts1, a1) }
+        │ └─StreamRowIdGen { row_id_index: 3 }
+        │   └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts1, expr: (ts1 - '00:00:01':Interval) }], output_watermarks: [ts1] }
+        │     └─StreamSource { source: t1, columns: [ts1, a1, b1, _row_id] }
+        └─StreamExchange { dist: HashShard(ts2, a2) }
+          └─StreamRowIdGen { row_id_index: 3 }
+            └─StreamWatermarkFilter { watermark_descs: [Desc { column: ts2, expr: (ts2 - '00:00:01':Interval) }], output_watermarks: [ts2] }
+              └─StreamSource { source: t2, columns: [ts2, a2, b2, _row_id] }
diff --git a/src/frontend/src/binder/mod.rs b/src/frontend/src/binder/mod.rs
index 974730cd16237..f1038f9bf5943 100644
--- a/src/frontend/src/binder/mod.rs
+++ b/src/frontend/src/binder/mod.rs
@@ -363,6 +363,13 @@ impl Binder {
     }
 }
 
+/// The column name stored in [`BindContext`] for a column without an alias.
+pub const UNNAMED_COLUMN: &str = "?column?";
+/// The table name stored in [`BindContext`] for a subquery without an alias.
+const UNNAMED_SUBQUERY: &str = "?subquery?";
+/// The table name stored in [`BindContext`] for a column group.
+const COLUMN_GROUP_PREFIX: &str = "?column_group_id?";
+
 #[cfg(test)]
 pub mod test_utils {
     use risingwave_common::types::DataType;
@@ -380,10 +387,3 @@ pub mod test_utils {
         Binder::new_with_param_types(&SessionImpl::mock(), param_types)
     }
 }
-
-/// The column name stored in [`BindContext`] for a column without an alias.
-pub const UNNAMED_COLUMN: &str = "?column?";
-/// The table name stored in [`BindContext`] for a subquery without an alias.
-const UNNAMED_SUBQUERY: &str = "?subquery?";
-/// The table name stored in [`BindContext`] for a column group.
-const COLUMN_GROUP_PREFIX: &str = "?column_group_id?";
diff --git a/src/frontend/src/handler/create_mv.rs b/src/frontend/src/handler/create_mv.rs
index 3fa9129f39743..053ba5aa30f19 100644
--- a/src/frontend/src/handler/create_mv.rs
+++ b/src/frontend/src/handler/create_mv.rs
@@ -26,6 +26,7 @@ use crate::binder::{Binder, BoundQuery, BoundSetExpr};
 use crate::catalog::{check_valid_column_name, CatalogError};
 use crate::handler::privilege::resolve_query_privileges;
 use crate::handler::HandlerArgs;
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::Explain;
 use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef, RelationCollectorVisitor};
 use crate::planner::Planner;
@@ -175,7 +176,7 @@ It only indicates the physical clustering of the data, which may improve the per
 
         let (plan, table) =
             gen_create_mv_plan(&session, context.into(), query, name, columns, emit_mode)?;
-        let context = plan.plan_base().ctx.clone();
+        let context = plan.plan_base().ctx().clone();
         let mut graph = build_graph(plan);
         graph.parallelism = session
             .config()
diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs
index 32279dd4e70eb..ddb1d697b856d 100644
--- a/src/frontend/src/handler/create_sink.rs
+++ b/src/frontend/src/handler/create_sink.rs
@@ -244,6 +244,7 @@ fn bind_sink_format_desc(value: SinkSchema) -> Result<SinkFormatDesc> {
         E::Json => SinkEncode::Json,
         E::Protobuf => SinkEncode::Protobuf,
         E::Avro => SinkEncode::Avro,
+        E::Template => SinkEncode::Template,
         e @ (E::Native | E::Csv | E::Bytes) => {
             return Err(ErrorCode::BindError(format!("sink encode unsupported: {e}")).into())
         }
@@ -262,6 +263,7 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock<HashMap<String, HashMap<Format, V
         use risingwave_connector::sink::kafka::KafkaSink;
         use risingwave_connector::sink::kinesis::KinesisSink;
         use risingwave_connector::sink::pulsar::PulsarSink;
+        use risingwave_connector::sink::redis::RedisSink;
         use risingwave_connector::sink::Sink as _;
 
         convert_args!(hashmap!(
@@ -280,6 +282,10 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock<HashMap<String, HashMap<Format, V
                     Format::Upsert => vec![Encode::Json],
                     Format::Debezium => vec![Encode::Json],
                 ),
+                RedisSink::SINK_NAME => hashmap!(
+                    Format::Plain => vec![Encode::Json,Encode::Template],
+                    Format::Upsert => vec![Encode::Json,Encode::Template],
+                ),
         ))
     });
 pub fn validate_compatibility(connector: &str, format_desc: &SinkSchema) -> Result<()> {
diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs
index 149f39bead330..174ed23e03ec5 100644
--- a/src/frontend/src/handler/mod.rs
+++ b/src/frontend/src/handler/mod.rs
@@ -73,6 +73,7 @@ mod show;
 mod transaction;
 pub mod util;
 pub mod variable;
+mod wait;
 
 /// The [`PgResponseBuilder`] used by RisingWave.
 pub type RwPgResponseBuilder = PgResponseBuilder<PgResponseStream>;
@@ -419,6 +420,7 @@ pub async fn handle(
             }
         }
         Statement::Flush => flush::handle_flush(handler_args).await,
+        Statement::Wait => wait::handle_wait(handler_args).await,
         Statement::SetVariable {
             local: _,
             variable,
diff --git a/src/meta/src/model_v2/ext/mod.rs b/src/frontend/src/handler/wait.rs
similarity index 53%
rename from src/meta/src/model_v2/ext/mod.rs
rename to src/frontend/src/handler/wait.rs
index 47a5ce8623dc4..83f2784ec8c17 100644
--- a/src/meta/src/model_v2/ext/mod.rs
+++ b/src/frontend/src/handler/wait.rs
@@ -12,5 +12,30 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-mod hummock;
-pub use hummock::*;
+use pgwire::pg_response::{PgResponse, StatementType};
+use risingwave_common::error::Result;
+
+use super::RwPgResponse;
+use crate::handler::HandlerArgs;
+use crate::session::SessionImpl;
+
+/// Handles the `WAIT` statement.
+///
+/// Runs [`do_wait`] against the session carried by `handler_args` and, on success,
+/// returns an empty response tagged with [`StatementType::WAIT`]. Any error from
+/// [`do_wait`] is propagated to the caller.
+pub(super) async fn handle_wait(handler_args: HandlerArgs) -> Result<RwPgResponse> {
+    do_wait(&handler_args.session).await?;
+    Ok(PgResponse::empty_result(StatementType::WAIT))
+}
+
+/// Invokes `wait` on the meta client obtained from the session's environment.
+///
+/// Resolves to `Ok(())` when that call succeeds; a failure is returned through `?`.
+/// NOTE(review): what the meta service actually waits for is not visible from this
+/// file -- confirm against the meta client before relying on specific semantics.
+pub(crate) async fn do_wait(session: &SessionImpl) -> Result<()> {
+    let client = session.env().meta_client();
+    client.wait().await?;
+    Ok(())
+}
diff --git a/src/frontend/src/lib.rs b/src/frontend/src/lib.rs
index 0a036b8e96233..450f49b6394cf 100644
--- a/src/frontend/src/lib.rs
+++ b/src/frontend/src/lib.rs
@@ -15,7 +15,7 @@
 #![allow(clippy::derive_partial_eq_without_eq)]
 #![feature(map_try_insert)]
 #![feature(negative_impls)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(proc_macro_hygiene, stmt_expr_attributes)]
 #![feature(trait_alias)]
 #![feature(extract_if)]
@@ -32,7 +32,6 @@
 #![feature(extend_one)]
 #![feature(type_alias_impl_trait)]
 #![feature(impl_trait_in_assoc_type)]
-#![feature(async_fn_in_trait)]
 #![feature(result_flattening)]
 #![recursion_limit = "256"]
 
diff --git a/src/frontend/src/meta_client.rs b/src/frontend/src/meta_client.rs
index ae90c2e345f9f..d37c5dec127f1 100644
--- a/src/frontend/src/meta_client.rs
+++ b/src/frontend/src/meta_client.rs
@@ -43,6 +43,8 @@ pub trait FrontendMetaClient: Send + Sync {
 
     async fn flush(&self, checkpoint: bool) -> Result<HummockSnapshot>;
 
+    async fn wait(&self) -> Result<()>;
+
     async fn cancel_creating_jobs(&self, jobs: PbJobs) -> Result<Vec<u32>>;
 
     async fn list_table_fragments(
@@ -111,6 +113,10 @@ impl FrontendMetaClient for FrontendMetaClientImpl {
         self.0.flush(checkpoint).await
     }
 
+    async fn wait(&self) -> Result<()> {
+        self.0.wait().await
+    }
+
     async fn cancel_creating_jobs(&self, infos: PbJobs) -> Result<Vec<u32>> {
         self.0.cancel_creating_jobs(infos).await
     }
diff --git a/src/frontend/src/optimizer/plan_node/batch.rs b/src/frontend/src/optimizer/plan_node/batch.rs
index d62a85095d21c..2cb2360b3e51d 100644
--- a/src/frontend/src/optimizer/plan_node/batch.rs
+++ b/src/frontend/src/optimizer/plan_node/batch.rs
@@ -12,14 +12,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use super::generic::GenericPlanRef;
+use super::generic::PhysicalPlanRef;
 use crate::optimizer::property::Order;
 
-/// A subtrait of [`GenericPlanRef`] for batch plans.
+/// A subtrait of [`PhysicalPlanRef`] for batch plans.
 ///
 /// Due to the lack of refactoring, all plan nodes currently implement this trait
 /// through [`super::PlanBase`]. One may still use this trait as a bound for
-/// expecting a batch plan, in contrast to [`GenericPlanRef`].
-pub trait BatchPlanRef: GenericPlanRef {
+/// accessing a batch plan, in contrast to [`GenericPlanRef`] or
+/// [`PhysicalPlanRef`].
+///
+/// [`GenericPlanRef`]: super::generic::GenericPlanRef
+pub trait BatchPlanRef: PhysicalPlanRef {
     fn order(&self) -> &Order;
 }
diff --git a/src/frontend/src/optimizer/plan_node/batch_delete.rs b/src/frontend/src/optimizer/plan_node/batch_delete.rs
index 42db0a1c4a774..85d22a46b450e 100644
--- a/src/frontend/src/optimizer/plan_node/batch_delete.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_delete.rs
@@ -33,11 +33,8 @@ pub struct BatchDelete {
 impl BatchDelete {
     pub fn new(core: generic::Delete<PlanRef>) -> Self {
         assert_eq!(core.input.distribution(), &Distribution::Single);
-        let base: PlanBase = PlanBase::new_batch_from_logical(
-            &core,
-            core.input.distribution().clone(),
-            Order::any(),
-        );
+        let base: PlanBase =
+            PlanBase::new_batch_with_core(&core, core.input.distribution().clone(), Order::any());
         Self { base, core }
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/batch_exchange.rs b/src/frontend/src/optimizer/plan_node/batch_exchange.rs
index 583838e877c5e..6477c7ec213e2 100644
--- a/src/frontend/src/optimizer/plan_node/batch_exchange.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_exchange.rs
@@ -17,6 +17,8 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::{ExchangeNode, MergeSortExchangeNode};
 
+use super::batch::BatchPlanRef;
+use super::generic::{GenericPlanRef, PhysicalPlanRef};
 use super::utils::{childless_record, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
 use crate::optimizer::plan_node::ToLocalBatch;
@@ -43,12 +45,12 @@ impl Distill for BatchExchange {
     fn distill<'a>(&self) -> XmlNode<'a> {
         let input_schema = self.input.schema();
         let order = OrderDisplay {
-            order: &self.base.order,
+            order: self.base.order(),
             input_schema,
         }
         .distill();
         let dist = Pretty::display(&DistributionDisplay {
-            distribution: &self.base.dist,
+            distribution: self.base.distribution(),
             input_schema,
         });
         childless_record("BatchExchange", vec![("order", order), ("dist", dist)])
@@ -75,18 +77,18 @@ impl ToDistributedBatch for BatchExchange {
 /// The serialization of Batch Exchange is default cuz it will be rewritten in scheduler.
 impl ToBatchPb for BatchExchange {
     fn to_batch_prost_body(&self) -> NodeBody {
-        if self.base.order.is_any() {
+        if self.base.order().is_any() {
             NodeBody::Exchange(ExchangeNode {
                 sources: vec![],
-                input_schema: self.base.schema.to_prost(),
+                input_schema: self.base.schema().to_prost(),
             })
         } else {
             NodeBody::MergeSortExchange(MergeSortExchangeNode {
                 exchange: Some(ExchangeNode {
                     sources: vec![],
-                    input_schema: self.base.schema.to_prost(),
+                    input_schema: self.base.schema().to_prost(),
                 }),
-                column_orders: self.base.order.to_protobuf(),
+                column_orders: self.base.order().to_protobuf(),
             })
         }
     }
diff --git a/src/frontend/src/optimizer/plan_node/batch_expand.rs b/src/frontend/src/optimizer/plan_node/batch_expand.rs
index 870368701be44..af4413b9e5152 100644
--- a/src/frontend/src/optimizer/plan_node/batch_expand.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_expand.rs
@@ -41,7 +41,7 @@ impl BatchExpand {
             | Distribution::UpstreamHashShard(_, _) => Distribution::SomeShard,
             Distribution::Broadcast => unreachable!(),
         };
-        let base = PlanBase::new_batch_from_logical(&core, dist, Order::any());
+        let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
         BatchExpand { base, core }
     }
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_filter.rs b/src/frontend/src/optimizer/plan_node/batch_filter.rs
index 6bc5086c7a29b..4bff7cbfee3c0 100644
--- a/src/frontend/src/optimizer/plan_node/batch_filter.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_filter.rs
@@ -32,7 +32,7 @@ pub struct BatchFilter {
 impl BatchFilter {
     pub fn new(core: generic::Filter<PlanRef>) -> Self {
         // TODO: derive from input
-        let base = PlanBase::new_batch_from_logical(
+        let base = PlanBase::new_batch_with_core(
             &core,
             core.input.distribution().clone(),
             core.input.order().clone(),
diff --git a/src/frontend/src/optimizer/plan_node/batch_group_topn.rs b/src/frontend/src/optimizer/plan_node/batch_group_topn.rs
index 8f6684dc4d85b..70ee8328623f5 100644
--- a/src/frontend/src/optimizer/plan_node/batch_group_topn.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_group_topn.rs
@@ -33,11 +33,8 @@ pub struct BatchGroupTopN {
 impl BatchGroupTopN {
     pub fn new(core: generic::TopN<PlanRef>) -> Self {
         assert!(!core.group_key.is_empty());
-        let base = PlanBase::new_batch_from_logical(
-            &core,
-            core.input.distribution().clone(),
-            Order::any(),
-        );
+        let base =
+            PlanBase::new_batch_with_core(&core, core.input.distribution().clone(), Order::any());
         BatchGroupTopN { base, core }
     }
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs b/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs
index fa14076912689..b4ab3341ace29 100644
--- a/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_hash_agg.rs
@@ -42,7 +42,7 @@ impl BatchHashAgg {
         let dist = core
             .i2o_col_mapping()
             .rewrite_provided_distribution(input_dist);
-        let base = PlanBase::new_batch_from_logical(&core, dist, Order::any());
+        let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
         BatchHashAgg { base, core }
     }
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_hash_join.rs b/src/frontend/src/optimizer/plan_node/batch_hash_join.rs
index 5adfa6f5fd622..bad586d4af1e4 100644
--- a/src/frontend/src/optimizer/plan_node/batch_hash_join.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_hash_join.rs
@@ -46,7 +46,7 @@ pub struct BatchHashJoin {
 impl BatchHashJoin {
     pub fn new(core: generic::Join<PlanRef>, eq_join_predicate: EqJoinPredicate) -> Self {
         let dist = Self::derive_dist(core.left.distribution(), core.right.distribution(), &core);
-        let base = PlanBase::new_batch_from_logical(&core, dist, Order::any());
+        let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
 
         Self {
             base,
@@ -91,7 +91,7 @@ impl BatchHashJoin {
 
 impl Distill for BatchHashJoin {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
         vec.push(("type", Pretty::debug(&self.core.join_type)));
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_hop_window.rs b/src/frontend/src/optimizer/plan_node/batch_hop_window.rs
index 68381956b8a9a..2a4a27f9a0583 100644
--- a/src/frontend/src/optimizer/plan_node/batch_hop_window.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_hop_window.rs
@@ -44,11 +44,8 @@ impl BatchHopWindow {
         let distribution = core
             .i2o_col_mapping()
             .rewrite_provided_distribution(core.input.distribution());
-        let base = PlanBase::new_batch_from_logical(
-            &core,
-            distribution,
-            core.get_out_column_index_order(),
-        );
+        let base =
+            PlanBase::new_batch_with_core(&core, distribution, core.get_out_column_index_order());
         BatchHopWindow {
             base,
             core,
diff --git a/src/frontend/src/optimizer/plan_node/batch_insert.rs b/src/frontend/src/optimizer/plan_node/batch_insert.rs
index 0a2d2dddec8c5..aec05eee145b8 100644
--- a/src/frontend/src/optimizer/plan_node/batch_insert.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_insert.rs
@@ -18,6 +18,7 @@ use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::InsertNode;
 use risingwave_pb::plan_common::{DefaultColumns, IndexAndExpr};
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{generic, ExprRewritable, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
 use crate::expr::Expr;
@@ -34,11 +35,8 @@ pub struct BatchInsert {
 impl BatchInsert {
     pub fn new(core: generic::Insert<PlanRef>) -> Self {
         assert_eq!(core.input.distribution(), &Distribution::Single);
-        let base: PlanBase = PlanBase::new_batch_from_logical(
-            &core,
-            core.input.distribution().clone(),
-            Order::any(),
-        );
+        let base: PlanBase =
+            PlanBase::new_batch_with_core(&core, core.input.distribution().clone(), Order::any());
 
         BatchInsert { base, core }
     }
@@ -46,7 +44,9 @@ impl BatchInsert {
 
 impl Distill for BatchInsert {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let vec = self.core.fields_pretty(self.base.ctx.is_explain_verbose());
+        let vec = self
+            .core
+            .fields_pretty(self.base.ctx().is_explain_verbose());
         childless_record("BatchInsert", vec)
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/batch_limit.rs b/src/frontend/src/optimizer/plan_node/batch_limit.rs
index 17ee2c3fb69f3..93b14d0198979 100644
--- a/src/frontend/src/optimizer/plan_node/batch_limit.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_limit.rs
@@ -16,6 +16,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::LimitNode;
 
+use super::generic::PhysicalPlanRef;
 use super::utils::impl_distill_by_unit;
 use super::{
     generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch,
@@ -32,7 +33,7 @@ pub struct BatchLimit {
 
 impl BatchLimit {
     pub fn new(core: generic::Limit<PlanRef>) -> Self {
-        let base = PlanBase::new_batch_from_logical(
+        let base = PlanBase::new_batch_with_core(
             &core,
             core.input.distribution().clone(),
             core.input.order().clone(),
diff --git a/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs b/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs
index 99eb905da4661..48f99668c3af7 100644
--- a/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_lookup_join.rs
@@ -18,7 +18,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::{DistributedLookupJoinNode, LocalLookupJoinNode};
 
-use super::generic::{self};
+use super::generic::{self, GenericPlanRef};
 use super::utils::{childless_record, Distill};
 use super::ExprRewritable;
 use crate::expr::{Expr, ExprRewriter};
@@ -68,7 +68,7 @@ impl BatchLookupJoin {
         assert!(eq_join_predicate.has_eq());
         assert!(eq_join_predicate.eq_keys_are_type_aligned());
         let dist = Self::derive_dist(core.left.distribution(), &core);
-        let base = PlanBase::new_batch_from_logical(&core, dist, Order::any());
+        let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
         Self {
             base,
             core,
@@ -112,7 +112,7 @@ impl BatchLookupJoin {
 
 impl Distill for BatchLookupJoin {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
         vec.push(("type", Pretty::debug(&self.core.join_type)));
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs b/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs
index d743523d05911..8980ad2f23f6d 100644
--- a/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_nested_loop_join.rs
@@ -17,7 +17,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::NestedLoopJoinNode;
 
-use super::generic::{self};
+use super::generic::{self, GenericPlanRef};
 use super::utils::{childless_record, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeBinary, ToBatchPb, ToDistributedBatch};
 use crate::expr::{Expr, ExprImpl, ExprRewriter};
@@ -37,7 +37,7 @@ pub struct BatchNestedLoopJoin {
 impl BatchNestedLoopJoin {
     pub fn new(core: generic::Join<PlanRef>) -> Self {
         let dist = Self::derive_dist(core.left.distribution(), core.right.distribution());
-        let base = PlanBase::new_batch_from_logical(&core, dist, Order::any());
+        let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
         Self { base, core }
     }
 
@@ -51,7 +51,7 @@ impl BatchNestedLoopJoin {
 
 impl Distill for BatchNestedLoopJoin {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
         vec.push(("type", Pretty::debug(&self.core.join_type)));
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_over_window.rs b/src/frontend/src/optimizer/plan_node/batch_over_window.rs
index f04587059aecd..fb455758f331a 100644
--- a/src/frontend/src/optimizer/plan_node/batch_over_window.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_over_window.rs
@@ -17,6 +17,7 @@ use risingwave_common::util::sort_util::{ColumnOrder, OrderType};
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::SortOverWindowNode;
 
+use super::batch::BatchPlanRef;
 use super::generic::PlanWindowFunction;
 use super::utils::impl_distill_by_unit;
 use super::{
@@ -46,7 +47,7 @@ impl BatchOverWindow {
                 .collect(),
         );
 
-        let base = PlanBase::new_batch_from_logical(&core, input_dist, order);
+        let base = PlanBase::new_batch_with_core(&core, input_dist, order);
         BatchOverWindow { base, core }
     }
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_project.rs b/src/frontend/src/optimizer/plan_node/batch_project.rs
index 591d7d13caed8..642683967c5c3 100644
--- a/src/frontend/src/optimizer/plan_node/batch_project.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_project.rs
@@ -18,6 +18,7 @@ use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::ProjectNode;
 use risingwave_pb::expr::ExprNode;
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{
     generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch,
@@ -43,7 +44,7 @@ impl BatchProject {
             .i2o_col_mapping()
             .rewrite_provided_order(core.input.order());
 
-        let base = PlanBase::new_batch_from_logical(&core, distribution, order);
+        let base = PlanBase::new_batch_with_core(&core, distribution, order);
         BatchProject { base, core }
     }
 
diff --git a/src/frontend/src/optimizer/plan_node/batch_project_set.rs b/src/frontend/src/optimizer/plan_node/batch_project_set.rs
index 331ca8e5235de..5888df9d15889 100644
--- a/src/frontend/src/optimizer/plan_node/batch_project_set.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_project_set.rs
@@ -38,11 +38,8 @@ impl BatchProjectSet {
             .i2o_col_mapping()
             .rewrite_provided_distribution(core.input.distribution());
 
-        let base = PlanBase::new_batch_from_logical(
-            &core,
-            distribution,
-            core.get_out_column_index_order(),
-        );
+        let base =
+            PlanBase::new_batch_with_core(&core, distribution, core.get_out_column_index_order());
         BatchProjectSet { base, core }
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs
index dc0e553caf308..6834ed29353b9 100644
--- a/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_seq_scan.rs
@@ -24,6 +24,8 @@ use risingwave_pb::batch_plan::row_seq_scan_node::ChunkSize;
 use risingwave_pb::batch_plan::{RowSeqScanNode, SysRowSeqScanNode};
 use risingwave_pb::plan_common::PbColumnDesc;
 
+use super::batch::BatchPlanRef;
+use super::generic::{GenericPlanRef, PhysicalPlanRef};
 use super::utils::{childless_record, Distill};
 use super::{generic, ExprRewritable, PlanBase, PlanRef, ToBatchPb, ToDistributedBatch};
 use crate::catalog::ColumnId;
@@ -46,7 +48,7 @@ impl BatchSeqScan {
         } else {
             core.get_out_column_index_order()
         };
-        let base = PlanBase::new_batch_from_logical(&core, dist, order);
+        let base = PlanBase::new_batch_with_core(&core, dist, order);
 
         {
             // validate scan_range
@@ -180,7 +182,7 @@ fn range_to_string(name: &str, range: &(Bound<ScalarImpl>, Bound<ScalarImpl>)) -
 
 impl Distill for BatchSeqScan {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(4);
         vec.push(("table", Pretty::from(self.core.table_name.clone())));
         vec.push(("columns", self.core.columns_pretty(verbose)));
@@ -196,7 +198,7 @@ impl Distill for BatchSeqScan {
         if verbose {
             let dist = Pretty::display(&DistributionDisplay {
                 distribution: self.distribution(),
-                input_schema: &self.base.schema,
+                input_schema: self.base.schema(),
             });
             vec.push(("distribution", dist));
         }
diff --git a/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs b/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs
index ff184324a5fb9..bae8d70c2eedf 100644
--- a/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_simple_agg.rs
@@ -16,7 +16,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::SortAggNode;
 
-use super::generic::{self, PlanAggCall};
+use super::generic::{self, GenericPlanRef, PlanAggCall};
 use super::utils::impl_distill_by_unit;
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
 use crate::expr::ExprRewriter;
@@ -32,7 +32,7 @@ pub struct BatchSimpleAgg {
 impl BatchSimpleAgg {
     pub fn new(core: generic::Agg<PlanRef>) -> Self {
         let input_dist = core.input.distribution().clone();
-        let base = PlanBase::new_batch_from_logical(&core, input_dist, Order::any());
+        let base = PlanBase::new_batch_with_core(&core, input_dist, Order::any());
         BatchSimpleAgg { base, core }
     }
 
@@ -41,8 +41,11 @@ impl BatchSimpleAgg {
     }
 
     fn two_phase_agg_enabled(&self) -> bool {
-        let session_ctx = self.base.ctx.session_ctx();
-        session_ctx.config().get_enable_two_phase_agg()
+        self.base
+            .ctx()
+            .session_ctx()
+            .config()
+            .get_enable_two_phase_agg()
     }
 
     pub(crate) fn can_two_phase_agg(&self) -> bool {
diff --git a/src/frontend/src/optimizer/plan_node/batch_sort.rs b/src/frontend/src/optimizer/plan_node/batch_sort.rs
index 8576a18c19333..e7bff6d51d85b 100644
--- a/src/frontend/src/optimizer/plan_node/batch_sort.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_sort.rs
@@ -17,6 +17,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::SortNode;
 
+use super::batch::BatchPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch};
 use crate::optimizer::plan_node::ToLocalBatch;
@@ -56,7 +57,7 @@ impl PlanTreeNodeUnary for BatchSort {
     }
 
     fn clone_with_input(&self, input: PlanRef) -> Self {
-        Self::new(input, self.base.order.clone())
+        Self::new(input, self.base.order().clone())
     }
 }
 impl_plan_tree_node_for_unary! {BatchSort}
@@ -70,7 +71,7 @@ impl ToDistributedBatch for BatchSort {
 
 impl ToBatchPb for BatchSort {
     fn to_batch_prost_body(&self) -> NodeBody {
-        let column_orders = self.base.order.to_protobuf();
+        let column_orders = self.base.order().to_protobuf();
         NodeBody::Sort(SortNode { column_orders })
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs b/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs
index 00facef473a37..2252d4c0c0ee0 100644
--- a/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_sort_agg.rs
@@ -54,7 +54,7 @@ impl BatchSortAgg {
 
         let order = core.i2o_col_mapping().rewrite_provided_order(&input_order);
 
-        let base = PlanBase::new_batch_from_logical(&core, dist, order);
+        let base = PlanBase::new_batch_with_core(&core, dist, order);
 
         BatchSortAgg {
             base,
diff --git a/src/frontend/src/optimizer/plan_node/batch_source.rs b/src/frontend/src/optimizer/plan_node/batch_source.rs
index 8d43b4a7f6663..9e2cd6006db0b 100644
--- a/src/frontend/src/optimizer/plan_node/batch_source.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_source.rs
@@ -19,6 +19,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::SourceNode;
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, column_names_pretty, Distill};
 use super::{
     generic, ExprRewritable, PlanBase, PlanRef, ToBatchPb, ToDistributedBatch, ToLocalBatch,
@@ -35,7 +36,7 @@ pub struct BatchSource {
 
 impl BatchSource {
     pub fn new(core: generic::Source) -> Self {
-        let base = PlanBase::new_batch_from_logical(
+        let base = PlanBase::new_batch_with_core(
             &core,
             // Use `Single` by default, will be updated later with `clone_with_dist`.
             Distribution::Single,
@@ -58,8 +59,9 @@ impl BatchSource {
     }
 
     pub fn clone_with_dist(&self) -> Self {
-        let mut base = self.base.clone();
-        base.dist = Distribution::SomeShard;
+        let base = self
+            .base
+            .clone_with_new_distribution(Distribution::SomeShard);
         Self {
             base,
             core: self.core.clone(),
diff --git a/src/frontend/src/optimizer/plan_node/batch_table_function.rs b/src/frontend/src/optimizer/plan_node/batch_table_function.rs
index 91aa1af0abbe7..0b9887cd4aaba 100644
--- a/src/frontend/src/optimizer/plan_node/batch_table_function.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_table_function.rs
@@ -17,6 +17,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::TableFunctionNode;
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeLeaf, ToBatchPb, ToDistributedBatch};
 use crate::expr::ExprRewriter;
@@ -39,7 +40,7 @@ impl BatchTableFunction {
     }
 
     pub fn with_dist(logical: LogicalTableFunction, dist: Distribution) -> Self {
-        let ctx = logical.base.ctx.clone();
+        let ctx = logical.base.ctx().clone();
         let base = PlanBase::new_batch(ctx, logical.schema().clone(), dist, Order::any());
         BatchTableFunction { base, logical }
     }
diff --git a/src/frontend/src/optimizer/plan_node/batch_topn.rs b/src/frontend/src/optimizer/plan_node/batch_topn.rs
index e5f44bd2ef0e2..b2eda24046d28 100644
--- a/src/frontend/src/optimizer/plan_node/batch_topn.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_topn.rs
@@ -35,7 +35,7 @@ pub struct BatchTopN {
 impl BatchTopN {
     pub fn new(core: generic::TopN<PlanRef>) -> Self {
         assert!(core.group_key.is_empty());
-        let base = PlanBase::new_batch_from_logical(
+        let base = PlanBase::new_batch_with_core(
             &core,
             core.input.distribution().clone(),
             // BatchTopN outputs data in the order of specified order
diff --git a/src/frontend/src/optimizer/plan_node/batch_union.rs b/src/frontend/src/optimizer/plan_node/batch_union.rs
index 31b4a541dfe4a..c7c71111174c6 100644
--- a/src/frontend/src/optimizer/plan_node/batch_union.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_union.rs
@@ -40,7 +40,7 @@ impl BatchUnion {
             Distribution::SomeShard
         };
 
-        let base = PlanBase::new_batch_from_logical(&core, dist, Order::any());
+        let base = PlanBase::new_batch_with_core(&core, dist, Order::any());
         BatchUnion { base, core }
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/batch_update.rs b/src/frontend/src/optimizer/plan_node/batch_update.rs
index feebedeb07aaf..20e4b8b6b966c 100644
--- a/src/frontend/src/optimizer/plan_node/batch_update.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_update.rs
@@ -18,6 +18,7 @@ use risingwave_common::error::Result;
 use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::UpdateNode;
 
+use super::generic::GenericPlanRef;
 use super::utils::impl_distill_by_unit;
 use super::{
     generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, ToBatchPb, ToDistributedBatch,
diff --git a/src/frontend/src/optimizer/plan_node/batch_values.rs b/src/frontend/src/optimizer/plan_node/batch_values.rs
index 5f4e2308493a9..9348cddba7422 100644
--- a/src/frontend/src/optimizer/plan_node/batch_values.rs
+++ b/src/frontend/src/optimizer/plan_node/batch_values.rs
@@ -18,6 +18,7 @@ use risingwave_pb::batch_plan::plan_node::NodeBody;
 use risingwave_pb::batch_plan::values_node::ExprTuple;
 use risingwave_pb::batch_plan::ValuesNode;
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{
     ExprRewritable, LogicalValues, PlanBase, PlanRef, PlanTreeNodeLeaf, ToBatchPb,
@@ -42,7 +43,7 @@ impl BatchValues {
     }
 
     pub fn with_dist(logical: LogicalValues, dist: Distribution) -> Self {
-        let ctx = logical.base.ctx.clone();
+        let ctx = logical.base.ctx().clone();
         let base = PlanBase::new_batch(ctx, logical.schema().clone(), dist, Order::any());
         BatchValues { base, logical }
     }
diff --git a/src/frontend/src/optimizer/plan_node/generic/agg.rs b/src/frontend/src/optimizer/plan_node/generic/agg.rs
index 2fb251ca89aa6..e0c7e339ee6a6 100644
--- a/src/frontend/src/optimizer/plan_node/generic/agg.rs
+++ b/src/frontend/src/optimizer/plan_node/generic/agg.rs
@@ -516,11 +516,8 @@ impl<PlanRef: stream::StreamPlanRef> Agg<PlanRef> {
                 // we use materialized input state for non-retractable aggregate function.
                 // for backward compatibility, the state type is same as the return type.
                 // its values in the intermediate state table are always null.
-            } else {
-                field.data_type = sig
-                    .state_type
-                    .clone()
-                    .unwrap_or(sig.ret_type.as_exact().clone());
+            } else if let Some(state_type) = &sig.state_type {
+                field.data_type = state_type.clone();
             }
         }
         let in_dist_key = self.input.distribution().dist_column_indices().to_vec();
diff --git a/src/frontend/src/optimizer/plan_node/generic/mod.rs b/src/frontend/src/optimizer/plan_node/generic/mod.rs
index 49038500b4301..aec59c90bcc4e 100644
--- a/src/frontend/src/optimizer/plan_node/generic/mod.rs
+++ b/src/frontend/src/optimizer/plan_node/generic/mod.rs
@@ -18,9 +18,9 @@ use std::hash::Hash;
 use pretty_xmlish::XmlNode;
 use risingwave_common::catalog::Schema;
 
-use super::{stream, EqJoinPredicate};
+use super::{stream, EqJoinPredicate, PlanNodeId};
 use crate::optimizer::optimizer_context::OptimizerContextRef;
-use crate::optimizer::property::FunctionalDependencySet;
+use crate::optimizer::property::{Distribution, FunctionalDependencySet};
 
 pub mod dynamic_filter;
 pub use dynamic_filter::*;
@@ -85,21 +85,18 @@ macro_rules! impl_distill_unit_from_fields {
 pub(super) use impl_distill_unit_from_fields;
 
 pub trait GenericPlanRef: Eq + Hash {
+    fn id(&self) -> PlanNodeId;
     fn schema(&self) -> &Schema;
     fn stream_key(&self) -> Option<&[usize]>;
     fn functional_dependency(&self) -> &FunctionalDependencySet;
     fn ctx(&self) -> OptimizerContextRef;
 }
 
+pub trait PhysicalPlanRef: GenericPlanRef {
+    fn distribution(&self) -> &Distribution;
+}
+
 pub trait GenericPlanNode {
-    /// return (schema, `stream_key`, fds)
-    fn logical_properties(&self) -> (Schema, Option<Vec<usize>>, FunctionalDependencySet) {
-        (
-            self.schema(),
-            self.stream_key(),
-            self.functional_dependency(),
-        )
-    }
     fn functional_dependency(&self) -> FunctionalDependencySet;
     fn schema(&self) -> Schema;
     fn stream_key(&self) -> Option<Vec<usize>>;
diff --git a/src/frontend/src/optimizer/plan_node/logical_apply.rs b/src/frontend/src/optimizer/plan_node/logical_apply.rs
index 7640f093fc933..b398ce7494f61 100644
--- a/src/frontend/src/optimizer/plan_node/logical_apply.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_apply.rs
@@ -18,7 +18,9 @@ use risingwave_common::catalog::Schema;
 use risingwave_common::error::{ErrorCode, Result, RwError};
 use risingwave_pb::plan_common::JoinType;
 
-use super::generic::{self, push_down_into_join, push_down_join_condition, GenericPlanNode};
+use super::generic::{
+    self, push_down_into_join, push_down_join_condition, GenericPlanNode, GenericPlanRef,
+};
 use super::utils::{childless_record, Distill};
 use super::{
     ColPrunable, LogicalJoin, LogicalProject, PlanBase, PlanRef, PlanTreeNodeBinary,
diff --git a/src/frontend/src/optimizer/plan_node/logical_expand.rs b/src/frontend/src/optimizer/plan_node/logical_expand.rs
index b32374e6dc427..d1f3b666feef5 100644
--- a/src/frontend/src/optimizer/plan_node/logical_expand.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_expand.rs
@@ -15,6 +15,7 @@
 use itertools::Itertools;
 use risingwave_common::error::Result;
 
+use super::generic::GenericPlanRef;
 use super::utils::impl_distill_by_unit;
 use super::{
     gen_filter_and_pushdown, generic, BatchExpand, ColPrunable, ExprRewritable, PlanBase, PlanRef,
@@ -192,7 +193,7 @@ mod tests {
         let mut values = LogicalValues::new(vec![], Schema { fields }, ctx);
         values
             .base
-            .functional_dependency
+            .functional_dependency_mut()
             .add_functional_dependency_by_column_indices(&[0], &[1, 2]);
 
         let column_subsets = vec![vec![0, 1], vec![2]];
diff --git a/src/frontend/src/optimizer/plan_node/logical_filter.rs b/src/frontend/src/optimizer/plan_node/logical_filter.rs
index 72ee7d246b83d..a62b91aac5277 100644
--- a/src/frontend/src/optimizer/plan_node/logical_filter.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_filter.rs
@@ -18,6 +18,7 @@ use risingwave_common::bail;
 use risingwave_common::error::Result;
 use risingwave_common::types::DataType;
 
+use super::generic::GenericPlanRef;
 use super::utils::impl_distill_by_unit;
 use super::{
     generic, ColPrunable, ExprRewritable, LogicalProject, PlanBase, PlanRef, PlanTreeNodeUnary,
@@ -462,7 +463,7 @@ mod tests {
         // 3 --> 1, 2
         values
             .base
-            .functional_dependency
+            .functional_dependency_mut()
             .add_functional_dependency_by_column_indices(&[3], &[1, 2]);
         // v1 = 0 AND v2 = v3
         let predicate = ExprImpl::FunctionCall(Box::new(
diff --git a/src/frontend/src/optimizer/plan_node/logical_hop_window.rs b/src/frontend/src/optimizer/plan_node/logical_hop_window.rs
index e4bd65efa647c..da2ec2138c3d1 100644
--- a/src/frontend/src/optimizer/plan_node/logical_hop_window.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_hop_window.rs
@@ -17,7 +17,7 @@ use itertools::Itertools;
 use risingwave_common::error::Result;
 use risingwave_common::types::Interval;
 
-use super::generic::GenericPlanNode;
+use super::generic::{GenericPlanNode, GenericPlanRef};
 use super::utils::impl_distill_by_unit;
 use super::{
     gen_filter_and_pushdown, generic, BatchHopWindow, ColPrunable, ExprRewritable, LogicalFilter,
@@ -446,7 +446,7 @@ mod test {
         // 0, 1 --> 2
         values
             .base
-            .functional_dependency
+            .functional_dependency_mut()
             .add_functional_dependency_by_column_indices(&[0, 1], &[2]);
         let hop_window: PlanRef = LogicalHopWindow::new(
             values.into(),
diff --git a/src/frontend/src/optimizer/plan_node/logical_insert.rs b/src/frontend/src/optimizer/plan_node/logical_insert.rs
index f44000b502223..e93b77d79c1f2 100644
--- a/src/frontend/src/optimizer/plan_node/logical_insert.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_insert.rs
@@ -16,6 +16,7 @@ use pretty_xmlish::XmlNode;
 use risingwave_common::catalog::TableVersionId;
 use risingwave_common::error::Result;
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{
     gen_filter_and_pushdown, generic, BatchInsert, ColPrunable, ExprRewritable, LogicalProject,
@@ -90,7 +91,9 @@ impl_plan_tree_node_for_unary! {LogicalInsert}
 
 impl Distill for LogicalInsert {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let vec = self.core.fields_pretty(self.base.ctx.is_explain_verbose());
+        let vec = self
+            .core
+            .fields_pretty(self.base.ctx().is_explain_verbose());
         childless_record("LogicalInsert", vec)
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/logical_join.rs b/src/frontend/src/optimizer/plan_node/logical_join.rs
index cfc49a1da3353..a586af2f0bf42 100644
--- a/src/frontend/src/optimizer/plan_node/logical_join.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_join.rs
@@ -55,7 +55,7 @@ pub struct LogicalJoin {
 
 impl Distill for LogicalJoin {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
         vec.push(("type", Pretty::debug(&self.join_type())));
 
@@ -1296,7 +1296,8 @@ impl ToBatch for LogicalJoin {
         logical_join.left = logical_join.left.to_batch()?;
         logical_join.right = logical_join.right.to_batch()?;
 
-        let config = self.base.ctx.session_ctx().config();
+        let ctx = self.base.ctx();
+        let config = ctx.session_ctx().config();
 
         if predicate.has_eq() {
             if !predicate.eq_keys_are_type_aligned() {
@@ -2000,7 +2001,7 @@ mod tests {
             // 0 --> 1
             values
                 .base
-                .functional_dependency
+                .functional_dependency_mut()
                 .add_functional_dependency_by_column_indices(&[0], &[1]);
             values
         };
@@ -2014,7 +2015,7 @@ mod tests {
             // 0 --> 1, 2
             values
                 .base
-                .functional_dependency
+                .functional_dependency_mut()
                 .add_functional_dependency_by_column_indices(&[0], &[1, 2]);
             values
         };
diff --git a/src/frontend/src/optimizer/plan_node/logical_multi_join.rs b/src/frontend/src/optimizer/plan_node/logical_multi_join.rs
index 1c0253ab7aafd..9b740abd7718e 100644
--- a/src/frontend/src/optimizer/plan_node/logical_multi_join.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_multi_join.rs
@@ -863,6 +863,7 @@ mod test {
     use super::*;
     use crate::expr::{FunctionCall, InputRef};
     use crate::optimizer::optimizer_context::OptimizerContext;
+    use crate::optimizer::plan_node::generic::GenericPlanRef;
     use crate::optimizer::plan_node::LogicalValues;
     use crate::optimizer::property::FunctionalDependency;
     #[tokio::test]
@@ -883,7 +884,7 @@ mod test {
             // 0 --> 1
             values
                 .base
-                .functional_dependency
+                .functional_dependency_mut()
                 .add_functional_dependency_by_column_indices(&[0], &[1]);
             values
         };
@@ -897,7 +898,7 @@ mod test {
             // 0 --> 1, 2
             values
                 .base
-                .functional_dependency
+                .functional_dependency_mut()
                 .add_functional_dependency_by_column_indices(&[0], &[1, 2]);
             values
         };
@@ -910,7 +911,7 @@ mod test {
             // {} --> 0
             values
                 .base
-                .functional_dependency
+                .functional_dependency_mut()
                 .add_functional_dependency_by_column_indices(&[], &[0]);
             values
         };
diff --git a/src/frontend/src/optimizer/plan_node/logical_now.rs b/src/frontend/src/optimizer/plan_node/logical_now.rs
index 2792c4848e3b3..1d720db15b71a 100644
--- a/src/frontend/src/optimizer/plan_node/logical_now.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_now.rs
@@ -18,6 +18,7 @@ use risingwave_common::catalog::{Field, Schema};
 use risingwave_common::error::Result;
 use risingwave_common::types::DataType;
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{
     ColPrunable, ColumnPruningContext, ExprRewritable, LogicalFilter, PlanBase, PlanRef,
@@ -53,7 +54,7 @@ impl LogicalNow {
 
 impl Distill for LogicalNow {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let vec = if self.base.ctx.is_explain_verbose() {
+        let vec = if self.base.ctx().is_explain_verbose() {
             vec![("output", column_names_pretty(self.schema()))]
         } else {
             vec![]
diff --git a/src/frontend/src/optimizer/plan_node/logical_project.rs b/src/frontend/src/optimizer/plan_node/logical_project.rs
index f3bb51cc7f971..a96de7d91ecd5 100644
--- a/src/frontend/src/optimizer/plan_node/logical_project.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_project.rs
@@ -284,7 +284,7 @@ impl ToStream for LogicalProject {
         // But the target size of `out_col_change` should be the same as the length of the new
         // schema.
         let (map, _) = out_col_change.into_parts();
-        let out_col_change = ColIndexMapping::with_target_size(map, proj.base.schema.len());
+        let out_col_change = ColIndexMapping::with_target_size(map, proj.base.schema().len());
         Ok((proj.into(), out_col_change))
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/logical_project_set.rs b/src/frontend/src/optimizer/plan_node/logical_project_set.rs
index cba907eeeb379..4bf6b18cdabe3 100644
--- a/src/frontend/src/optimizer/plan_node/logical_project_set.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_project_set.rs
@@ -427,7 +427,7 @@ mod test {
         let mut values = LogicalValues::new(vec![], Schema { fields }, ctx);
         values
             .base
-            .functional_dependency
+            .functional_dependency_mut()
             .add_functional_dependency_by_column_indices(&[1], &[2]);
         let project_set = LogicalProjectSet::new(
             values.into(),
@@ -449,8 +449,9 @@ mod test {
         );
         let fd_set: HashSet<FunctionalDependency> = project_set
             .base
-            .functional_dependency
-            .into_dependencies()
+            .functional_dependency()
+            .as_dependencies()
+            .clone()
             .into_iter()
             .collect();
         let expected_fd_set: HashSet<FunctionalDependency> =
diff --git a/src/frontend/src/optimizer/plan_node/logical_scan.rs b/src/frontend/src/optimizer/plan_node/logical_scan.rs
index a499a9c6ea3d3..07d2a6c7653e7 100644
--- a/src/frontend/src/optimizer/plan_node/logical_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_scan.rs
@@ -273,7 +273,7 @@ impl LogicalScan {
             self.output_col_idx().to_vec(),
             self.core.table_desc.clone(),
             self.indexes().to_vec(),
-            self.base.ctx.clone(),
+            self.base.ctx().clone(),
             predicate,
             self.for_system_time_as_of_proctime(),
             self.table_cardinality(),
@@ -288,7 +288,7 @@ impl LogicalScan {
             output_col_idx,
             self.core.table_desc.clone(),
             self.indexes().to_vec(),
-            self.base.ctx.clone(),
+            self.base.ctx().clone(),
             self.predicate().clone(),
             self.for_system_time_as_of_proctime(),
             self.table_cardinality(),
@@ -309,7 +309,7 @@ impl_plan_tree_node_for_leaf! {LogicalScan}
 
 impl Distill for LogicalScan {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(5);
         vec.push(("table", Pretty::from(self.table_name().to_owned())));
         let key_is_columns =
@@ -440,7 +440,7 @@ impl LogicalScan {
             let (scan_ranges, predicate) = self.predicate().clone().split_to_scan_ranges(
                 self.core.table_desc.clone(),
                 self.base
-                    .ctx
+                    .ctx()
                     .session_ctx()
                     .config()
                     .get_max_split_range_gap(),
@@ -551,7 +551,7 @@ impl ToStream for LogicalScan {
                 None.into(),
             )));
         }
-        match self.base.stream_key.is_none() {
+        match self.base.stream_key().is_none() {
             true => {
                 let mut col_ids = HashSet::new();
 
diff --git a/src/frontend/src/optimizer/plan_node/logical_share.rs b/src/frontend/src/optimizer/plan_node/logical_share.rs
index d924ee7180168..d6b5711740a98 100644
--- a/src/frontend/src/optimizer/plan_node/logical_share.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_share.rs
@@ -69,7 +69,7 @@ impl LogicalShare {
     }
 
     pub(super) fn pretty_fields<'a>(base: &PlanBase, name: &'a str) -> XmlNode<'a> {
-        childless_record(name, vec![("id", Pretty::debug(&base.id.0))])
+        childless_record(name, vec![("id", Pretty::debug(&base.id().0))])
     }
 }
 
diff --git a/src/frontend/src/optimizer/plan_node/logical_source.rs b/src/frontend/src/optimizer/plan_node/logical_source.rs
index 1d37da9eaa40f..45a5fbcb2240f 100644
--- a/src/frontend/src/optimizer/plan_node/logical_source.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_source.rs
@@ -28,6 +28,7 @@ use risingwave_connector::source::{ConnectorProperties, DataType};
 use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn;
 use risingwave_pb::plan_common::GeneratedColumnDesc;
 
+use super::generic::GenericPlanRef;
 use super::stream_watermark_filter::StreamWatermarkFilter;
 use super::utils::{childless_record, Distill};
 use super::{
@@ -204,7 +205,7 @@ impl LogicalSource {
             ..self.core.clone()
         };
         let mut new_s3_plan: PlanRef = StreamSource {
-            base: PlanBase::new_stream_with_logical(
+            base: PlanBase::new_stream_with_core(
                 &logical_source,
                 Distribution::Single,
                 true, // `list` will keep listing all objects, it must be append-only
@@ -506,7 +507,7 @@ impl PredicatePushdown for LogicalSource {
 
         let mut new_conjunctions = Vec::with_capacity(predicate.conjunctions.len());
         for expr in predicate.conjunctions {
-            if let Some(e) = expr_to_kafka_timestamp_range(expr, &mut range, &self.base.schema) {
+            if let Some(e) = expr_to_kafka_timestamp_range(expr, &mut range, self.base.schema()) {
                 // Not recognized, so push back
                 new_conjunctions.push(e);
             }
diff --git a/src/frontend/src/optimizer/plan_node/logical_union.rs b/src/frontend/src/optimizer/plan_node/logical_union.rs
index 51e4e620cf4ca..1f02b026c0020 100644
--- a/src/frontend/src/optimizer/plan_node/logical_union.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_union.rs
@@ -130,7 +130,7 @@ impl ToBatch for LogicalUnion {
         if !self.all() {
             let batch_union = BatchUnion::new(new_logical).into();
             Ok(BatchHashAgg::new(
-                generic::Agg::new(vec![], (0..self.base.schema.len()).collect(), batch_union)
+                generic::Agg::new(vec![], (0..self.base.schema().len()).collect(), batch_union)
                     .with_enable_two_phase(false),
             )
             .into())
@@ -170,7 +170,7 @@ impl ToStream for LogicalUnion {
         &self,
         ctx: &mut RewriteStreamContext,
     ) -> Result<(PlanRef, ColIndexMapping)> {
-        let original_schema = self.base.schema.clone();
+        let original_schema = self.base.schema().clone();
         let original_schema_len = original_schema.len();
         let mut rewrites = vec![];
         for input in &self.core.inputs {
@@ -353,7 +353,7 @@ mod tests {
 
         // Check the result
         let union = plan.as_logical_union().unwrap();
-        assert_eq!(union.base.schema.len(), 2);
+        assert_eq!(union.base.schema().len(), 2);
     }
 
     #[tokio::test]
diff --git a/src/frontend/src/optimizer/plan_node/logical_update.rs b/src/frontend/src/optimizer/plan_node/logical_update.rs
index 80e4f350d8edb..1dbe1d3d3c5c9 100644
--- a/src/frontend/src/optimizer/plan_node/logical_update.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_update.rs
@@ -15,6 +15,7 @@
 use risingwave_common::catalog::TableVersionId;
 use risingwave_common::error::Result;
 
+use super::generic::GenericPlanRef;
 use super::utils::impl_distill_by_unit;
 use super::{
     gen_filter_and_pushdown, generic, BatchUpdate, ColPrunable, ExprRewritable, LogicalProject,
diff --git a/src/frontend/src/optimizer/plan_node/logical_values.rs b/src/frontend/src/optimizer/plan_node/logical_values.rs
index c6a3d2ac0564e..e62c6400f2015 100644
--- a/src/frontend/src/optimizer/plan_node/logical_values.rs
+++ b/src/frontend/src/optimizer/plan_node/logical_values.rs
@@ -21,6 +21,7 @@ use risingwave_common::catalog::{Field, Schema};
 use risingwave_common::error::Result;
 use risingwave_common::types::{DataType, ScalarImpl};
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{
     BatchValues, ColPrunable, ExprRewritable, LogicalFilter, PlanBase, PlanRef, PredicatePushdown,
@@ -144,7 +145,7 @@ impl ColPrunable for LogicalValues {
             .iter()
             .map(|i| self.schema().fields[*i].clone())
             .collect();
-        Self::new(rows, Schema { fields }, self.base.ctx.clone()).into()
+        Self::new(rows, Schema { fields }, self.base.ctx().clone()).into()
     }
 }
 
diff --git a/src/frontend/src/optimizer/plan_node/merge_eq_nodes.rs b/src/frontend/src/optimizer/plan_node/merge_eq_nodes.rs
index 73f82e86aa260..9f2e8d94634be 100644
--- a/src/frontend/src/optimizer/plan_node/merge_eq_nodes.rs
+++ b/src/frontend/src/optimizer/plan_node/merge_eq_nodes.rs
@@ -15,6 +15,7 @@
 use std::collections::HashMap;
 use std::hash::Hash;
 
+use super::generic::GenericPlanRef;
 use super::{EndoPlan, LogicalShare, PlanNodeId, PlanRef, PlanTreeNodeUnary, VisitPlan};
 use crate::optimizer::plan_visitor;
 use crate::utils::{Endo, Visit};
diff --git a/src/frontend/src/optimizer/plan_node/mod.rs b/src/frontend/src/optimizer/plan_node/mod.rs
index 188787c93b8c0..f16ebfb0c792c 100644
--- a/src/frontend/src/optimizer/plan_node/mod.rs
+++ b/src/frontend/src/optimizer/plan_node/mod.rs
@@ -46,7 +46,7 @@ use serde::Serialize;
 use smallvec::SmallVec;
 
 use self::batch::BatchPlanRef;
-use self::generic::GenericPlanRef;
+use self::generic::{GenericPlanRef, PhysicalPlanRef};
 use self::stream::StreamPlanRef;
 use self::utils::Distill;
 use super::property::{Distribution, FunctionalDependencySet, Order};
@@ -419,29 +419,31 @@ impl PlanTreeNode for PlanRef {
     }
 }
 
-impl StreamPlanRef for PlanRef {
-    fn distribution(&self) -> &Distribution {
-        &self.plan_base().dist
+impl PlanNodeMeta for PlanRef {
+    fn node_type(&self) -> PlanNodeType {
+        self.0.node_type()
     }
 
-    fn append_only(&self) -> bool {
-        self.plan_base().append_only
+    fn plan_base(&self) -> &PlanBase {
+        self.0.plan_base()
     }
 
-    fn emit_on_window_close(&self) -> bool {
-        self.plan_base().emit_on_window_close
+    fn convention(&self) -> Convention {
+        self.0.convention()
     }
 }
 
-impl BatchPlanRef for PlanRef {
-    fn order(&self) -> &Order {
-        &self.plan_base().order
+/// Implement for every type that provides [`PlanBase`] through [`PlanNodeMeta`].
+impl<P> GenericPlanRef for P
+where
+    P: PlanNodeMeta + Eq + Hash,
+{
+    fn id(&self) -> PlanNodeId {
+        self.plan_base().id()
     }
-}
 
-impl GenericPlanRef for PlanRef {
     fn schema(&self) -> &Schema {
-        &self.plan_base().schema
+        self.plan_base().schema()
     }
 
     fn stream_key(&self) -> Option<&[usize]> {
@@ -457,6 +459,47 @@ impl GenericPlanRef for PlanRef {
     }
 }
 
+/// Implement for every type that provides [`PlanBase`] through [`PlanNodeMeta`].
+// TODO: further constrain the convention to be `Stream` or `Batch`.
+impl<P> PhysicalPlanRef for P
+where
+    P: PlanNodeMeta + Eq + Hash,
+{
+    fn distribution(&self) -> &Distribution {
+        self.plan_base().distribution()
+    }
+}
+
+/// Implement for every type that provides [`PlanBase`] through [`PlanNodeMeta`].
+// TODO: further constrain the convention to be `Stream`.
+impl<P> StreamPlanRef for P
+where
+    P: PlanNodeMeta + Eq + Hash,
+{
+    fn append_only(&self) -> bool {
+        self.plan_base().append_only()
+    }
+
+    fn emit_on_window_close(&self) -> bool {
+        self.plan_base().emit_on_window_close()
+    }
+
+    fn watermark_columns(&self) -> &FixedBitSet {
+        self.plan_base().watermark_columns()
+    }
+}
+
+/// Implement for every type that provides [`PlanBase`] through [`PlanNodeMeta`].
+// TODO: further constrain the convention to be `Batch`.
+impl<P> BatchPlanRef for P
+where
+    P: PlanNodeMeta + Eq + Hash,
+{
+    fn order(&self) -> &Order {
+        self.plan_base().order()
+    }
+}
+
 /// In order to let expression display id started from 1 for explaining, hidden column names and
 /// other places. We will reset expression display id to 0 and clone the whole plan to reset the
 /// schema.
@@ -512,15 +555,15 @@ pub(crate) fn pretty_config() -> PrettyConfig {
 
 impl dyn PlanNode {
     pub fn id(&self) -> PlanNodeId {
-        self.plan_base().id
+        self.plan_base().id()
     }
 
     pub fn ctx(&self) -> OptimizerContextRef {
-        self.plan_base().ctx.clone()
+        self.plan_base().ctx().clone()
     }
 
     pub fn schema(&self) -> &Schema {
-        &self.plan_base().schema
+        self.plan_base().schema()
     }
 
     pub fn stream_key(&self) -> Option<&[usize]> {
@@ -528,27 +571,28 @@ impl dyn PlanNode {
     }
 
     pub fn order(&self) -> &Order {
-        &self.plan_base().order
+        self.plan_base().order()
     }
 
+    // TODO: avoid manual delegation
     pub fn distribution(&self) -> &Distribution {
-        &self.plan_base().dist
+        self.plan_base().distribution()
     }
 
     pub fn append_only(&self) -> bool {
-        self.plan_base().append_only
+        self.plan_base().append_only()
     }
 
     pub fn emit_on_window_close(&self) -> bool {
-        self.plan_base().emit_on_window_close
+        self.plan_base().emit_on_window_close()
     }
 
     pub fn functional_dependency(&self) -> &FunctionalDependencySet {
-        &self.plan_base().functional_dependency
+        self.plan_base().functional_dependency()
     }
 
     pub fn watermark_columns(&self) -> &FixedBitSet {
-        &self.plan_base().watermark_columns
+        self.plan_base().watermark_columns()
     }
 
     /// Serialize the plan node and its children to a stream plan proto.
diff --git a/src/frontend/src/optimizer/plan_node/plan_base.rs b/src/frontend/src/optimizer/plan_node/plan_base.rs
index e9a5bf26885bf..51b1aa5f41141 100644
--- a/src/frontend/src/optimizer/plan_node/plan_base.rs
+++ b/src/frontend/src/optimizer/plan_node/plan_base.rs
@@ -14,47 +14,132 @@
 
 use educe::Educe;
 use fixedbitset::FixedBitSet;
-use paste::paste;
 use risingwave_common::catalog::Schema;
 
 use super::generic::GenericPlanNode;
 use super::*;
-use crate::for_all_plan_nodes;
 use crate::optimizer::optimizer_context::OptimizerContextRef;
 use crate::optimizer::property::{Distribution, FunctionalDependencySet, Order};
 
-/// the common fields of all nodes, please make a field named `base` in
-/// every planNode and correctly value it when construct the planNode.
-#[derive(Clone, Debug, Educe)]
-#[educe(PartialEq, Eq, Hash)]
-pub struct PlanBase {
-    #[educe(PartialEq(ignore))]
-    #[educe(Hash(ignore))]
-    pub id: PlanNodeId,
-    #[educe(PartialEq(ignore))]
-    #[educe(Hash(ignore))]
-    pub ctx: OptimizerContextRef,
-    pub schema: Schema,
-    /// the pk indices of the PlanNode's output, a empty stream key vec means there is no stream key
-    pub stream_key: Option<Vec<usize>>,
-    /// The order property of the PlanNode's output, store an `&Order::any()` here will not affect
-    /// correctness, but insert unnecessary sort in plan
-    pub order: Order,
+/// Common extra fields for physical plan nodes.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+struct PhysicalCommonExtra {
     /// The distribution property of the PlanNode's output, store an `Distribution::any()` here
     /// will not affect correctness, but insert unnecessary exchange in plan
-    pub dist: Distribution,
+    dist: Distribution,
+}
+
+/// Extra fields for stream plan nodes.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+struct StreamExtra {
+    /// Common fields for physical plan nodes.
+    physical: PhysicalCommonExtra,
+
     /// The append-only property of the PlanNode's output is a stream-only property. Append-only
     /// means the stream contains only insert operation.
-    pub append_only: bool,
+    append_only: bool,
     /// Whether the output is emitted on window close.
-    pub emit_on_window_close: bool,
-    pub functional_dependency: FunctionalDependencySet,
+    emit_on_window_close: bool,
     /// The watermark column indices of the PlanNode's output. There could be watermark output from
     /// this stream operator.
-    pub watermark_columns: FixedBitSet,
+    watermark_columns: FixedBitSet,
+}
+
+/// Extra fields for batch plan nodes.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+struct BatchExtra {
+    /// Common fields for physical plan nodes.
+    physical: PhysicalCommonExtra,
+
+    /// The order property of the PlanNode's output. Storing `Order::any()` here will not affect
+    /// correctness, but may insert an unnecessary sort into the plan.
+    order: Order,
+}
+
+/// Extra fields for physical plan nodes.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+enum PhysicalExtra {
+    Stream(StreamExtra),
+    Batch(BatchExtra),
+}
+
+impl PhysicalExtra {
+    fn common(&self) -> &PhysicalCommonExtra {
+        match self {
+            Self::Stream(extra) => &extra.physical,
+            Self::Batch(extra) => &extra.physical,
+        }
+    }
+
+    fn common_mut(&mut self) -> &mut PhysicalCommonExtra {
+        match self {
+            Self::Stream(extra) => &mut extra.physical,
+            Self::Batch(extra) => &mut extra.physical,
+        }
+    }
+
+    fn stream(&self) -> &StreamExtra {
+        match self {
+            Self::Stream(extra) => extra,
+            Self::Batch(_) => panic!("access stream properties from batch plan node"),
+        }
+    }
+
+    fn batch(&self) -> &BatchExtra {
+        match self {
+            Self::Batch(extra) => extra,
+            Self::Stream(_) => panic!("access batch properties from stream plan node"),
+        }
+    }
+}
+
+/// The common fields of all plan nodes. Every plan node should contain a field named `base`
+/// and initialize it correctly when the node is constructed.
+///
+/// All fields are intentionally made private and immutable, as they should
+/// normally be the same as the given [`GenericPlanNode`] when constructing.
+///
+/// - To access them, use traits including [`GenericPlanRef`],
+///   [`PhysicalPlanRef`], [`StreamPlanRef`] and [`BatchPlanRef`].
+/// - To mutate them, use methods like `new_*` or `clone_with_*`.
+#[derive(Clone, Debug, Educe)]
+#[educe(PartialEq, Eq, Hash)]
+pub struct PlanBase {
+    // -- common fields --
+    #[educe(PartialEq(ignore), Hash(ignore))]
+    id: PlanNodeId,
+    #[educe(PartialEq(ignore), Hash(ignore))]
+    ctx: OptimizerContextRef,
+
+    schema: Schema,
+    /// Pk indices of the PlanNode's output; an empty stream key vec means there is no stream key
+    // TODO: this is actually a logical and stream only property
+    stream_key: Option<Vec<usize>>,
+    functional_dependency: FunctionalDependencySet,
+
+    /// Extra fields if the plan node is physical.
+    physical_extra: Option<PhysicalExtra>,
+}
+
+impl PlanBase {
+    /// Returns the physical-only fields; panics if `physical_extra` is `None`.
+    fn physical_extra(&self) -> &PhysicalExtra {
+        let extra = self.physical_extra.as_ref();
+        extra.expect("access physical properties from logical plan node")
+    }
+
+    /// Mutable counterpart of `physical_extra`; panics if `physical_extra` is `None`.
+    fn physical_extra_mut(&mut self) -> &mut PhysicalExtra {
+        let extra = self.physical_extra.as_mut();
+        extra.expect("access physical properties from logical plan node")
+    }
+}
 
 impl generic::GenericPlanRef for PlanBase {
+    fn id(&self) -> PlanNodeId {
+        self.id
+    }
+
     fn schema(&self) -> &Schema {
         &self.schema
     }
@@ -72,23 +157,29 @@ impl generic::GenericPlanRef for PlanBase {
     }
 }
 
-impl stream::StreamPlanRef for PlanBase {
+impl generic::PhysicalPlanRef for PlanBase {
     fn distribution(&self) -> &Distribution {
-        &self.dist
+        &self.physical_extra().common().dist
     }
+}
 
+impl stream::StreamPlanRef for PlanBase {
     fn append_only(&self) -> bool {
-        self.append_only
+        self.physical_extra().stream().append_only
     }
 
     fn emit_on_window_close(&self) -> bool {
-        self.emit_on_window_close
+        self.physical_extra().stream().emit_on_window_close
+    }
+
+    fn watermark_columns(&self) -> &FixedBitSet {
+        &self.physical_extra().stream().watermark_columns
     }
 }
 
 impl batch::BatchPlanRef for PlanBase {
     fn order(&self) -> &Order {
-        &self.order
+        &self.physical_extra().batch().order
     }
 }
 
@@ -100,47 +191,22 @@ impl PlanBase {
         functional_dependency: FunctionalDependencySet,
     ) -> Self {
         let id = ctx.next_plan_node_id();
-        let watermark_columns = FixedBitSet::with_capacity(schema.len());
         Self {
             id,
             ctx,
             schema,
             stream_key,
-            dist: Distribution::Single,
-            order: Order::any(),
-            // Logical plan node won't touch `append_only` field
-            append_only: true,
-            emit_on_window_close: false,
             functional_dependency,
-            watermark_columns,
+            physical_extra: None,
         }
     }
 
-    pub fn new_logical_with_core(node: &impl GenericPlanNode) -> Self {
+    pub fn new_logical_with_core(core: &impl GenericPlanNode) -> Self {
         Self::new_logical(
-            node.ctx(),
-            node.schema(),
-            node.stream_key(),
-            node.functional_dependency(),
-        )
-    }
-
-    pub fn new_stream_with_logical(
-        logical: &impl GenericPlanNode,
-        dist: Distribution,
-        append_only: bool,
-        emit_on_window_close: bool,
-        watermark_columns: FixedBitSet,
-    ) -> Self {
-        Self::new_stream(
-            logical.ctx(),
-            logical.schema(),
-            logical.stream_key(),
-            logical.functional_dependency(),
-            dist,
-            append_only,
-            emit_on_window_close,
-            watermark_columns,
+            core.ctx(),
+            core.schema(),
+            core.stream_key(),
+            core.functional_dependency(),
         )
     }
 
@@ -160,22 +226,36 @@ impl PlanBase {
             id,
             ctx,
             schema,
-            dist,
-            order: Order::any(),
             stream_key,
-            append_only,
-            emit_on_window_close,
             functional_dependency,
-            watermark_columns,
+            physical_extra: Some(PhysicalExtra::Stream({
+                StreamExtra {
+                    physical: PhysicalCommonExtra { dist },
+                    append_only,
+                    emit_on_window_close,
+                    watermark_columns,
+                }
+            })),
         }
     }
 
-    pub fn new_batch_from_logical(
-        logical: &impl GenericPlanNode,
+    pub fn new_stream_with_core(
+        core: &impl GenericPlanNode,
         dist: Distribution,
-        order: Order,
+        append_only: bool,
+        emit_on_window_close: bool,
+        watermark_columns: FixedBitSet,
     ) -> Self {
-        Self::new_batch(logical.ctx(), logical.schema(), dist, order)
+        Self::new_stream(
+            core.ctx(),
+            core.schema(),
+            core.stream_key(),
+            core.functional_dependency(),
+            dist,
+            append_only,
+            emit_on_window_close,
+            watermark_columns,
+        )
     }
 
     pub fn new_batch(
@@ -186,75 +266,49 @@ impl PlanBase {
     ) -> Self {
         let id = ctx.next_plan_node_id();
         let functional_dependency = FunctionalDependencySet::new(schema.len());
-        let watermark_columns = FixedBitSet::with_capacity(schema.len());
         Self {
             id,
             ctx,
             schema,
-            dist,
-            order,
             stream_key: None,
-            // Batch plan node won't touch `append_only` field
-            append_only: true,
-            emit_on_window_close: false, // TODO(rc): batch EOWC support?
             functional_dependency,
-            watermark_columns,
+            physical_extra: Some(PhysicalExtra::Batch({
+                BatchExtra {
+                    physical: PhysicalCommonExtra { dist },
+                    order,
+                }
+            })),
         }
     }
 
-    pub fn derive_stream_plan_base(plan_node: &PlanRef) -> Self {
-        PlanBase::new_stream(
-            plan_node.ctx(),
-            plan_node.schema().clone(),
-            plan_node.stream_key().map(|v| v.to_vec()),
-            plan_node.functional_dependency().clone(),
-            plan_node.distribution().clone(),
-            plan_node.append_only(),
-            plan_node.emit_on_window_close(),
-            plan_node.watermark_columns().clone(),
-        )
+    pub fn new_batch_with_core(
+        core: &impl GenericPlanNode,
+        dist: Distribution,
+        order: Order,
+    ) -> Self {
+        Self::new_batch(core.ctx(), core.schema(), dist, order)
     }
 
     pub fn clone_with_new_plan_id(&self) -> Self {
         let mut new = self.clone();
-        new.id = self.ctx.next_plan_node_id();
+        new.id = self.ctx().next_plan_node_id();
+        new
+    }
+
+    /// Clone the plan node with a new distribution.
+    ///
+    /// Panics if the plan node is not physical.
+    pub fn clone_with_new_distribution(&self, dist: Distribution) -> Self {
+        let mut new = self.clone();
+        new.physical_extra_mut().common_mut().dist = dist;
         new
     }
 }
 
-macro_rules! impl_base_delegate {
-    ($( { $convention:ident, $name:ident }),*) => {
-        $(paste! {
-            impl [<$convention $name>] {
-                pub fn id(&self) -> PlanNodeId {
-                    self.plan_base().id
-                }
-                 pub fn ctx(&self) -> OptimizerContextRef {
-                    self.plan_base().ctx()
-                }
-                pub fn schema(&self) -> &Schema {
-                    &self.plan_base().schema
-                }
-                pub fn stream_key(&self) -> Option<&[usize]> {
-                    self.plan_base().stream_key()
-                }
-                pub fn order(&self) -> &Order {
-                    &self.plan_base().order
-                }
-                pub fn distribution(&self) -> &Distribution {
-                    &self.plan_base().dist
-                }
-                pub fn append_only(&self) -> bool {
-                    self.plan_base().append_only
-                }
-                pub fn emit_on_window_close(&self) -> bool {
-                    self.plan_base().emit_on_window_close
-                }
-                pub fn functional_dependency(&self) -> &FunctionalDependencySet {
-                    &self.plan_base().functional_dependency
-                }
-            }
-        })*
+// Mutators for testing only.
+#[cfg(test)]
+impl PlanBase {
+    pub fn functional_dependency_mut(&mut self) -> &mut FunctionalDependencySet {
+        &mut self.functional_dependency
     }
 }
-for_all_plan_nodes! { impl_base_delegate }
diff --git a/src/frontend/src/optimizer/plan_node/stream.rs b/src/frontend/src/optimizer/plan_node/stream.rs
index 2edf997bf91fd..866c62c2413a5 100644
--- a/src/frontend/src/optimizer/plan_node/stream.rs
+++ b/src/frontend/src/optimizer/plan_node/stream.rs
@@ -12,16 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use super::generic::GenericPlanRef;
-use crate::optimizer::property::Distribution;
+use fixedbitset::FixedBitSet;
 
-/// A subtrait of [`GenericPlanRef`] for stream plans.
+use super::generic::PhysicalPlanRef;
+
+/// A subtrait of [`PhysicalPlanRef`] for stream plans.
 ///
 /// Due to the lack of refactoring, all plan nodes currently implement this trait
 /// through [`super::PlanBase`]. One may still use this trait as a bound for
-/// expecting a stream plan, in contrast to [`GenericPlanRef`].
-pub trait StreamPlanRef: GenericPlanRef {
-    fn distribution(&self) -> &Distribution;
+/// accessing a stream plan, in contrast to [`GenericPlanRef`] or
+/// [`PhysicalPlanRef`].
+///
+/// [`GenericPlanRef`]: super::generic::GenericPlanRef
+pub trait StreamPlanRef: PhysicalPlanRef {
     fn append_only(&self) -> bool;
     fn emit_on_window_close(&self) -> bool;
+    fn watermark_columns(&self) -> &FixedBitSet;
 }
diff --git a/src/frontend/src/optimizer/plan_node/stream_dedup.rs b/src/frontend/src/optimizer/plan_node/stream_dedup.rs
index 6e96d0eab0e93..51b5e589e886e 100644
--- a/src/frontend/src/optimizer/plan_node/stream_dedup.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_dedup.rs
@@ -17,7 +17,7 @@ use risingwave_common::util::sort_util::OrderType;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use risingwave_pb::stream_plan::DedupNode;
 
-use super::generic::{self, GenericPlanNode, GenericPlanRef};
+use super::generic::{self, GenericPlanNode, GenericPlanRef, PhysicalPlanRef};
 use super::stream::StreamPlanRef;
 use super::utils::{impl_distill_by_unit, TableCatalogBuilder};
 use super::{ExprRewritable, PlanBase, PlanTreeNodeUnary, StreamNode};
@@ -37,7 +37,7 @@ impl StreamDedup {
         // A dedup operator must be append-only.
         assert!(input.append_only());
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             input.distribution().clone(),
             true,
diff --git a/src/frontend/src/optimizer/plan_node/stream_delta_join.rs b/src/frontend/src/optimizer/plan_node/stream_delta_join.rs
index db9e6ac296bbf..bb18f9cffdf0f 100644
--- a/src/frontend/src/optimizer/plan_node/stream_delta_join.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_delta_join.rs
@@ -20,7 +20,7 @@ use risingwave_pb::plan_common::JoinType;
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::{ArrangementInfo, DeltaIndexJoinNode};
 
-use super::generic::{self};
+use super::generic::{self, GenericPlanRef};
 use super::utils::{childless_record, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeBinary, StreamNode};
 use crate::expr::{Expr, ExprRewriter};
@@ -67,7 +67,7 @@ impl StreamDeltaJoin {
             core.i2o_col_mapping().rewrite_bitset(&watermark_columns)
         };
         // TODO: derive from input
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             append_only,
@@ -90,7 +90,7 @@ impl StreamDeltaJoin {
 
 impl Distill for StreamDeltaJoin {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
         vec.push(("type", Pretty::debug(&self.core.join_type)));
 
diff --git a/src/frontend/src/optimizer/plan_node/stream_dml.rs b/src/frontend/src/optimizer/plan_node/stream_dml.rs
index c9f969384c3a4..9b000974786e4 100644
--- a/src/frontend/src/optimizer/plan_node/stream_dml.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_dml.rs
@@ -17,6 +17,7 @@ use pretty_xmlish::{Pretty, XmlNode};
 use risingwave_common::catalog::{ColumnDesc, INITIAL_TABLE_VERSION_ID};
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::stream_fragmenter::BuildFragmentGraphState;
diff --git a/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs b/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs
index e1ca18da937e9..a4b74f37208e7 100644
--- a/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_dynamic_filter.rs
@@ -17,7 +17,8 @@ pub use risingwave_pb::expr::expr_node::Type as ExprType;
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::DynamicFilterNode;
 
-use super::generic::DynamicFilter;
+use super::generic::{DynamicFilter, GenericPlanRef};
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, column_names_pretty, watermark_pretty, Distill};
 use super::{generic, ExprRewritable};
 use crate::expr::{Expr, ExprImpl};
@@ -37,7 +38,7 @@ impl StreamDynamicFilter {
         let watermark_columns = core.watermark_columns(core.right().watermark_columns()[0]);
 
         // TODO: derive from input
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             core.left().distribution().clone(),
             false, /* we can have a new abstraction for append only and monotonically increasing
@@ -78,11 +79,11 @@ impl StreamDynamicFilter {
 
 impl Distill for StreamDynamicFilter {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let pred = self.core.pretty_field();
         let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
         vec.push(("predicate", pred));
-        if let Some(ow) = watermark_pretty(&self.base.watermark_columns, self.schema()) {
+        if let Some(ow) = watermark_pretty(self.base.watermark_columns(), self.schema()) {
             vec.push(("output_watermarks", ow));
         }
         vec.push(("output", column_names_pretty(self.schema())));
diff --git a/src/frontend/src/optimizer/plan_node/stream_eowc_over_window.rs b/src/frontend/src/optimizer/plan_node/stream_eowc_over_window.rs
index 9418af8e4a364..d8c5a9635ce59 100644
--- a/src/frontend/src/optimizer/plan_node/stream_eowc_over_window.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_eowc_over_window.rs
@@ -18,7 +18,7 @@ use fixedbitset::FixedBitSet;
 use risingwave_common::util::sort_util::OrderType;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 
-use super::generic::{self, PlanWindowFunction};
+use super::generic::{self, GenericPlanRef, PlanWindowFunction};
 use super::utils::{impl_distill_by_unit, TableCatalogBuilder};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::stream_fragmenter::BuildFragmentGraphState;
@@ -50,7 +50,7 @@ impl StreamEowcOverWindow {
         // ancient rows in some rarely updated partitions that are emitted at the end of time.
         let watermark_columns = FixedBitSet::with_capacity(core.output_len());
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             input.distribution().clone(),
             true,
diff --git a/src/frontend/src/optimizer/plan_node/stream_exchange.rs b/src/frontend/src/optimizer/plan_node/stream_exchange.rs
index 0fa1713bf4488..99e6c3c5161a1 100644
--- a/src/frontend/src/optimizer/plan_node/stream_exchange.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_exchange.rs
@@ -16,6 +16,8 @@ use pretty_xmlish::{Pretty, XmlNode};
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::{DispatchStrategy, DispatcherType, ExchangeNode};
 
+use super::generic::{GenericPlanRef, PhysicalPlanRef};
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, plan_node_name, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::optimizer::property::{Distribution, DistributionDisplay};
@@ -78,7 +80,7 @@ impl StreamExchange {
 impl Distill for StreamExchange {
     fn distill<'a>(&self) -> XmlNode<'a> {
         let distribution_display = DistributionDisplay {
-            distribution: &self.base.dist,
+            distribution: self.base.distribution(),
             input_schema: self.input.schema(),
         };
         childless_record(
@@ -117,13 +119,13 @@ impl StreamNode for StreamExchange {
                 })
             } else {
                 Some(DispatchStrategy {
-                    r#type: match &self.base.dist {
+                    r#type: match &self.base.distribution() {
                         Distribution::HashShard(_) => DispatcherType::Hash,
                         Distribution::Single => DispatcherType::Simple,
                         Distribution::Broadcast => DispatcherType::Broadcast,
                         _ => panic!("Do not allow Any or AnyShard in serialization process"),
                     } as i32,
-                    dist_key_indices: match &self.base.dist {
+                    dist_key_indices: match &self.base.distribution() {
                         Distribution::HashShard(keys) => {
                             keys.iter().map(|num| *num as u32).collect()
                         }
diff --git a/src/frontend/src/optimizer/plan_node/stream_expand.rs b/src/frontend/src/optimizer/plan_node/stream_expand.rs
index e0f8852a19fb5..5959b8d6be4d2 100644
--- a/src/frontend/src/optimizer/plan_node/stream_expand.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_expand.rs
@@ -48,7 +48,7 @@ impl StreamExpand {
                 .map(|idx| idx + input.schema().len()),
         );
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             input.append_only(),
diff --git a/src/frontend/src/optimizer/plan_node/stream_filter.rs b/src/frontend/src/optimizer/plan_node/stream_filter.rs
index ff4d344607776..0f000e6b8c0db 100644
--- a/src/frontend/src/optimizer/plan_node/stream_filter.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_filter.rs
@@ -34,7 +34,7 @@ impl StreamFilter {
         let input = core.input.clone();
         let dist = input.distribution().clone();
         // Filter executor won't change the append-only behavior of the stream.
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             input.append_only(),
diff --git a/src/frontend/src/optimizer/plan_node/stream_fs_fetch.rs b/src/frontend/src/optimizer/plan_node/stream_fs_fetch.rs
index 190c05c0a5ba1..95fd72e9f6aa0 100644
--- a/src/frontend/src/optimizer/plan_node/stream_fs_fetch.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_fs_fetch.rs
@@ -48,7 +48,7 @@ impl_plan_tree_node_for_unary! { StreamFsFetch }
 
 impl StreamFsFetch {
     pub fn new(input: PlanRef, source: generic::Source) -> Self {
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &source,
             Distribution::SomeShard,
             source.catalog.as_ref().map_or(true, |s| s.append_only),
diff --git a/src/frontend/src/optimizer/plan_node/stream_group_topn.rs b/src/frontend/src/optimizer/plan_node/stream_group_topn.rs
index 14711d353f9d8..3e8f3c00206c4 100644
--- a/src/frontend/src/optimizer/plan_node/stream_group_topn.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_group_topn.rs
@@ -16,7 +16,8 @@ use fixedbitset::FixedBitSet;
 use pretty_xmlish::XmlNode;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 
-use super::generic::{DistillUnit, TopNLimit};
+use super::generic::{DistillUnit, GenericPlanRef, TopNLimit};
+use super::stream::StreamPlanRef;
 use super::utils::{plan_node_name, watermark_pretty, Distill};
 use super::{generic, ExprRewritable, PlanBase, PlanTreeNodeUnary, StreamNode};
 use crate::optimizer::plan_node::generic::GenericPlanNode;
@@ -135,7 +136,7 @@ impl Distill for StreamGroupTopN {
             { "append_only", self.input().append_only() },
         );
         let mut node = self.core.distill_with_name(name);
-        if let Some(ow) = watermark_pretty(&self.base.watermark_columns, self.schema()) {
+        if let Some(ow) = watermark_pretty(self.base.watermark_columns(), self.schema()) {
             node.fields.push(("output_watermarks".into(), ow));
         }
         node
diff --git a/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs b/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs
index 25e1ac801f97c..55ab6b5906e59 100644
--- a/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_hash_agg.rs
@@ -18,10 +18,11 @@ use pretty_xmlish::XmlNode;
 use risingwave_common::error::{ErrorCode, Result};
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 
-use super::generic::{self, PlanAggCall};
+use super::generic::{self, GenericPlanRef, PlanAggCall};
 use super::utils::{childless_record, plan_node_name, watermark_pretty, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::expr::ExprRewriter;
+use crate::optimizer::plan_node::stream::StreamPlanRef;
 use crate::stream_fragmenter::BuildFragmentGraphState;
 use crate::utils::{ColIndexMapping, ColIndexMappingRewriteExt, IndexSet};
 
@@ -85,7 +86,7 @@ impl StreamHashAgg {
         }
 
         // Hash agg executor might change the append-only behavior of the stream.
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             emit_on_window_close, // in EOWC mode, we produce append only output
@@ -142,7 +143,7 @@ impl StreamHashAgg {
 impl Distill for StreamHashAgg {
     fn distill<'a>(&self) -> XmlNode<'a> {
         let mut vec = self.core.fields_pretty();
-        if let Some(ow) = watermark_pretty(&self.base.watermark_columns, self.schema()) {
+        if let Some(ow) = watermark_pretty(self.base.watermark_columns(), self.schema()) {
             vec.push(("output_watermarks", ow));
         }
         childless_record(
@@ -214,7 +215,7 @@ impl StreamNode for StreamHashAgg {
                 })
                 .collect(),
             row_count_index: self.row_count_idx as u32,
-            emit_on_window_close: self.base.emit_on_window_close,
+            emit_on_window_close: self.base.emit_on_window_close(),
         })
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/stream_hash_join.rs b/src/frontend/src/optimizer/plan_node/stream_hash_join.rs
index 0075b1730b4eb..9d9c41425c4b1 100644
--- a/src/frontend/src/optimizer/plan_node/stream_hash_join.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_hash_join.rs
@@ -20,7 +20,8 @@ use risingwave_pb::plan_common::JoinType;
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::{DeltaExpression, HashJoinNode, PbInequalityPair};
 
-use super::generic::Join;
+use super::generic::{GenericPlanRef, Join};
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, plan_node_name, watermark_pretty, Distill};
 use super::{
     generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeBinary, StreamDeltaJoin, StreamNode,
@@ -178,7 +179,7 @@ impl StreamHashJoin {
         };
 
         // TODO: derive from input
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             append_only,
@@ -291,7 +292,7 @@ impl Distill for StreamHashJoin {
             { "interval", self.clean_left_state_conjunction_idx.is_some() && self.clean_right_state_conjunction_idx.is_some() },
             { "append_only", self.is_append_only },
         );
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(6);
         vec.push(("type", Pretty::debug(&self.core.join_type)));
 
@@ -316,7 +317,7 @@ impl Distill for StreamHashJoin {
         if let Some(i) = self.clean_right_state_conjunction_idx {
             vec.push(("conditions_to_clean_right_state_table", get_cond(i)));
         }
-        if let Some(ow) = watermark_pretty(&self.base.watermark_columns, self.schema()) {
+        if let Some(ow) = watermark_pretty(self.base.watermark_columns(), self.schema()) {
             vec.push(("output_watermarks", ow));
         }
 
diff --git a/src/frontend/src/optimizer/plan_node/stream_hop_window.rs b/src/frontend/src/optimizer/plan_node/stream_hop_window.rs
index c68b1b307d470..e177be6942360 100644
--- a/src/frontend/src/optimizer/plan_node/stream_hop_window.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_hop_window.rs
@@ -17,6 +17,8 @@ use risingwave_common::util::column_index_mapping::ColIndexMapping;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use risingwave_pb::stream_plan::HopWindowNode;
 
+use super::generic::GenericPlanRef;
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, watermark_pretty, Distill};
 use super::{generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::expr::{Expr, ExprImpl, ExprRewriter};
@@ -56,7 +58,7 @@ impl StreamHopWindow {
         )
         .rewrite_bitset(&watermark_columns);
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             input.append_only(),
@@ -75,7 +77,7 @@ impl StreamHopWindow {
 impl Distill for StreamHopWindow {
     fn distill<'a>(&self) -> XmlNode<'a> {
         let mut vec = self.core.fields_pretty();
-        if let Some(ow) = watermark_pretty(&self.base.watermark_columns, self.schema()) {
+        if let Some(ow) = watermark_pretty(self.base.watermark_columns(), self.schema()) {
             vec.push(("output_watermarks", ow));
         }
         childless_record("StreamHopWindow", vec)
diff --git a/src/frontend/src/optimizer/plan_node/stream_materialize.rs b/src/frontend/src/optimizer/plan_node/stream_materialize.rs
index fb17537bc90e6..9c87f1a34abbd 100644
--- a/src/frontend/src/optimizer/plan_node/stream_materialize.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_materialize.rs
@@ -24,11 +24,13 @@ use risingwave_common::util::sort_util::{ColumnOrder, OrderType};
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 
 use super::derive::derive_columns;
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{reorganize_elements_id, ExprRewritable, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::catalog::table_catalog::{CreateType, TableCatalog, TableType, TableVersion};
 use crate::catalog::FragmentId;
 use crate::optimizer::plan_node::derive::derive_pk;
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{PlanBase, PlanNodeMeta};
 use crate::optimizer::property::{Cardinality, Distribution, Order, RequiredDist};
 use crate::stream_fragmenter::BuildFragmentGraphState;
@@ -149,7 +151,22 @@ impl StreamMaterialize {
                 TableType::MaterializedView => {
                     assert_matches!(user_distributed_by, RequiredDist::Any);
                     // ensure the same pk will not shuffle to different node
-                    RequiredDist::shard_by_key(input.schema().len(), input.expect_stream_key())
+                    let required_dist =
+                        RequiredDist::shard_by_key(input.schema().len(), input.expect_stream_key());
+
+                    // If the input is a stream join, enforce the stream key as the materialized
+                    // view distribution key to avoid slow backfilling caused by
+                    // data skew of the dimension table join key.
+                    // See <https://github.com/risingwavelabs/risingwave/issues/12824> for more information.
+                    let is_stream_join = matches!(input.as_stream_hash_join(), Some(_join))
+                        || matches!(input.as_stream_temporal_join(), Some(_join))
+                        || matches!(input.as_stream_delta_join(), Some(_join));
+
+                    if is_stream_join {
+                        return Ok(required_dist.enforce(input, &Order::any()));
+                    }
+
+                    required_dist
                 }
                 TableType::Index => {
                     assert_matches!(
@@ -273,8 +290,8 @@ impl Distill for StreamMaterialize {
 
         vec.push(("pk_conflict", Pretty::from(pk_conflict_behavior)));
 
-        let watermark_columns = &self.base.watermark_columns;
-        if self.base.watermark_columns.count_ones(..) > 0 {
+        let watermark_columns = &self.base.watermark_columns();
+        if self.base.watermark_columns().count_ones(..) > 0 {
             let watermark_column_names = watermark_columns
                 .ones()
                 .map(|i| table.columns()[i].name_with_hidden().to_string())
@@ -294,16 +311,16 @@ impl PlanTreeNodeUnary for StreamMaterialize {
     fn clone_with_input(&self, input: PlanRef) -> Self {
         let new = Self::new(input, self.table().clone());
         new.base
-            .schema
+            .schema()
             .fields
             .iter()
-            .zip_eq_fast(self.base.schema.fields.iter())
+            .zip_eq_fast(self.base.schema().fields.iter())
             .for_each(|(a, b)| {
                 assert_eq!(a.data_type, b.data_type);
                 assert_eq!(a.type_name, b.type_name);
                 assert_eq!(a.sub_fields, b.sub_fields);
             });
-        assert_eq!(new.plan_base().stream_key, self.plan_base().stream_key);
+        assert_eq!(new.plan_base().stream_key(), self.plan_base().stream_key());
         new
     }
 }
diff --git a/src/frontend/src/optimizer/plan_node/stream_now.rs b/src/frontend/src/optimizer/plan_node/stream_now.rs
index 9eb0a0e0f143e..91ebc344fa51d 100644
--- a/src/frontend/src/optimizer/plan_node/stream_now.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_now.rs
@@ -19,8 +19,7 @@ use risingwave_common::types::DataType;
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::NowNode;
 
-use super::generic::GenericPlanRef;
-use super::stream::StreamPlanRef;
+use super::generic::{GenericPlanRef, PhysicalPlanRef};
 use super::utils::{childless_record, Distill, TableCatalogBuilder};
 use super::{ExprRewritable, LogicalNow, PlanBase, StreamNode};
 use crate::optimizer::plan_node::utils::column_names_pretty;
@@ -59,7 +58,7 @@ impl StreamNow {
 
 impl Distill for StreamNow {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let vec = if self.base.ctx.is_explain_verbose() {
+        let vec = if self.base.ctx().is_explain_verbose() {
             vec![("output", column_names_pretty(self.schema()))]
         } else {
             vec![]
diff --git a/src/frontend/src/optimizer/plan_node/stream_over_window.rs b/src/frontend/src/optimizer/plan_node/stream_over_window.rs
index 0d749f0c7b0e6..5a2f9d98f1340 100644
--- a/src/frontend/src/optimizer/plan_node/stream_over_window.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_over_window.rs
@@ -21,6 +21,7 @@ use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use super::generic::{GenericPlanNode, PlanWindowFunction};
 use super::utils::{impl_distill_by_unit, TableCatalogBuilder};
 use super::{generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::stream_fragmenter::BuildFragmentGraphState;
 use crate::TableCatalog;
 
@@ -37,7 +38,7 @@ impl StreamOverWindow {
         let input = &core.input;
         let watermark_columns = FixedBitSet::with_capacity(core.output_len());
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             input.distribution().clone(),
             false, // general over window cannot be append-only
@@ -122,7 +123,7 @@ impl StreamNode for StreamOverWindow {
             .to_internal_table_prost();
         let cache_policy = self
             .base
-            .ctx
+            .ctx()
             .session_ctx()
             .config()
             .get_streaming_over_window_cache_policy();
diff --git a/src/frontend/src/optimizer/plan_node/stream_project.rs b/src/frontend/src/optimizer/plan_node/stream_project.rs
index 8a7665881e0cf..c0ff0d1cf2f43 100644
--- a/src/frontend/src/optimizer/plan_node/stream_project.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_project.rs
@@ -17,6 +17,8 @@ use pretty_xmlish::XmlNode;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use risingwave_pb::stream_plan::ProjectNode;
 
+use super::generic::GenericPlanRef;
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, watermark_pretty, Distill};
 use super::{generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::expr::{try_derive_watermark, Expr, ExprImpl, ExprRewriter, WatermarkDerivation};
@@ -41,7 +43,7 @@ impl Distill for StreamProject {
         let schema = self.schema();
         let mut vec = self.core.fields_pretty(schema);
         if let Some(display_output_watermarks) =
-            watermark_pretty(&self.base.watermark_columns, schema)
+            watermark_pretty(self.base.watermark_columns(), schema)
         {
             vec.push(("output_watermarks", display_output_watermarks));
         }
@@ -79,7 +81,7 @@ impl StreamProject {
         }
         // Project executor won't change the append-only behavior of the stream, so it depends on
         // input's `append_only`.
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             distribution,
             input.append_only(),
diff --git a/src/frontend/src/optimizer/plan_node/stream_project_set.rs b/src/frontend/src/optimizer/plan_node/stream_project_set.rs
index cadd600f3c3b7..ba09d79c96c60 100644
--- a/src/frontend/src/optimizer/plan_node/stream_project_set.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_project_set.rs
@@ -66,7 +66,7 @@ impl StreamProjectSet {
 
         // ProjectSet executor won't change the append-only behavior of the stream, so it depends on
         // input's `append_only`.
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             distribution,
             input.append_only(),
diff --git a/src/frontend/src/optimizer/plan_node/stream_share.rs b/src/frontend/src/optimizer/plan_node/stream_share.rs
index 8b406005f40a6..3acf0b132805e 100644
--- a/src/frontend/src/optimizer/plan_node/stream_share.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_share.rs
@@ -16,6 +16,8 @@ use pretty_xmlish::XmlNode;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use risingwave_pb::stream_plan::PbStreamNode;
 
+use super::generic::GenericPlanRef;
+use super::stream::StreamPlanRef;
 use super::utils::Distill;
 use super::{generic, ExprRewritable, PlanRef, PlanTreeNodeUnary, StreamExchange, StreamNode};
 use crate::optimizer::plan_node::{LogicalShare, PlanBase, PlanTreeNode};
@@ -34,7 +36,7 @@ impl StreamShare {
         let input = core.input.borrow().0.clone();
         let dist = input.distribution().clone();
         // Filter executor won't change the append-only behavior of the stream.
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             input.append_only(),
@@ -79,7 +81,7 @@ impl StreamNode for StreamShare {
 
 impl StreamShare {
     pub fn adhoc_to_stream_prost(&self, state: &mut BuildFragmentGraphState) -> PbStreamNode {
-        let operator_id = self.base.id.0 as u32;
+        let operator_id = self.base.id().0 as u32;
 
         match state.get_share_stream_node(operator_id) {
             None => {
diff --git a/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs b/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs
index 59311dd22226c..92d96fdf21b08 100644
--- a/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_simple_agg.rs
@@ -21,6 +21,8 @@ use super::generic::{self, PlanAggCall};
 use super::utils::{childless_record, plan_node_name, Distill};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::expr::ExprRewriter;
+use crate::optimizer::plan_node::generic::PhysicalPlanRef;
+use crate::optimizer::plan_node::stream::StreamPlanRef;
 use crate::optimizer::property::Distribution;
 use crate::stream_fragmenter::BuildFragmentGraphState;
 
@@ -48,7 +50,7 @@ impl StreamSimpleAgg {
         let watermark_columns = FixedBitSet::with_capacity(core.output_len());
 
         // Simple agg executor might change the append-only behavior of the stream.
-        let base = PlanBase::new_stream_with_logical(&core, dist, false, false, watermark_columns);
+        let base = PlanBase::new_stream_with_core(&core, dist, false, false, watermark_columns);
         StreamSimpleAgg {
             base,
             core,
@@ -99,7 +101,7 @@ impl StreamNode for StreamSimpleAgg {
                 .collect(),
             distribution_key: self
                 .base
-                .dist
+                .distribution()
                 .dist_column_indices()
                 .iter()
                 .map(|idx| *idx as u32)
diff --git a/src/frontend/src/optimizer/plan_node/stream_sink.rs b/src/frontend/src/optimizer/plan_node/stream_sink.rs
index a51380d630331..32e9fb487910c 100644
--- a/src/frontend/src/optimizer/plan_node/stream_sink.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_sink.rs
@@ -37,6 +37,7 @@ use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use tracing::info;
 
 use super::derive::{derive_columns, derive_pk};
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill, IndicesDisplay, TableCatalogBuilder};
 use super::{ExprRewritable, PlanBase, PlanRef, StreamNode};
 use crate::optimizer::plan_node::PlanTreeNodeUnary;
@@ -57,7 +58,7 @@ pub struct StreamSink {
 impl StreamSink {
     #[must_use]
     pub fn new(input: PlanRef, sink_desc: SinkDesc) -> Self {
-        let base = PlanBase::derive_stream_plan_base(&input);
+        let base = input.plan_base().clone_with_new_plan_id();
         Self {
             base,
             input,
@@ -389,7 +390,7 @@ impl Distill for StreamSink {
                     .iter()
                     .map(|k| k.column_index)
                     .collect_vec(),
-                schema: &self.base.schema,
+                schema: self.base.schema(),
             };
             vec.push(("pk", pk.distill()));
         }
@@ -409,7 +410,7 @@ impl StreamNode for StreamSink {
         PbNodeBody::Sink(SinkNode {
             sink_desc: Some(self.sink_desc.to_proto()),
             table: Some(table.to_internal_table_prost()),
-            log_store_type: match self.base.ctx.session_ctx().config().get_sink_decouple() {
+            log_store_type: match self.base.ctx().session_ctx().config().get_sink_decouple() {
                 SinkDecouple::Default => {
                     let enable_sink_decouple =
                         match_sink_name_str!(
diff --git a/src/frontend/src/optimizer/plan_node/stream_sort.rs b/src/frontend/src/optimizer/plan_node/stream_sort.rs
index b82d71068d817..41a56a0fd5df2 100644
--- a/src/frontend/src/optimizer/plan_node/stream_sort.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_sort.rs
@@ -20,6 +20,8 @@ use risingwave_common::catalog::FieldDisplay;
 use risingwave_common::util::sort_util::OrderType;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 
+use super::generic::{GenericPlanRef, PhysicalPlanRef};
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, Distill, TableCatalogBuilder};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::stream_fragmenter::BuildFragmentGraphState;
@@ -84,7 +86,7 @@ impl StreamEowcSort {
         tbl_builder.add_order_column(self.sort_column_index, OrderType::ascending());
         order_cols.insert(self.sort_column_index);
 
-        let dist_key = self.base.dist.dist_column_indices().to_vec();
+        let dist_key = self.base.distribution().dist_column_indices().to_vec();
         for idx in &dist_key {
             if !order_cols.contains(idx) {
                 tbl_builder.add_order_column(*idx, OrderType::ascending());
diff --git a/src/frontend/src/optimizer/plan_node/stream_source.rs b/src/frontend/src/optimizer/plan_node/stream_source.rs
index 377e2704776bb..ae66cf568118b 100644
--- a/src/frontend/src/optimizer/plan_node/stream_source.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_source.rs
@@ -37,7 +37,7 @@ pub struct StreamSource {
 
 impl StreamSource {
     pub fn new(core: generic::Source) -> Self {
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             Distribution::SomeShard,
             core.catalog.as_ref().map_or(true, |s| s.append_only),
diff --git a/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs b/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs
index 0af7ebded94d9..474582ec877c7 100644
--- a/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_stateless_simple_agg.rs
@@ -20,6 +20,7 @@ use super::generic::{self, PlanAggCall};
 use super::utils::impl_distill_by_unit;
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::expr::ExprRewriter;
+use crate::optimizer::plan_node::generic::PhysicalPlanRef;
 use crate::optimizer::property::RequiredDist;
 use crate::stream_fragmenter::BuildFragmentGraphState;
 
@@ -49,7 +50,7 @@ impl StreamStatelessSimpleAgg {
             }
         }
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             input_dist.clone(),
             input.append_only(),
diff --git a/src/frontend/src/optimizer/plan_node/stream_table_scan.rs b/src/frontend/src/optimizer/plan_node/stream_table_scan.rs
index 907a41db28525..965ca217a3369 100644
--- a/src/frontend/src/optimizer/plan_node/stream_table_scan.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_table_scan.rs
@@ -24,11 +24,13 @@ use risingwave_common::util::sort_util::OrderType;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use risingwave_pb::stream_plan::{ChainType, PbStreamNode};
 
+use super::generic::PhysicalPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{generic, ExprRewritable, PlanBase, PlanNodeId, PlanRef, StreamNode};
 use crate::catalog::ColumnId;
 use crate::expr::{ExprRewriter, FunctionCall};
 use crate::optimizer::plan_node::generic::GenericPlanRef;
+use crate::optimizer::plan_node::stream::StreamPlanRef;
 use crate::optimizer::plan_node::utils::{IndicesDisplay, TableCatalogBuilder};
 use crate::optimizer::property::{Distribution, DistributionDisplay};
 use crate::stream_fragmenter::BuildFragmentGraphState;
@@ -66,7 +68,7 @@ impl StreamTableScan {
                 None => Distribution::SomeShard,
             }
         };
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             distribution,
             core.table_desc.append_only,
@@ -192,7 +194,7 @@ impl_plan_tree_node_for_leaf! { StreamTableScan }
 
 impl Distill for StreamTableScan {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(4);
         vec.push(("table", Pretty::from(self.core.table_name.clone())));
         vec.push(("columns", self.core.columns_pretty(verbose)));
@@ -200,12 +202,12 @@ impl Distill for StreamTableScan {
         if verbose {
             let pk = IndicesDisplay {
                 indices: self.stream_key().unwrap_or_default(),
-                schema: &self.base.schema,
+                schema: self.base.schema(),
             };
             vec.push(("pk", pk.distill()));
             let dist = Pretty::display(&DistributionDisplay {
                 distribution: self.distribution(),
-                input_schema: &self.base.schema,
+                input_schema: self.base.schema(),
             });
             vec.push(("dist", dist));
         }
@@ -325,7 +327,7 @@ impl StreamTableScan {
                 ..Default::default()
             })),
             stream_key,
-            operator_id: self.base.id.0 as u64,
+            operator_id: self.base.id().0 as u64,
             identity: {
                 let s = self.distill_to_string();
                 s.replace("StreamTableScan", "Chain")
diff --git a/src/frontend/src/optimizer/plan_node/stream_temporal_join.rs b/src/frontend/src/optimizer/plan_node/stream_temporal_join.rs
index 2191ca322342d..675dbeb9ab381 100644
--- a/src/frontend/src/optimizer/plan_node/stream_temporal_join.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_temporal_join.rs
@@ -18,6 +18,8 @@ use risingwave_pb::plan_common::JoinType;
 use risingwave_pb::stream_plan::stream_node::NodeBody;
 use risingwave_pb::stream_plan::TemporalJoinNode;
 
+use super::generic::GenericPlanRef;
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, watermark_pretty, Distill};
 use super::{generic, ExprRewritable, PlanBase, PlanRef, PlanTreeNodeBinary, StreamNode};
 use crate::expr::{Expr, ExprRewriter};
@@ -61,7 +63,7 @@ impl StreamTemporalJoin {
                 .rewrite_bitset(core.left.watermark_columns()),
         );
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             true,
@@ -88,7 +90,7 @@ impl StreamTemporalJoin {
 
 impl Distill for StreamTemporalJoin {
     fn distill<'a>(&self) -> XmlNode<'a> {
-        let verbose = self.base.ctx.is_explain_verbose();
+        let verbose = self.base.ctx().is_explain_verbose();
         let mut vec = Vec::with_capacity(if verbose { 3 } else { 2 });
         vec.push(("type", Pretty::debug(&self.core.join_type)));
 
@@ -101,7 +103,7 @@ impl Distill for StreamTemporalJoin {
             }),
         ));
 
-        if let Some(ow) = watermark_pretty(&self.base.watermark_columns, self.schema()) {
+        if let Some(ow) = watermark_pretty(self.base.watermark_columns(), self.schema()) {
             vec.push(("output_watermarks", ow));
         }
 
diff --git a/src/frontend/src/optimizer/plan_node/stream_topn.rs b/src/frontend/src/optimizer/plan_node/stream_topn.rs
index e7a880fa7d757..87890625f6be7 100644
--- a/src/frontend/src/optimizer/plan_node/stream_topn.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_topn.rs
@@ -40,7 +40,7 @@ impl StreamTopN {
         };
         let watermark_columns = FixedBitSet::with_capacity(input.schema().len());
 
-        let base = PlanBase::new_stream_with_logical(&core, dist, false, false, watermark_columns);
+        let base = PlanBase::new_stream_with_core(&core, dist, false, false, watermark_columns);
         StreamTopN { base, core }
     }
 
diff --git a/src/frontend/src/optimizer/plan_node/stream_union.rs b/src/frontend/src/optimizer/plan_node/stream_union.rs
index 8f6353d6be44c..6d6dca2d8dd02 100644
--- a/src/frontend/src/optimizer/plan_node/stream_union.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_union.rs
@@ -19,6 +19,8 @@ use pretty_xmlish::XmlNode;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 use risingwave_pb::stream_plan::UnionNode;
 
+use super::generic::GenericPlanRef;
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, watermark_pretty, Distill};
 use super::{generic, ExprRewritable, PlanRef};
 use crate::optimizer::plan_node::generic::GenericPlanNode;
@@ -46,7 +48,7 @@ impl StreamUnion {
             |acc_watermark_columns, input| acc_watermark_columns.bitand(input.watermark_columns()),
         );
 
-        let base = PlanBase::new_stream_with_logical(
+        let base = PlanBase::new_stream_with_core(
             &core,
             dist,
             inputs.iter().all(|x| x.append_only()),
@@ -60,7 +62,7 @@ impl StreamUnion {
 impl Distill for StreamUnion {
     fn distill<'a>(&self) -> XmlNode<'a> {
         let mut vec = self.core.fields_pretty();
-        if let Some(ow) = watermark_pretty(&self.base.watermark_columns, self.schema()) {
+        if let Some(ow) = watermark_pretty(self.base.watermark_columns(), self.schema()) {
             vec.push(("output_watermarks", ow));
         }
         childless_record("StreamUnion", vec)
diff --git a/src/frontend/src/optimizer/plan_node/stream_values.rs b/src/frontend/src/optimizer/plan_node/stream_values.rs
index fb0b844411f63..f8cc5db851159 100644
--- a/src/frontend/src/optimizer/plan_node/stream_values.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_values.rs
@@ -18,6 +18,7 @@ use risingwave_pb::stream_plan::stream_node::NodeBody as ProstStreamNode;
 use risingwave_pb::stream_plan::values_node::ExprTuple;
 use risingwave_pb::stream_plan::ValuesNode;
 
+use super::generic::GenericPlanRef;
 use super::utils::{childless_record, Distill};
 use super::{ExprRewritable, LogicalValues, PlanBase, StreamNode};
 use crate::expr::{Expr, ExprImpl};
diff --git a/src/frontend/src/optimizer/plan_node/stream_watermark_filter.rs b/src/frontend/src/optimizer/plan_node/stream_watermark_filter.rs
index ed5a946603ee4..066bc9a234ca5 100644
--- a/src/frontend/src/optimizer/plan_node/stream_watermark_filter.rs
+++ b/src/frontend/src/optimizer/plan_node/stream_watermark_filter.rs
@@ -21,6 +21,7 @@ use risingwave_common::util::sort_util::OrderType;
 use risingwave_pb::catalog::WatermarkDesc;
 use risingwave_pb::stream_plan::stream_node::PbNodeBody;
 
+use super::stream::StreamPlanRef;
 use super::utils::{childless_record, watermark_pretty, Distill, TableCatalogBuilder};
 use super::{ExprRewritable, PlanBase, PlanRef, PlanTreeNodeUnary, StreamNode};
 use crate::expr::{ExprDisplay, ExprImpl};
@@ -85,7 +86,7 @@ impl Distill for StreamWatermarkFilter {
             })
             .collect();
         let display_output_watermarks =
-            watermark_pretty(&self.base.watermark_columns, input_schema).unwrap();
+            watermark_pretty(self.base.watermark_columns(), input_schema).unwrap();
         let fields = vec![
             ("watermark_descs", Pretty::Array(display_watermark_descs)),
             ("output_watermarks", display_output_watermarks),
diff --git a/src/frontend/src/optimizer/plan_rewriter/plan_cloner.rs b/src/frontend/src/optimizer/plan_rewriter/plan_cloner.rs
index f30f3d9fa4966..7e53b903ac962 100644
--- a/src/frontend/src/optimizer/plan_rewriter/plan_cloner.rs
+++ b/src/frontend/src/optimizer/plan_rewriter/plan_cloner.rs
@@ -16,6 +16,7 @@ use std::collections::HashMap;
 
 use itertools::Itertools;
 
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{LogicalShare, PlanNodeId, PlanTreeNode, StreamShare};
 use crate::optimizer::PlanRewriter;
 use crate::PlanRef;
diff --git a/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs b/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs
index 9ab0d4d580ddc..5b9efb9fc7c94 100644
--- a/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs
+++ b/src/frontend/src/optimizer/plan_rewriter/share_source_rewriter.rs
@@ -17,6 +17,7 @@ use std::collections::{HashMap, HashSet};
 use itertools::Itertools;
 
 use crate::catalog::SourceId;
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{
     LogicalShare, LogicalSource, PlanNodeId, PlanTreeNode, StreamShare,
 };
diff --git a/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs b/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs
index 7d538392f9361..7950b5d81a49c 100644
--- a/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs
+++ b/src/frontend/src/optimizer/plan_visitor/share_parent_counter.rs
@@ -15,6 +15,7 @@
 use std::collections::HashMap;
 
 use super::{DefaultBehavior, DefaultValue};
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{LogicalShare, PlanNodeId, PlanTreeNodeUnary};
 use crate::optimizer::plan_visitor::PlanVisitor;
 
diff --git a/src/frontend/src/optimizer/property/distribution.rs b/src/frontend/src/optimizer/property/distribution.rs
index 4fcaf959eac87..2df1d7ae00bc3 100644
--- a/src/frontend/src/optimizer/property/distribution.rs
+++ b/src/frontend/src/optimizer/property/distribution.rs
@@ -295,10 +295,12 @@ impl RequiredDist {
 
     pub fn enforce_if_not_satisfies(
         &self,
-        plan: PlanRef,
+        mut plan: PlanRef,
         required_order: &Order,
     ) -> Result<PlanRef> {
-        let plan = required_order.enforce_if_not_satisfies(plan)?;
+        if let Convention::Batch = plan.convention() {
+            plan = required_order.enforce_if_not_satisfies(plan)?;
+        }
         if !plan.distribution().satisfies(self) {
             Ok(self.enforce(plan, required_order))
         } else {
@@ -329,7 +331,7 @@ impl RequiredDist {
         }
     }
 
-    fn enforce(&self, plan: PlanRef, required_order: &Order) -> PlanRef {
+    pub fn enforce(&self, plan: PlanRef, required_order: &Order) -> PlanRef {
         let dist = self.to_dist();
         match plan.convention() {
             Convention::Batch => BatchExchange::new(plan, required_order.clone(), dist).into(),
diff --git a/src/frontend/src/optimizer/property/order.rs b/src/frontend/src/optimizer/property/order.rs
index a70bffb13a8ba..19ad7586e1c11 100644
--- a/src/frontend/src/optimizer/property/order.rs
+++ b/src/frontend/src/optimizer/property/order.rs
@@ -92,7 +92,7 @@ impl Order {
         }
     }
 
-    pub fn enforce(&self, plan: PlanRef) -> PlanRef {
+    fn enforce(&self, plan: PlanRef) -> PlanRef {
         assert_eq!(plan.convention(), Convention::Batch);
         BatchSort::new(plan, self.clone()).into()
     }
diff --git a/src/frontend/src/optimizer/rule/agg_group_by_simplify_rule.rs b/src/frontend/src/optimizer/rule/agg_group_by_simplify_rule.rs
index 34025eca43032..3e22348e27b49 100644
--- a/src/frontend/src/optimizer/rule/agg_group_by_simplify_rule.rs
+++ b/src/frontend/src/optimizer/rule/agg_group_by_simplify_rule.rs
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use risingwave_common::util::sort_util::{ColumnOrder, OrderType};
 use risingwave_expr::aggregate::AggKind;
 
 use super::super::plan_node::*;
@@ -48,11 +47,11 @@ impl Rule for AggGroupBySimplifyRule {
                 if !new_group_key.contains(i) {
                     let data_type = agg_input.schema().fields[i].data_type();
                     new_agg_calls.push(PlanAggCall {
-                        agg_kind: AggKind::FirstValue,
+                        agg_kind: AggKind::InternalLastSeenValue,
                         return_type: data_type.clone(),
                         inputs: vec![InputRef::new(i, data_type)],
                         distinct: false,
-                        order_by: vec![ColumnOrder::new(i, OrderType::ascending())],
+                        order_by: vec![],
                         filter: Condition::true_cond(),
                         direct_args: vec![],
                     });
diff --git a/src/frontend/src/optimizer/rule/always_false_filter_rule.rs b/src/frontend/src/optimizer/rule/always_false_filter_rule.rs
index 02165232372e4..eeba7d9f3be3b 100644
--- a/src/frontend/src/optimizer/rule/always_false_filter_rule.rs
+++ b/src/frontend/src/optimizer/rule/always_false_filter_rule.rs
@@ -15,6 +15,7 @@
 use risingwave_common::types::ScalarImpl;
 
 use super::Rule;
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{LogicalFilter, LogicalValues};
 use crate::PlanRef;
 
diff --git a/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs b/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs
index 66579248a76f9..7ac121692c81d 100644
--- a/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs
+++ b/src/frontend/src/optimizer/rule/apply_join_transpose_rule.rs
@@ -23,6 +23,7 @@ use crate::expr::{
     CorrelatedId, CorrelatedInputRef, Expr, ExprImpl, ExprRewriter, ExprType, FunctionCall,
     InputRef,
 };
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{LogicalApply, LogicalFilter, LogicalJoin, PlanTreeNodeBinary};
 use crate::optimizer::plan_visitor::{ExprCorrelatedIdFinder, PlanCorrelatedIdFinder};
 use crate::optimizer::rule::apply_offset_rewriter::ApplyCorrelatedIndicesConverter;
diff --git a/src/frontend/src/optimizer/rule/expand_to_project_rule.rs b/src/frontend/src/optimizer/rule/expand_to_project_rule.rs
index 1ed1da0037aba..01a39042efd98 100644
--- a/src/frontend/src/optimizer/rule/expand_to_project_rule.rs
+++ b/src/frontend/src/optimizer/rule/expand_to_project_rule.rs
@@ -36,7 +36,7 @@ impl Rule for ExpandToProjectRule {
         let column_subset = column_subsets.get(0).unwrap();
 
         // if `column_subsets` len equals 1, convert it into a project
-        let mut exprs = Vec::with_capacity(expand.base.schema.len());
+        let mut exprs = Vec::with_capacity(expand.base.schema().len());
         // Add original input column first
         for i in 0..input.schema().len() {
             exprs.push(ExprImpl::InputRef(
diff --git a/src/frontend/src/optimizer/rule/index_selection_rule.rs b/src/frontend/src/optimizer/rule/index_selection_rule.rs
index 9103d1bc906bc..323cc59ef3558 100644
--- a/src/frontend/src/optimizer/rule/index_selection_rule.rs
+++ b/src/frontend/src/optimizer/rule/index_selection_rule.rs
@@ -66,6 +66,7 @@ use crate::expr::{
     FunctionCall, InputRef,
 };
 use crate::optimizer::optimizer_context::OptimizerContextRef;
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{
     generic, ColumnPruningContext, LogicalJoin, LogicalScan, LogicalUnion, PlanTreeNode,
     PlanTreeNodeBinary, PredicatePushdown, PredicatePushdownContext,
diff --git a/src/frontend/src/optimizer/rule/left_deep_tree_join_ordering_rule.rs b/src/frontend/src/optimizer/rule/left_deep_tree_join_ordering_rule.rs
index dcbb6f7b015ee..bd2db0ac67cca 100644
--- a/src/frontend/src/optimizer/rule/left_deep_tree_join_ordering_rule.rs
+++ b/src/frontend/src/optimizer/rule/left_deep_tree_join_ordering_rule.rs
@@ -47,6 +47,7 @@ mod tests {
     use super::*;
     use crate::expr::{ExprImpl, FunctionCall, InputRef};
     use crate::optimizer::optimizer_context::OptimizerContext;
+    use crate::optimizer::plan_node::generic::GenericPlanRef;
     use crate::utils::Condition;
 
     #[tokio::test]
diff --git a/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs b/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs
index c496a906400ae..8682db8491a1d 100644
--- a/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs
+++ b/src/frontend/src/optimizer/rule/merge_multijoin_rule.rs
@@ -46,6 +46,7 @@ mod tests {
     use super::*;
     use crate::expr::{ExprImpl, FunctionCall, InputRef};
     use crate::optimizer::optimizer_context::OptimizerContext;
+    use crate::optimizer::plan_node::generic::GenericPlanRef;
     use crate::utils::Condition;
 
     #[tokio::test]
diff --git a/src/frontend/src/optimizer/rule/min_max_on_index_rule.rs b/src/frontend/src/optimizer/rule/min_max_on_index_rule.rs
index ea8386bc227f8..c32ae40531cd0 100644
--- a/src/frontend/src/optimizer/rule/min_max_on_index_rule.rs
+++ b/src/frontend/src/optimizer/rule/min_max_on_index_rule.rs
@@ -27,7 +27,7 @@ use risingwave_expr::aggregate::AggKind;
 
 use super::{BoxedRule, Rule};
 use crate::expr::{ExprImpl, ExprType, FunctionCall, InputRef};
-use crate::optimizer::plan_node::generic::Agg;
+use crate::optimizer::plan_node::generic::{Agg, GenericPlanRef};
 use crate::optimizer::plan_node::{
     LogicalAgg, LogicalFilter, LogicalScan, LogicalTopN, PlanAggCall, PlanTreeNodeUnary,
 };
diff --git a/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs b/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs
index dfb6963c7fb4f..93637d3ba8193 100644
--- a/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs
+++ b/src/frontend/src/optimizer/rule/over_window_to_topn_rule.rs
@@ -18,6 +18,7 @@ use risingwave_expr::window_function::WindowFuncKind;
 
 use super::Rule;
 use crate::expr::{collect_input_refs, ExprImpl, ExprType};
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{LogicalFilter, LogicalTopN, PlanTreeNodeUnary};
 use crate::optimizer::property::Order;
 use crate::planner::LIMIT_ALL_COUNT;
diff --git a/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs b/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs
index dc5f9c2bc9aba..f34146ba80050 100644
--- a/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs
+++ b/src/frontend/src/optimizer/rule/pull_up_correlated_predicate_rule.rs
@@ -18,6 +18,7 @@ use risingwave_common::util::column_index_mapping::ColIndexMapping;
 use super::super::plan_node::*;
 use super::{BoxedRule, Rule};
 use crate::expr::{CorrelatedId, CorrelatedInputRef, Expr, ExprImpl, ExprRewriter, InputRef};
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_visitor::{PlanCorrelatedIdFinder, PlanVisitor};
 use crate::optimizer::PlanRef;
 use crate::utils::Condition;
diff --git a/src/frontend/src/optimizer/rule/table_function_to_project_set_rule.rs b/src/frontend/src/optimizer/rule/table_function_to_project_set_rule.rs
index 5a6f1187fdd02..f85ffc2318459 100644
--- a/src/frontend/src/optimizer/rule/table_function_to_project_set_rule.rs
+++ b/src/frontend/src/optimizer/rule/table_function_to_project_set_rule.rs
@@ -18,6 +18,7 @@ use risingwave_common::types::DataType;
 
 use super::{BoxedRule, Rule};
 use crate::expr::{Expr, ExprImpl, ExprType, FunctionCall, InputRef};
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{
     LogicalProject, LogicalProjectSet, LogicalTableFunction, LogicalValues, PlanTreeNodeUnary,
 };
@@ -51,7 +52,7 @@ impl Rule for TableFunctionToProjectSetRule {
         let logical_values = LogicalValues::create(
             vec![vec![]],
             Schema::new(vec![]),
-            logical_table_function.base.ctx.clone(),
+            logical_table_function.base.ctx().clone(),
         );
         let logical_project_set = LogicalProjectSet::create(logical_values, vec![table_function]);
         // We need a project to align schema type because
diff --git a/src/frontend/src/optimizer/rule/trivial_project_to_values_rule.rs b/src/frontend/src/optimizer/rule/trivial_project_to_values_rule.rs
index 9759739490fe6..a13bef3baa9d9 100644
--- a/src/frontend/src/optimizer/rule/trivial_project_to_values_rule.rs
+++ b/src/frontend/src/optimizer/rule/trivial_project_to_values_rule.rs
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 use super::{BoxedRule, Rule};
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::{LogicalValues, PlanTreeNodeUnary};
 use crate::optimizer::plan_visitor::{LogicalCardinalityExt, SideEffectVisitor};
 use crate::optimizer::{PlanRef, PlanVisitor};
diff --git a/src/frontend/src/optimizer/rule/union_input_values_merge_rule.rs b/src/frontend/src/optimizer/rule/union_input_values_merge_rule.rs
index 8119b8847b600..7b83c017ab781 100644
--- a/src/frontend/src/optimizer/rule/union_input_values_merge_rule.rs
+++ b/src/frontend/src/optimizer/rule/union_input_values_merge_rule.rs
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 use super::{BoxedRule, Rule};
+use crate::optimizer::plan_node::generic::GenericPlanRef;
 use crate::optimizer::plan_node::LogicalValues;
 use crate::optimizer::{PlanRef, PlanTreeNode};
 
diff --git a/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs b/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs
index 2a12f6b712e0d..f1d203fba1350 100644
--- a/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs
+++ b/src/frontend/src/optimizer/rule/union_to_distinct_rule.rs
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 use super::{BoxedRule, Rule};
-use crate::optimizer::plan_node::generic::Agg;
+use crate::optimizer::plan_node::generic::{Agg, GenericPlanRef};
 use crate::optimizer::plan_node::{LogicalUnion, PlanTreeNode};
 use crate::optimizer::PlanRef;
 
@@ -24,7 +24,7 @@ impl Rule for UnionToDistinctRule {
         let union: &LogicalUnion = plan.as_logical_union()?;
         if !union.all() {
             let union_all = LogicalUnion::create(true, union.inputs().into_iter().collect());
-            let distinct = Agg::new(vec![], (0..union.base.schema.len()).collect(), union_all)
+            let distinct = Agg::new(vec![], (0..union.base.schema().len()).collect(), union_all)
                 .with_enable_two_phase(false);
             Some(distinct.into())
         } else {
diff --git a/src/frontend/src/scheduler/plan_fragmenter.rs b/src/frontend/src/scheduler/plan_fragmenter.rs
index 4e16bc6cd0b21..cb20103b3e76f 100644
--- a/src/frontend/src/scheduler/plan_fragmenter.rs
+++ b/src/frontend/src/scheduler/plan_fragmenter.rs
@@ -103,7 +103,7 @@ impl Serialize for ExecutionPlanNode {
 impl From<PlanRef> for ExecutionPlanNode {
     fn from(plan_node: PlanRef) -> Self {
         Self {
-            plan_node_id: plan_node.plan_base().id,
+            plan_node_id: plan_node.plan_base().id(),
             plan_node_type: plan_node.node_type(),
             node: plan_node.to_batch_prost_body(),
             children: vec![],
diff --git a/src/frontend/src/test_utils.rs b/src/frontend/src/test_utils.rs
index 20eb252fc5053..cf915ae35713d 100644
--- a/src/frontend/src/test_utils.rs
+++ b/src/frontend/src/test_utils.rs
@@ -773,6 +773,10 @@ impl FrontendMetaClient for MockFrontendMetaClient {
         })
     }
 
+    async fn wait(&self) -> RpcResult<()> {
+        Ok(())
+    }
+
     async fn cancel_creating_jobs(&self, _infos: PbJobs) -> RpcResult<Vec<u32>> {
         Ok(vec![])
     }
diff --git a/src/jni_core/Cargo.toml b/src/jni_core/Cargo.toml
index 69c11a7f21e24..77cafd155000d 100644
--- a/src/jni_core/Cargo.toml
+++ b/src/jni_core/Cargo.toml
@@ -10,6 +10,7 @@ ignored = ["workspace-hack"]
 normal = ["workspace-hack"]
 
 [dependencies]
+anyhow = "1"
 bytes = "1"
 cfg-or-panic = "0.2"
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
diff --git a/src/jni_core/src/lib.rs b/src/jni_core/src/lib.rs
index 29bbf76929b45..4815cd7368370 100644
--- a/src/jni_core/src/lib.rs
+++ b/src/jni_core/src/lib.rs
@@ -902,14 +902,17 @@ pub extern "system" fn Java_com_risingwave_java_binding_Binding_sendSinkWriterRe
     'a,
 >(
     env: EnvParam<'a>,
-    channel: Pointer<'a, Sender<SinkWriterStreamResponse>>,
+    channel: Pointer<'a, Sender<anyhow::Result<SinkWriterStreamResponse>>>,
     msg: JByteArray<'a>,
 ) -> jboolean {
     execute_and_catch(env, move |env| {
         let sink_writer_stream_response: SinkWriterStreamResponse =
             Message::decode(to_guarded_slice(&msg, env)?.deref())?;
 
-        match channel.as_ref().blocking_send(sink_writer_stream_response) {
+        match channel
+            .as_ref()
+            .blocking_send(Ok(sink_writer_stream_response))
+        {
             Ok(_) => Ok(JNI_TRUE),
             Err(e) => {
                 tracing::info!("send error.  {:?}", e);
diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml
index 67e9a95026cc7..f37c909546594 100644
--- a/src/meta/Cargo.toml
+++ b/src/meta/Cargo.toml
@@ -36,7 +36,6 @@ hyper = "0.14"
 itertools = "0.11"
 memcomparable = { version = "0.2" }
 mime_guess = "2"
-model_migration = { path = "src/model_v2/migration" }
 num-integer = "0.1"
 num-traits = "0.2"
 parking_lot = { version = "0.12", features = ["arc_lock"] }
@@ -50,6 +49,8 @@ risingwave_common = { workspace = true }
 risingwave_common_heap_profiling = { workspace = true }
 risingwave_connector = { workspace = true }
 risingwave_hummock_sdk = { workspace = true }
+risingwave_meta_model_migration = { workspace = true }
+risingwave_meta_model_v2 = { workspace = true }
 risingwave_object_store = { workspace = true }
 risingwave_pb = { workspace = true }
 risingwave_rpc_client = { workspace = true }
@@ -64,13 +65,6 @@ sea-orm = { version = "0.12.0", features = [
 ] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
-sqlx = { version = "0.7", features = [
-    "runtime-tokio",
-    "postgres",
-    "mysql",
-    "sqlite",
-    "chrono",
-] }
 sync-point = { path = "../utils/sync-point" }
 thiserror = "1"
 tokio = { version = "0.2", package = "madsim-tokio", features = [
diff --git a/src/meta/model_v2/Cargo.toml b/src/meta/model_v2/Cargo.toml
new file mode 100644
index 0000000000000..1d9992da8a832
--- /dev/null
+++ b/src/meta/model_v2/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "risingwave_meta_model_v2"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+keywords = { workspace = true }
+license = { workspace = true }
+repository = { workspace = true }
+
+[package.metadata.cargo-machete]
+ignored = ["workspace-hack"]
+
+[package.metadata.cargo-udeps.ignore]
+normal = ["workspace-hack"]
+
+[dependencies]
+risingwave_pb = { workspace = true }
+sea-orm = { version = "0.12.0", features = [
+    "sqlx-mysql",
+    "sqlx-postgres",
+    "sqlx-sqlite",
+    "runtime-tokio-native-tls",
+    "macros",
+] }
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
diff --git a/src/meta/model_v2/migration/Cargo.toml b/src/meta/model_v2/migration/Cargo.toml
new file mode 100644
index 0000000000000..4745125140a22
--- /dev/null
+++ b/src/meta/model_v2/migration/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "risingwave_meta_model_migration"
+version = { workspace = true }
+edition = { workspace = true }
+homepage = { workspace = true }
+keywords = { workspace = true }
+license = { workspace = true }
+repository = { workspace = true }
+
+[package.metadata.cargo-machete]
+ignored = ["workspace-hack"]
+
+[package.metadata.cargo-udeps.ignore]
+normal = ["workspace-hack"]
+
+[dependencies]
+async-std = { version = "1", features = ["attributes", "tokio1"] }
+uuid = { version = "1", features = ["v4"] }
+
+[dependencies.sea-orm-migration]
+version = "0.12.0"
+features = ["sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", "runtime-tokio-native-tls", "with-uuid"]
diff --git a/src/meta/src/model_v2/migration/README.md b/src/meta/model_v2/migration/README.md
similarity index 100%
rename from src/meta/src/model_v2/migration/README.md
rename to src/meta/model_v2/migration/README.md
diff --git a/src/meta/src/model_v2/migration/src/lib.rs b/src/meta/model_v2/migration/src/lib.rs
similarity index 100%
rename from src/meta/src/model_v2/migration/src/lib.rs
rename to src/meta/model_v2/migration/src/lib.rs
diff --git a/src/meta/src/model_v2/migration/src/m20230908_072257_init.rs b/src/meta/model_v2/migration/src/m20230908_072257_init.rs
similarity index 96%
rename from src/meta/src/model_v2/migration/src/m20230908_072257_init.rs
rename to src/meta/model_v2/migration/src/m20230908_072257_init.rs
index 43a8e5d24d22f..c9559bd6feda2 100644
--- a/src/meta/src/model_v2/migration/src/m20230908_072257_init.rs
+++ b/src/meta/model_v2/migration/src/m20230908_072257_init.rs
@@ -404,15 +404,16 @@ impl MigrationTrait for Migration {
                     .table(Source::Table)
                     .col(ColumnDef::new(Source::SourceId).integer().primary_key())
                     .col(ColumnDef::new(Source::Name).string().not_null())
-                    .col(ColumnDef::new(Source::RowIdIndex).string())
-                    .col(ColumnDef::new(Source::Columns).json())
-                    .col(ColumnDef::new(Source::PkColumnIds).json())
-                    .col(ColumnDef::new(Source::Properties).json())
-                    .col(ColumnDef::new(Source::Definition).string())
+                    .col(ColumnDef::new(Source::RowIdIndex).integer())
+                    .col(ColumnDef::new(Source::Columns).json().not_null())
+                    .col(ColumnDef::new(Source::PkColumnIds).json().not_null())
+                    .col(ColumnDef::new(Source::Properties).json().not_null())
+                    .col(ColumnDef::new(Source::Definition).string().not_null())
                     .col(ColumnDef::new(Source::SourceInfo).json())
-                    .col(ColumnDef::new(Source::WatermarkDescs).json())
+                    .col(ColumnDef::new(Source::WatermarkDescs).json().not_null())
                     .col(ColumnDef::new(Source::OptionalAssociatedTableId).integer())
                     .col(ColumnDef::new(Source::ConnectionId).integer())
+                    .col(ColumnDef::new(Source::Version).big_integer().not_null())
                     .foreign_key(
                         &mut ForeignKey::create()
                             .name("FK_source_object_id")
@@ -442,15 +443,17 @@ impl MigrationTrait for Migration {
                     .col(ColumnDef::new(Table::Columns).json().not_null())
                     .col(ColumnDef::new(Table::Pk).json().not_null())
                     .col(ColumnDef::new(Table::DistributionKey).json().not_null())
+                    .col(ColumnDef::new(Table::StreamKey).json().not_null())
                     .col(ColumnDef::new(Table::AppendOnly).boolean().not_null())
                     .col(ColumnDef::new(Table::Properties).json().not_null())
                     .col(ColumnDef::new(Table::FragmentId).integer().not_null())
                     .col(ColumnDef::new(Table::VnodeColIndex).integer())
+                    .col(ColumnDef::new(Table::RowIdIndex).integer())
                     .col(ColumnDef::new(Table::ValueIndices).json().not_null())
                     .col(ColumnDef::new(Table::Definition).string().not_null())
                     .col(
                         ColumnDef::new(Table::HandlePkConflictBehavior)
-                            .integer()
+                            .string()
                             .not_null(),
                     )
                     .col(
@@ -467,6 +470,8 @@ impl MigrationTrait for Migration {
                             .boolean()
                             .not_null(),
                     )
+                    .col(ColumnDef::new(Table::JobStatus).string().not_null())
+                    .col(ColumnDef::new(Table::CreateType).string().not_null())
                     .col(ColumnDef::new(Table::Version).json().not_null())
                     .foreign_key(
                         &mut ForeignKey::create()
@@ -506,16 +511,18 @@ impl MigrationTrait for Migration {
                     .table(Sink::Table)
                     .col(ColumnDef::new(Sink::SinkId).integer().primary_key())
                     .col(ColumnDef::new(Sink::Name).string().not_null())
-                    .col(ColumnDef::new(Sink::Columns).json())
-                    .col(ColumnDef::new(Sink::PkColumnIds).json())
-                    .col(ColumnDef::new(Sink::DistributionKey).json())
-                    .col(ColumnDef::new(Sink::DownstreamPk).json())
+                    .col(ColumnDef::new(Sink::Columns).json().not_null())
+                    .col(ColumnDef::new(Sink::PlanPk).json().not_null())
+                    .col(ColumnDef::new(Sink::DistributionKey).json().not_null())
+                    .col(ColumnDef::new(Sink::DownstreamPk).json().not_null())
                     .col(ColumnDef::new(Sink::SinkType).string().not_null())
-                    .col(ColumnDef::new(Sink::Properties).json())
+                    .col(ColumnDef::new(Sink::Properties).json().not_null())
                     .col(ColumnDef::new(Sink::Definition).string().not_null())
                     .col(ColumnDef::new(Sink::ConnectionId).integer())
                     .col(ColumnDef::new(Sink::DbName).string().not_null())
                     .col(ColumnDef::new(Sink::SinkFromName).string().not_null())
+                    .col(ColumnDef::new(Sink::SinkFormatDesc).json())
+                    .col(ColumnDef::new(Sink::JobStatus).string().not_null())
                     .foreign_key(
                         &mut ForeignKey::create()
                             .name("FK_sink_object_id")
@@ -541,7 +548,7 @@ impl MigrationTrait for Migration {
                     .col(ColumnDef::new(View::ViewId).integer().primary_key())
                     .col(ColumnDef::new(View::Name).string().not_null())
                     .col(ColumnDef::new(View::Properties).json().not_null())
-                    .col(ColumnDef::new(View::Sql).string().not_null())
+                    .col(ColumnDef::new(View::Definition).string().not_null())
                     .col(ColumnDef::new(View::Columns).json().not_null())
                     .foreign_key(
                         &mut ForeignKey::create()
@@ -562,8 +569,9 @@ impl MigrationTrait for Migration {
                     .col(ColumnDef::new(Index::Name).string().not_null())
                     .col(ColumnDef::new(Index::IndexTableId).integer().not_null())
                     .col(ColumnDef::new(Index::PrimaryTableId).integer().not_null())
-                    .col(ColumnDef::new(Index::IndexItems).json())
-                    .col(ColumnDef::new(Index::OriginalColumns).json())
+                    .col(ColumnDef::new(Index::IndexItems).json().not_null())
+                    .col(ColumnDef::new(Index::OriginalColumns).json().not_null())
+                    .col(ColumnDef::new(Index::JobStatus).string().not_null())
                     .foreign_key(
                         &mut ForeignKey::create()
                             .name("FK_index_object_id")
@@ -862,10 +870,12 @@ enum Table {
     Columns,
     Pk,
     DistributionKey,
+    StreamKey,
     AppendOnly,
     Properties,
     FragmentId,
     VnodeColIndex,
+    RowIdIndex,
     ValueIndices,
     Definition,
     HandlePkConflictBehavior,
@@ -875,6 +885,8 @@ enum Table {
     DmlFragmentId,
     Cardinality,
     CleanedByWatermark,
+    JobStatus,
+    CreateType,
     Version,
 }
 
@@ -892,6 +904,7 @@ enum Source {
     WatermarkDescs,
     OptionalAssociatedTableId,
     ConnectionId,
+    Version,
 }
 
 #[derive(DeriveIden)]
@@ -900,7 +913,7 @@ enum Sink {
     SinkId,
     Name,
     Columns,
-    PkColumnIds,
+    PlanPk,
     DistributionKey,
     DownstreamPk,
     SinkType,
@@ -909,6 +922,8 @@ enum Sink {
     ConnectionId,
     DbName,
     SinkFromName,
+    SinkFormatDesc,
+    JobStatus,
 }
 
 #[derive(DeriveIden)]
@@ -925,7 +940,7 @@ enum View {
     ViewId,
     Name,
     Properties,
-    Sql,
+    Definition,
     Columns,
 }
 
@@ -938,6 +953,7 @@ enum Index {
     PrimaryTableId,
     IndexItems,
     OriginalColumns,
+    JobStatus,
 }
 
 #[derive(DeriveIden)]
diff --git a/src/meta/src/model_v2/migration/src/m20231008_020431_hummock.rs b/src/meta/model_v2/migration/src/m20231008_020431_hummock.rs
similarity index 100%
rename from src/meta/src/model_v2/migration/src/m20231008_020431_hummock.rs
rename to src/meta/model_v2/migration/src/m20231008_020431_hummock.rs
diff --git a/src/meta/src/model_v2/migration/src/main.rs b/src/meta/model_v2/migration/src/main.rs
similarity index 52%
rename from src/meta/src/model_v2/migration/src/main.rs
rename to src/meta/model_v2/migration/src/main.rs
index 9354e45ecd198..9be884a68a11d 100644
--- a/src/meta/src/model_v2/migration/src/main.rs
+++ b/src/meta/model_v2/migration/src/main.rs
@@ -2,5 +2,5 @@ use sea_orm_migration::prelude::*;
 
 #[async_std::main]
 async fn main() {
-    cli::run_cli(model_migration::Migrator).await;
+    cli::run_cli(risingwave_meta_model_migration::Migrator).await;
 }
diff --git a/src/meta/src/model_v2/README.md b/src/meta/model_v2/src/README.md
similarity index 93%
rename from src/meta/src/model_v2/README.md
rename to src/meta/model_v2/src/README.md
index 25c22a4f566e1..48095d3e6d67f 100644
--- a/src/meta/src/model_v2/README.md
+++ b/src/meta/model_v2/src/README.md
@@ -1,6 +1,6 @@
 # How to define changes between versions and generate migration and model files
 
-- Generate a new migration file and apply it to the database, check [migration](./migration/README.md) for more details. Let's take a local PG database as an example(`postgres://postgres:@localhost:5432/postgres`):
+- Generate a new migration file and apply it to the database; see [migration](../migration/README.md) for more details. Let's take a local PG database as an example (`postgres://postgres:@localhost:5432/postgres`):
     ```sh
     export DATABASE_URL=postgres://postgres:@localhost:5432/postgres;
     cargo run -- generate MIGRATION_NAME
diff --git a/src/meta/src/model_v2/actor.rs b/src/meta/model_v2/src/actor.rs
similarity index 97%
rename from src/meta/src/model_v2/actor.rs
rename to src/meta/model_v2/src/actor.rs
index 8fecb3046b1bc..79a70e3f65e95 100644
--- a/src/meta/src/model_v2/actor.rs
+++ b/src/meta/model_v2/src/actor.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::I32Array;
+use crate::I32Array;
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "actor")]
diff --git a/src/meta/src/model_v2/cluster.rs b/src/meta/model_v2/src/cluster.rs
similarity index 100%
rename from src/meta/src/model_v2/cluster.rs
rename to src/meta/model_v2/src/cluster.rs
diff --git a/src/meta/src/model_v2/compaction_config.rs b/src/meta/model_v2/src/compaction_config.rs
similarity index 100%
rename from src/meta/src/model_v2/compaction_config.rs
rename to src/meta/model_v2/src/compaction_config.rs
diff --git a/src/meta/src/model_v2/compaction_status.rs b/src/meta/model_v2/src/compaction_status.rs
similarity index 100%
rename from src/meta/src/model_v2/compaction_status.rs
rename to src/meta/model_v2/src/compaction_status.rs
diff --git a/src/meta/src/model_v2/compaction_task.rs b/src/meta/model_v2/src/compaction_task.rs
similarity index 100%
rename from src/meta/src/model_v2/compaction_task.rs
rename to src/meta/model_v2/src/compaction_task.rs
diff --git a/src/meta/src/model_v2/connection.rs b/src/meta/model_v2/src/connection.rs
similarity index 85%
rename from src/meta/src/model_v2/connection.rs
rename to src/meta/model_v2/src/connection.rs
index f6638ed0b53a4..8cff6b2a6025b 100644
--- a/src/meta/src/model_v2/connection.rs
+++ b/src/meta/model_v2/src/connection.rs
@@ -12,13 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use risingwave_pb::catalog::connection::{PbInfo, PbPrivateLinkService};
+use risingwave_pb::catalog::connection::PbInfo;
 use risingwave_pb::catalog::PbConnection;
 use sea_orm::entity::prelude::*;
-use sea_orm::{ActiveValue, FromJsonQueryResult};
-use serde::{Deserialize, Serialize};
+use sea_orm::ActiveValue;
 
-use crate::model_v2::ConnectionId;
+use crate::{ConnectionId, PrivateLinkService};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "connection")]
@@ -65,11 +64,6 @@ impl Related<super::source::Entity> for Entity {
 
 impl ActiveModelBehavior for ActiveModel {}
 
-#[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Serialize, Deserialize, Default)]
-pub struct PrivateLinkService(pub PbPrivateLinkService);
-
-impl Eq for PrivateLinkService {}
-
 impl From<PbConnection> for ActiveModel {
     fn from(conn: PbConnection) -> Self {
         let Some(PbInfo::PrivateLinkService(private_link_srv)) = conn.info else {
diff --git a/src/meta/src/model_v2/database.rs b/src/meta/model_v2/src/database.rs
similarity index 81%
rename from src/meta/src/model_v2/database.rs
rename to src/meta/model_v2/src/database.rs
index 909c12eceac5a..95ff3a8aee8e6 100644
--- a/src/meta/src/model_v2/database.rs
+++ b/src/meta/model_v2/src/database.rs
@@ -12,9 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use risingwave_pb::catalog::PbDatabase;
 use sea_orm::entity::prelude::*;
+use sea_orm::ActiveValue;
 
-use crate::model_v2::DatabaseId;
+use crate::DatabaseId;
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "database")]
@@ -44,3 +46,12 @@ impl Related<super::object::Entity> for Entity {
 }
 
 impl ActiveModelBehavior for ActiveModel {}
+
+impl From<PbDatabase> for ActiveModel {
+    fn from(db: PbDatabase) -> Self {
+        Self {
+            database_id: ActiveValue::Set(db.id),
+            name: ActiveValue::Set(db.name),
+        }
+    }
+}
diff --git a/src/meta/src/model_v2/fragment.rs b/src/meta/model_v2/src/fragment.rs
similarity index 98%
rename from src/meta/src/model_v2/fragment.rs
rename to src/meta/model_v2/src/fragment.rs
index 9263dd99eabb8..c590a58da771e 100644
--- a/src/meta/src/model_v2/fragment.rs
+++ b/src/meta/model_v2/src/fragment.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::I32Array;
+use crate::I32Array;
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "fragment")]
diff --git a/src/meta/src/model_v2/function.rs b/src/meta/model_v2/src/function.rs
similarity index 97%
rename from src/meta/src/model_v2/function.rs
rename to src/meta/model_v2/src/function.rs
index 663f8e2284fd7..4126dddc0f5ee 100644
--- a/src/meta/src/model_v2/function.rs
+++ b/src/meta/model_v2/src/function.rs
@@ -17,7 +17,7 @@ use risingwave_pb::catalog::PbFunction;
 use sea_orm::entity::prelude::*;
 use sea_orm::ActiveValue;
 
-use crate::model_v2::{DataType, DataTypeArray, FunctionId};
+use crate::{DataType, DataTypeArray, FunctionId};
 
 #[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
 #[sea_orm(rs_type = "String", db_type = "String(None)")]
diff --git a/src/meta/src/model_v2/hummock_pinned_snapshot.rs b/src/meta/model_v2/src/hummock_pinned_snapshot.rs
similarity index 100%
rename from src/meta/src/model_v2/hummock_pinned_snapshot.rs
rename to src/meta/model_v2/src/hummock_pinned_snapshot.rs
diff --git a/src/meta/src/model_v2/hummock_pinned_version.rs b/src/meta/model_v2/src/hummock_pinned_version.rs
similarity index 100%
rename from src/meta/src/model_v2/hummock_pinned_version.rs
rename to src/meta/model_v2/src/hummock_pinned_version.rs
diff --git a/src/meta/src/model_v2/hummock_version_delta.rs b/src/meta/model_v2/src/hummock_version_delta.rs
similarity index 100%
rename from src/meta/src/model_v2/hummock_version_delta.rs
rename to src/meta/model_v2/src/hummock_version_delta.rs
diff --git a/src/meta/src/model_v2/hummock_version_stats.rs b/src/meta/model_v2/src/hummock_version_stats.rs
similarity index 100%
rename from src/meta/src/model_v2/hummock_version_stats.rs
rename to src/meta/model_v2/src/hummock_version_stats.rs
diff --git a/src/meta/src/model_v2/index.rs b/src/meta/model_v2/src/index.rs
similarity index 91%
rename from src/meta/src/model_v2/index.rs
rename to src/meta/model_v2/src/index.rs
index 6a4b7d1b349ca..c85a896914240 100644
--- a/src/meta/src/model_v2/index.rs
+++ b/src/meta/model_v2/src/index.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{I32Array, IndexId, TableId};
+use crate::{ExprNodeArray, I32Array, IndexId, JobStatus, TableId};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "index")]
@@ -24,8 +24,9 @@ pub struct Model {
     pub name: String,
     pub index_table_id: TableId,
     pub primary_table_id: TableId,
-    pub index_items: Option<Json>,
-    pub original_columns: Option<I32Array>,
+    pub index_items: ExprNodeArray,
+    pub original_columns: I32Array,
+    pub job_status: JobStatus,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
diff --git a/src/meta/model_v2/src/lib.rs b/src/meta/model_v2/src/lib.rs
new file mode 100644
index 0000000000000..5fe23bcaa280c
--- /dev/null
+++ b/src/meta/model_v2/src/lib.rs
@@ -0,0 +1,134 @@
+// Copyright 2023 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::HashMap;
+
+use risingwave_pb::catalog::{PbCreateType, PbStreamJobStatus};
+use sea_orm::{DeriveActiveEnum, EnumIter, FromJsonQueryResult};
+use serde::{Deserialize, Serialize};
+
+pub mod prelude;
+
+pub mod actor;
+pub mod cluster;
+pub mod compaction_config;
+pub mod compaction_status;
+pub mod compaction_task;
+pub mod connection;
+pub mod database;
+pub mod fragment;
+pub mod function;
+pub mod hummock_pinned_snapshot;
+pub mod hummock_pinned_version;
+pub mod hummock_version_delta;
+pub mod hummock_version_stats;
+pub mod index;
+pub mod object;
+pub mod object_dependency;
+pub mod schema;
+pub mod sink;
+pub mod source;
+pub mod system_parameter;
+pub mod table;
+pub mod user;
+pub mod user_privilege;
+pub mod view;
+pub mod worker;
+pub mod worker_property;
+
+pub type WorkerId = u32;
+pub type TransactionId = u32;
+
+pub type ObjectId = u32;
+pub type DatabaseId = ObjectId;
+pub type SchemaId = ObjectId;
+pub type TableId = ObjectId;
+pub type SourceId = ObjectId;
+pub type SinkId = ObjectId;
+pub type IndexId = ObjectId;
+pub type ViewId = ObjectId;
+pub type FunctionId = ObjectId;
+pub type ConnectionId = ObjectId;
+pub type UserId = u32;
+
+#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
+#[sea_orm(rs_type = "String", db_type = "String(None)")]
+pub enum JobStatus {
+    #[sea_orm(string_value = "CREATING")]
+    Creating,
+    #[sea_orm(string_value = "CREATED")]
+    Created,
+}
+
+impl From<JobStatus> for PbStreamJobStatus {
+    fn from(job_status: JobStatus) -> Self {
+        match job_status {
+            JobStatus::Creating => Self::Creating,
+            JobStatus::Created => Self::Created,
+        }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
+#[sea_orm(rs_type = "String", db_type = "String(None)")]
+pub enum CreateType {
+    #[sea_orm(string_value = "BACKGROUND")]
+    Background,
+    #[sea_orm(string_value = "FOREGROUND")]
+    Foreground,
+}
+
+impl From<CreateType> for PbCreateType {
+    fn from(create_type: CreateType) -> Self {
+        match create_type {
+            CreateType::Background => Self::Background,
+            CreateType::Foreground => Self::Foreground,
+        }
+    }
+}
+
+/// Defines a struct with a single pb field that derives `FromJsonQueryResult`, which helps map a JSON value stored in the database to the Pb struct.
+macro_rules! derive_from_json_struct {
+    ($struct_name:ident, $field_type:ty) => {
+        #[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Serialize, Deserialize, Default)]
+        pub struct $struct_name(pub $field_type);
+        impl Eq for $struct_name {}
+    };
+}
+
+derive_from_json_struct!(I32Array, Vec<i32>);
+derive_from_json_struct!(DataType, risingwave_pb::data::DataType);
+derive_from_json_struct!(DataTypeArray, Vec<risingwave_pb::data::DataType>);
+derive_from_json_struct!(FieldArray, Vec<risingwave_pb::plan_common::Field>);
+derive_from_json_struct!(Property, HashMap<String, String>);
+derive_from_json_struct!(ColumnCatalog, risingwave_pb::plan_common::PbColumnCatalog);
+derive_from_json_struct!(
+    ColumnCatalogArray,
+    Vec<risingwave_pb::plan_common::PbColumnCatalog>
+);
+derive_from_json_struct!(StreamSourceInfo, risingwave_pb::catalog::PbStreamSourceInfo);
+derive_from_json_struct!(WatermarkDesc, risingwave_pb::catalog::PbWatermarkDesc);
+derive_from_json_struct!(
+    WatermarkDescArray,
+    Vec<risingwave_pb::catalog::PbWatermarkDesc>
+);
+derive_from_json_struct!(ExprNodeArray, Vec<risingwave_pb::expr::PbExprNode>);
+derive_from_json_struct!(ColumnOrderArray, Vec<risingwave_pb::common::PbColumnOrder>);
+derive_from_json_struct!(SinkFormatDesc, risingwave_pb::catalog::PbSinkFormatDesc);
+derive_from_json_struct!(Cardinality, risingwave_pb::plan_common::PbCardinality);
+derive_from_json_struct!(TableVersion, risingwave_pb::catalog::table::PbTableVersion);
+derive_from_json_struct!(
+    PrivateLinkService,
+    risingwave_pb::catalog::connection::PbPrivateLinkService
+);
diff --git a/src/meta/src/model_v2/object.rs b/src/meta/model_v2/src/object.rs
similarity index 98%
rename from src/meta/src/model_v2/object.rs
rename to src/meta/model_v2/src/object.rs
index 5048f93a483d9..39506777068a3 100644
--- a/src/meta/src/model_v2/object.rs
+++ b/src/meta/model_v2/src/object.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{DatabaseId, ObjectId, SchemaId, UserId};
+use crate::{DatabaseId, ObjectId, SchemaId, UserId};
 
 #[derive(Clone, Debug, PartialEq, Eq, Copy, EnumIter, DeriveActiveEnum)]
 #[sea_orm(rs_type = "String", db_type = "String(None)")]
diff --git a/src/meta/src/model_v2/object_dependency.rs b/src/meta/model_v2/src/object_dependency.rs
similarity index 97%
rename from src/meta/src/model_v2/object_dependency.rs
rename to src/meta/model_v2/src/object_dependency.rs
index 53800112a7370..52ca229c6997a 100644
--- a/src/meta/src/model_v2/object_dependency.rs
+++ b/src/meta/model_v2/src/object_dependency.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{ObjectId, UserId};
+use crate::{ObjectId, UserId};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "object_dependency")]
diff --git a/src/meta/src/model_v2/prelude.rs b/src/meta/model_v2/src/prelude.rs
similarity index 100%
rename from src/meta/src/model_v2/prelude.rs
rename to src/meta/model_v2/src/prelude.rs
diff --git a/src/meta/src/model_v2/schema.rs b/src/meta/model_v2/src/schema.rs
similarity index 81%
rename from src/meta/src/model_v2/schema.rs
rename to src/meta/model_v2/src/schema.rs
index 2c28665fd06f0..0af2d7fc020c9 100644
--- a/src/meta/src/model_v2/schema.rs
+++ b/src/meta/model_v2/src/schema.rs
@@ -12,9 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use risingwave_pb::catalog::PbSchema;
 use sea_orm::entity::prelude::*;
+use sea_orm::ActiveValue;
 
-use crate::model_v2::SchemaId;
+use crate::SchemaId;
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "schema")]
@@ -43,3 +45,12 @@ impl Related<super::object::Entity> for Entity {
 }
 
 impl ActiveModelBehavior for ActiveModel {}
+
+impl From<PbSchema> for ActiveModel {
+    fn from(schema: PbSchema) -> Self {
+        Self {
+            schema_id: ActiveValue::Set(schema.id),
+            name: ActiveValue::Set(schema.name),
+        }
+    }
+}
diff --git a/src/meta/src/model_v2/sink.rs b/src/meta/model_v2/src/sink.rs
similarity index 75%
rename from src/meta/src/model_v2/sink.rs
rename to src/meta/model_v2/src/sink.rs
index 8c22a04a8fd01..21ac172246703 100644
--- a/src/meta/src/model_v2/sink.rs
+++ b/src/meta/model_v2/src/sink.rs
@@ -12,9 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use risingwave_pb::catalog::PbSinkType;
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{ConnectionId, I32Array, SinkId};
+use crate::{
+    ColumnCatalogArray, ColumnOrderArray, ConnectionId, I32Array, JobStatus, Property,
+    SinkFormatDesc, SinkId,
+};
 
 #[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
 #[sea_orm(rs_type = "String", db_type = "String(None)")]
@@ -27,22 +31,34 @@ pub enum SinkType {
     Upsert,
 }
 
+impl From<SinkType> for PbSinkType { // Maps the stored sink type onto the protobuf sink type.
+    fn from(sink_type: SinkType) -> Self {
+        match sink_type { // variant-for-variant by name; there is no wildcard arm
+            SinkType::AppendOnly => Self::AppendOnly,
+            SinkType::ForceAppendOnly => Self::ForceAppendOnly,
+            SinkType::Upsert => Self::Upsert,
+        }
+    }
+}
+
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "sink")]
 pub struct Model {
     #[sea_orm(primary_key, auto_increment = false)]
     pub sink_id: SinkId,
     pub name: String,
-    pub columns: Option<Json>,
-    pub pk_column_ids: Option<Json>,
-    pub distribution_key: Option<I32Array>,
-    pub downstream_pk: Option<I32Array>,
+    pub columns: ColumnCatalogArray,
+    pub plan_pk: ColumnOrderArray,
+    pub distribution_key: I32Array,
+    pub downstream_pk: I32Array,
     pub sink_type: SinkType,
-    pub properties: Option<Json>,
+    pub properties: Property,
     pub definition: String,
     pub connection_id: Option<ConnectionId>,
     pub db_name: String,
     pub sink_from_name: String,
+    pub sink_format_desc: Option<SinkFormatDesc>,
+    pub job_status: JobStatus,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
diff --git a/src/meta/src/model_v2/source.rs b/src/meta/model_v2/src/source.rs
similarity index 82%
rename from src/meta/src/model_v2/source.rs
rename to src/meta/model_v2/src/source.rs
index 9bb6acc9382aa..620d002c27b55 100644
--- a/src/meta/src/model_v2/source.rs
+++ b/src/meta/model_v2/src/source.rs
@@ -14,7 +14,10 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{ConnectionId, SourceId, TableId};
+use crate::{
+    ColumnCatalogArray, ConnectionId, I32Array, Property, SourceId, StreamSourceInfo, TableId,
+    WatermarkDescArray,
+};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "source")]
@@ -22,15 +25,16 @@ pub struct Model {
     #[sea_orm(primary_key, auto_increment = false)]
     pub source_id: SourceId,
     pub name: String,
-    pub row_id_index: Option<String>,
-    pub columns: Option<Json>,
-    pub pk_column_ids: Option<Json>,
-    pub properties: Option<Json>,
-    pub definition: Option<String>,
-    pub source_info: Option<Json>,
-    pub watermark_descs: Option<Json>,
+    pub row_id_index: Option<u32>,
+    pub columns: ColumnCatalogArray,
+    pub pk_column_ids: I32Array,
+    pub properties: Property,
+    pub definition: String,
+    pub source_info: Option<StreamSourceInfo>,
+    pub watermark_descs: WatermarkDescArray,
     pub optional_associated_table_id: Option<TableId>,
     pub connection_id: Option<ConnectionId>,
+    pub version: u64,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
diff --git a/src/meta/src/model_v2/system_parameter.rs b/src/meta/model_v2/src/system_parameter.rs
similarity index 100%
rename from src/meta/src/model_v2/system_parameter.rs
rename to src/meta/model_v2/src/system_parameter.rs
diff --git a/src/meta/src/model_v2/table.rs b/src/meta/model_v2/src/table.rs
similarity index 63%
rename from src/meta/src/model_v2/table.rs
rename to src/meta/model_v2/src/table.rs
index b2815eed7c8a0..a335f41023442 100644
--- a/src/meta/src/model_v2/table.rs
+++ b/src/meta/model_v2/src/table.rs
@@ -12,9 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use risingwave_pb::catalog::table::PbTableType;
+use risingwave_pb::catalog::PbHandleConflictBehavior;
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{I32Array, Property, SourceId, TableId};
+use crate::{
+    Cardinality, ColumnCatalogArray, ColumnOrderArray, CreateType, I32Array, JobStatus, Property,
+    SourceId, TableId, TableVersion,
+};
 
 #[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
 #[sea_orm(rs_type = "String", db_type = "String(None)")]
@@ -29,6 +34,38 @@ pub enum TableType {
     Internal,
 }
 
+impl From<TableType> for PbTableType { // Maps the stored table type onto the protobuf table type.
+    fn from(table_type: TableType) -> Self {
+        match table_type { // variant-for-variant by name; there is no wildcard arm
+            TableType::Table => Self::Table,
+            TableType::MaterializedView => Self::MaterializedView,
+            TableType::Index => Self::Index,
+            TableType::Internal => Self::Internal,
+        }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)] // Type of the table model's `handle_pk_conflict_behavior` column.
+#[sea_orm(rs_type = "String", db_type = "String(None)")] // persisted as a string column
+pub enum HandleConflictBehavior {
+    #[sea_orm(string_value = "OVERWRITE")] // each `string_value` is the stored form of the variant below it
+    Overwrite,
+    #[sea_orm(string_value = "IGNORE")]
+    Ignore,
+    #[sea_orm(string_value = "NO_CHECK")]
+    NoCheck,
+}
+
+impl From<HandleConflictBehavior> for PbHandleConflictBehavior { // Maps the stored conflict behavior onto the protobuf enum.
+    fn from(handle_conflict_behavior: HandleConflictBehavior) -> Self {
+        match handle_conflict_behavior { // variant-for-variant by name; there is no wildcard arm
+            HandleConflictBehavior::Overwrite => Self::Overwrite,
+            HandleConflictBehavior::Ignore => Self::Ignore,
+            HandleConflictBehavior::NoCheck => Self::NoCheck,
+        }
+    }
+}
+
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "table")]
 pub struct Model {
@@ -37,23 +74,27 @@ pub struct Model {
     pub name: String,
     pub optional_associated_source_id: Option<SourceId>,
     pub table_type: TableType,
-    pub columns: Json,
-    pub pk: Json,
+    pub columns: ColumnCatalogArray,
+    pub pk: ColumnOrderArray,
     pub distribution_key: I32Array,
+    pub stream_key: I32Array,
     pub append_only: bool,
     pub properties: Property,
     pub fragment_id: i32,
-    pub vnode_col_index: I32Array,
+    pub vnode_col_index: Option<u32>,
+    pub row_id_index: Option<u32>,
     pub value_indices: I32Array,
     pub definition: String,
-    pub handle_pk_conflict_behavior: i32,
-    pub read_prefix_len_hint: i32,
+    pub handle_pk_conflict_behavior: HandleConflictBehavior,
+    pub read_prefix_len_hint: u32,
     pub watermark_indices: I32Array,
     pub dist_key_in_pk: I32Array,
     pub dml_fragment_id: Option<i32>,
-    pub cardinality: Option<I32Array>,
+    pub cardinality: Option<Cardinality>,
     pub cleaned_by_watermark: bool,
-    pub version: Json,
+    pub job_status: JobStatus,
+    pub create_type: CreateType,
+    pub version: TableVersion,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
diff --git a/src/meta/src/model_v2/user.rs b/src/meta/model_v2/src/user.rs
similarity index 97%
rename from src/meta/src/model_v2/user.rs
rename to src/meta/model_v2/src/user.rs
index 0e7ab4dd17876..e9cd36f75fb43 100644
--- a/src/meta/src/model_v2/user.rs
+++ b/src/meta/model_v2/src/user.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::UserId;
+use crate::UserId;
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "user")]
diff --git a/src/meta/src/model_v2/user_privilege.rs b/src/meta/model_v2/src/user_privilege.rs
similarity index 97%
rename from src/meta/src/model_v2/user_privilege.rs
rename to src/meta/model_v2/src/user_privilege.rs
index 335f716cec1c8..7e12af225ed02 100644
--- a/src/meta/src/model_v2/user_privilege.rs
+++ b/src/meta/model_v2/src/user_privilege.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{ObjectId, UserId};
+use crate::{ObjectId, UserId};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "user_privilege")]
diff --git a/src/meta/src/model_v2/view.rs b/src/meta/model_v2/src/view.rs
similarity index 93%
rename from src/meta/src/model_v2/view.rs
rename to src/meta/model_v2/src/view.rs
index 5bad8593e0b72..0de9ea64a616e 100644
--- a/src/meta/src/model_v2/view.rs
+++ b/src/meta/model_v2/src/view.rs
@@ -16,7 +16,7 @@ use risingwave_pb::catalog::PbView;
 use sea_orm::entity::prelude::*;
 use sea_orm::ActiveValue;
 
-use crate::model_v2::{FieldArray, Property, ViewId};
+use crate::{FieldArray, Property, ViewId};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "view")]
@@ -25,7 +25,7 @@ pub struct Model {
     pub view_id: ViewId,
     pub name: String,
     pub properties: Property,
-    pub sql: String,
+    pub definition: String,
     pub columns: FieldArray,
 }
 
@@ -55,7 +55,7 @@ impl From<PbView> for ActiveModel {
             view_id: ActiveValue::Set(view.id as _),
             name: ActiveValue::Set(view.name),
             properties: ActiveValue::Set(Property(view.properties)),
-            sql: ActiveValue::Set(view.sql),
+            definition: ActiveValue::Set(view.sql),
             columns: ActiveValue::Set(FieldArray(view.columns)),
         }
     }
diff --git a/src/meta/model_v2/src/worker.rs b/src/meta/model_v2/src/worker.rs
new file mode 100644
index 0000000000000..d164fba62b41e
--- /dev/null
+++ b/src/meta/model_v2/src/worker.rs
@@ -0,0 +1,128 @@
+// Copyright 2023 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use risingwave_pb::common::worker_node::PbState;
+use risingwave_pb::common::{PbWorkerNode, PbWorkerType};
+use sea_orm::entity::prelude::*;
+use sea_orm::ActiveValue;
+
+use crate::{TransactionId, WorkerId};
+
+#[derive(Clone, Debug, Hash, PartialEq, Eq, EnumIter, DeriveActiveEnum)] // Kind of worker node; type of the `worker_type` column of the `worker` table.
+#[sea_orm(rs_type = "String", db_type = "String(None)")] // persisted as a string column
+pub enum WorkerType {
+    #[sea_orm(string_value = "FRONTEND")] // each `string_value` is the stored form of the variant below it
+    Frontend,
+    #[sea_orm(string_value = "COMPUTE_NODE")]
+    ComputeNode,
+    #[sea_orm(string_value = "RISE_CTL")]
+    RiseCtl,
+    #[sea_orm(string_value = "COMPACTOR")]
+    Compactor,
+    #[sea_orm(string_value = "META")]
+    Meta,
+}
+
+impl From<PbWorkerType> for WorkerType { // Converts the protobuf worker type into the stored worker type.
+    fn from(worker_type: PbWorkerType) -> Self {
+        match worker_type {
+            PbWorkerType::Unspecified => unreachable!("unspecified worker type"), // panics: `Unspecified` has no stored counterpart, callers must not pass it
+            PbWorkerType::Frontend => Self::Frontend,
+            PbWorkerType::ComputeNode => Self::ComputeNode,
+            PbWorkerType::RiseCtl => Self::RiseCtl,
+            PbWorkerType::Compactor => Self::Compactor,
+            PbWorkerType::Meta => Self::Meta,
+        }
+    }
+}
+
+impl From<WorkerType> for PbWorkerType { // Converts the stored worker type back into the protobuf worker type.
+    fn from(worker_type: WorkerType) -> Self {
+        match worker_type { // total over `WorkerType`; `PbWorkerType::Unspecified` is never produced
+            WorkerType::Frontend => Self::Frontend,
+            WorkerType::ComputeNode => Self::ComputeNode,
+            WorkerType::RiseCtl => Self::RiseCtl,
+            WorkerType::Compactor => Self::Compactor,
+            WorkerType::Meta => Self::Meta,
+        }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)] // Worker state; type of the `status` column of the `worker` table.
+#[sea_orm(rs_type = "String", db_type = "String(None)")] // persisted as a string column
+pub enum WorkerStatus {
+    #[sea_orm(string_value = "STARTING")] // each `string_value` is the stored form of the variant below it
+    Starting,
+    #[sea_orm(string_value = "RUNNING")]
+    Running,
+}
+
+impl From<PbState> for WorkerStatus { // Converts the protobuf worker state into the stored worker status.
+    fn from(state: PbState) -> Self {
+        match state {
+            PbState::Unspecified => unreachable!("unspecified worker status"), // panics: `Unspecified` has no stored counterpart, callers must not pass it
+            PbState::Starting => Self::Starting,
+            PbState::Running => Self::Running,
+        }
+    }
+}
+
+impl From<WorkerStatus> for PbState { // Converts the stored worker status back into the protobuf worker state.
+    fn from(status: WorkerStatus) -> Self {
+        match status { // total over `WorkerStatus`; `PbState::Unspecified` is never produced
+            WorkerStatus::Starting => Self::Starting,
+            WorkerStatus::Running => Self::Running,
+        }
+    }
+}
+
+impl From<&PbWorkerNode> for ActiveModel { // Builds a row for the `worker` table from a protobuf worker node.
+    fn from(worker: &PbWorkerNode) -> Self {
+        let host = worker.host.clone().unwrap(); // panics if the node carries no `host` address
+        Self {
+            worker_id: ActiveValue::Set(worker.id),
+            worker_type: ActiveValue::Set(worker.r#type().into()), // goes through `From<PbWorkerType>`, which panics on `Unspecified`
+            host: ActiveValue::Set(host.host),
+            port: ActiveValue::Set(host.port),
+            status: ActiveValue::Set(worker.state().into()), // goes through `From<PbState>`, which panics on `Unspecified`
+            ..Default::default() // the remaining column, `transaction_id`, keeps the `ActiveModel` default
+        }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] // Row model of the `worker` table.
+#[sea_orm(table_name = "worker")]
+pub struct Model {
+    #[sea_orm(primary_key)] // NOTE(review): no `auto_increment = false` here, unlike the sink/source models' keys — confirm intended
+    pub worker_id: WorkerId,
+    pub worker_type: WorkerType,
+    pub host: String, // `host` and `port` are filled from the node's `host` address in `From<&PbWorkerNode>`
+    pub port: i32,
+    pub status: WorkerStatus,
+    pub transaction_id: Option<TransactionId>, // nullable; not set by `From<&PbWorkerNode>`
+}
+
+#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] // Relations from the `worker` entity to other entities.
+pub enum Relation {
+    #[sea_orm(has_many = "super::worker_property::Entity")] // declared as has-many towards `worker_property`
+    WorkerProperty,
+}
+
+impl Related<super::worker_property::Entity> for Entity { // Exposes `Relation::WorkerProperty` as the path from this entity to `worker_property`.
+    fn to() -> RelationDef {
+        Relation::WorkerProperty.def() // reuses the definition derived on `Relation`
+    }
+}
+
+impl ActiveModelBehavior for ActiveModel {} // Empty impl: no hooks are overridden, so the trait's defaults apply.
diff --git a/src/meta/src/model_v2/worker_property.rs b/src/meta/model_v2/src/worker_property.rs
similarity index 97%
rename from src/meta/src/model_v2/worker_property.rs
rename to src/meta/model_v2/src/worker_property.rs
index 8521cbed15ce2..0512ea97e5be3 100644
--- a/src/meta/src/model_v2/worker_property.rs
+++ b/src/meta/model_v2/src/worker_property.rs
@@ -14,7 +14,7 @@
 
 use sea_orm::entity::prelude::*;
 
-use crate::model_v2::{I32Array, WorkerId};
+use crate::{I32Array, WorkerId};
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
 #[sea_orm(table_name = "worker_property")]
diff --git a/src/meta/node/Cargo.toml b/src/meta/node/Cargo.toml
index 8c2a5aeadbe41..84793a74591c8 100644
--- a/src/meta/node/Cargo.toml
+++ b/src/meta/node/Cargo.toml
@@ -20,13 +20,13 @@ either = "1"
 etcd-client = { workspace = true }
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
 itertools = "0.11"
-model_migration = { path = "../src/model_v2/migration" }
 prometheus-http-query = "0.7"
 regex = "1"
 risingwave_common = { workspace = true }
 risingwave_common_heap_profiling = { workspace = true }
 risingwave_common_service = { workspace = true }
 risingwave_meta = { workspace = true }
+risingwave_meta_model_migration = { workspace = true }
 risingwave_meta_service = { workspace = true }
 risingwave_pb = { workspace = true }
 risingwave_rpc_client = { workspace = true }
diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs
index 55c7b27b0c80a..bf1bddad2070f 100644
--- a/src/meta/node/src/lib.rs
+++ b/src/meta/node/src/lib.rs
@@ -14,7 +14,7 @@
 
 #![feature(lint_reasons)]
 #![feature(let_chains)]
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 mod server;
 use std::time::Duration;
diff --git a/src/meta/node/src/server.rs b/src/meta/node/src/server.rs
index d922f1c37e033..d8d8525aca235 100644
--- a/src/meta/node/src/server.rs
+++ b/src/meta/node/src/server.rs
@@ -19,7 +19,6 @@ use either::Either;
 use etcd_client::ConnectOptions;
 use futures::future::join_all;
 use itertools::Itertools;
-use model_migration::{Migrator, MigratorTrait};
 use regex::Regex;
 use risingwave_common::monitor::connection::{RouterExt, TcpConfig};
 use risingwave_common::telemetry::manager::TelemetryManager;
@@ -28,6 +27,7 @@ use risingwave_common_service::metrics_manager::MetricsManager;
 use risingwave_common_service::tracing::TracingExtractLayer;
 use risingwave_meta::rpc::intercept::MetricsMiddlewareLayer;
 use risingwave_meta::rpc::ElectionClientRef;
+use risingwave_meta_model_migration::{Migrator, MigratorTrait};
 use risingwave_meta_service::backup_service::BackupServiceImpl;
 use risingwave_meta_service::cloud_service::CloudServiceImpl;
 use risingwave_meta_service::cluster_service::ClusterServiceImpl;
diff --git a/src/meta/service/Cargo.toml b/src/meta/service/Cargo.toml
index 1760ccd56a85a..87b293f64a5e6 100644
--- a/src/meta/service/Cargo.toml
+++ b/src/meta/service/Cargo.toml
@@ -23,6 +23,7 @@ regex = "1"
 risingwave_common = { workspace = true }
 risingwave_connector = { workspace = true }
 risingwave_meta = { workspace = true }
+risingwave_meta_model_v2 = { workspace = true }
 risingwave_pb = { workspace = true }
 sea-orm = { version = "0.12.0", features = [
     "sqlx-mysql",
diff --git a/src/meta/service/src/ddl_service.rs b/src/meta/service/src/ddl_service.rs
index 935d398aeacb0..fac8f89e17b11 100644
--- a/src/meta/service/src/ddl_service.rs
+++ b/src/meta/service/src/ddl_service.rs
@@ -717,7 +717,7 @@ impl DdlService for DdlServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn get_tables(
         &self,
         request: Request<GetTablesRequest>,
@@ -732,6 +732,11 @@ impl DdlService for DdlServiceImpl {
         }
         Ok(Response::new(GetTablesResponse { tables }))
     }
+
+    async fn wait(&self, _request: Request<WaitRequest>) -> Result<Response<WaitResponse>, Status> {
+        self.ddl_controller.wait().await?; // request payload is ignored; delegates to `ddl_controller.wait()` and `?` returns its error to the caller
+        Ok(Response::new(WaitResponse {})) // success carries an empty `WaitResponse`
+    }
 }
 
 impl DdlServiceImpl {
diff --git a/src/meta/service/src/heartbeat_service.rs b/src/meta/service/src/heartbeat_service.rs
index 7c51b39346894..e31058ff2bdc5 100644
--- a/src/meta/service/src/heartbeat_service.rs
+++ b/src/meta/service/src/heartbeat_service.rs
@@ -32,7 +32,7 @@ impl HeartbeatServiceImpl {
 
 #[async_trait::async_trait]
 impl HeartbeatService for HeartbeatServiceImpl {
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn heartbeat(
         &self,
         request: Request<HeartbeatRequest>,
diff --git a/src/meta/service/src/lib.rs b/src/meta/service/src/lib.rs
index 0d473a6ed031f..6c8cc11f8971c 100644
--- a/src/meta/service/src/lib.rs
+++ b/src/meta/service/src/lib.rs
@@ -16,7 +16,7 @@
 #![feature(let_chains)]
 #![feature(lazy_cell)]
 #![feature(impl_trait_in_assoc_type)]
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 use risingwave_meta::*;
 
diff --git a/src/meta/service/src/meta_member_service.rs b/src/meta/service/src/meta_member_service.rs
index 25c4c7ad4cc84..5753061176e8c 100644
--- a/src/meta/service/src/meta_member_service.rs
+++ b/src/meta/service/src/meta_member_service.rs
@@ -36,7 +36,7 @@ impl MetaMemberServiceImpl {
 
 #[async_trait::async_trait]
 impl MetaMemberService for MetaMemberServiceImpl {
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn members(
         &self,
         _request: Request<MembersRequest>,
diff --git a/src/meta/service/src/notification_service.rs b/src/meta/service/src/notification_service.rs
index bd247c1e18980..0fcc470a70e39 100644
--- a/src/meta/service/src/notification_service.rs
+++ b/src/meta/service/src/notification_service.rs
@@ -207,7 +207,7 @@ impl NotificationServiceImpl {
 impl NotificationService for NotificationServiceImpl {
     type SubscribeStream = UnboundedReceiverStream<Notification>;
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn subscribe(
         &self,
         request: Request<SubscribeRequest>,
diff --git a/src/meta/service/src/scale_service.rs b/src/meta/service/src/scale_service.rs
index f231ea5f4955d..676180adc7581 100644
--- a/src/meta/service/src/scale_service.rs
+++ b/src/meta/service/src/scale_service.rs
@@ -59,7 +59,7 @@ impl ScaleServiceImpl {
 
 #[async_trait::async_trait]
 impl ScaleService for ScaleServiceImpl {
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn get_cluster_info(
         &self,
         _: Request<GetClusterInfoRequest>,
@@ -110,7 +110,7 @@ impl ScaleService for ScaleServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn reschedule(
         &self,
         request: Request<RescheduleRequest>,
@@ -174,7 +174,7 @@ impl ScaleService for ScaleServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn get_reschedule_plan(
         &self,
         request: Request<GetReschedulePlanRequest>,
diff --git a/src/meta/service/src/stream_service.rs b/src/meta/service/src/stream_service.rs
index ef232d9b04ffd..92af1d4beb707 100644
--- a/src/meta/service/src/stream_service.rs
+++ b/src/meta/service/src/stream_service.rs
@@ -59,7 +59,7 @@ impl StreamServiceImpl {
 
 #[async_trait::async_trait]
 impl StreamManagerService for StreamServiceImpl {
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn flush(&self, request: Request<FlushRequest>) -> TonicResponse<FlushResponse> {
         self.env.idle_manager().record_activity();
         let req = request.into_inner();
@@ -71,7 +71,7 @@ impl StreamManagerService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn pause(&self, _: Request<PauseRequest>) -> Result<Response<PauseResponse>, Status> {
         let i = self
             .barrier_scheduler
@@ -83,7 +83,7 @@ impl StreamManagerService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn resume(&self, _: Request<ResumeRequest>) -> Result<Response<ResumeResponse>, Status> {
         let i = self
             .barrier_scheduler
@@ -122,7 +122,7 @@ impl StreamManagerService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn list_table_fragments(
         &self,
         request: Request<ListTableFragmentsRequest>,
@@ -165,7 +165,7 @@ impl StreamManagerService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn list_table_fragment_states(
         &self,
         _request: Request<ListTableFragmentStatesRequest>,
@@ -186,7 +186,7 @@ impl StreamManagerService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn list_fragment_distribution(
         &self,
         _request: Request<ListFragmentDistributionRequest>,
@@ -215,7 +215,7 @@ impl StreamManagerService for StreamServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn list_actor_states(
         &self,
         _request: Request<ListActorStatesRequest>,
diff --git a/src/meta/service/src/telemetry_service.rs b/src/meta/service/src/telemetry_service.rs
index 7c413406f13e5..42200e10a4eeb 100644
--- a/src/meta/service/src/telemetry_service.rs
+++ b/src/meta/service/src/telemetry_service.rs
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+use risingwave_meta_model_v2::prelude::Cluster;
 use risingwave_pb::meta::telemetry_info_service_server::TelemetryInfoService;
 use risingwave_pb::meta::{GetTelemetryInfoRequest, TelemetryInfoResponse};
 use sea_orm::EntityTrait;
@@ -19,7 +20,6 @@ use tonic::{Request, Response, Status};
 
 use crate::controller::SqlMetaStore;
 use crate::model::ClusterId;
-use crate::model_v2::prelude::Cluster;
 use crate::storage::MetaStoreRef;
 use crate::MetaResult;
 
diff --git a/src/meta/service/src/user_service.rs b/src/meta/service/src/user_service.rs
index 8c982521b112a..cb290766e6fd1 100644
--- a/src/meta/service/src/user_service.rs
+++ b/src/meta/service/src/user_service.rs
@@ -107,7 +107,7 @@ impl UserServiceImpl {
 
 #[async_trait::async_trait]
 impl UserService for UserServiceImpl {
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn create_user(
         &self,
         request: Request<CreateUserRequest>,
@@ -128,7 +128,7 @@ impl UserService for UserServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn drop_user(
         &self,
         request: Request<DropUserRequest>,
@@ -142,7 +142,7 @@ impl UserService for UserServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn update_user(
         &self,
         request: Request<UpdateUserRequest>,
@@ -165,7 +165,7 @@ impl UserService for UserServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn grant_privilege(
         &self,
         request: Request<GrantPrivilegeRequest>,
@@ -185,7 +185,7 @@ impl UserService for UserServiceImpl {
         }))
     }
 
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     async fn revoke_privilege(
         &self,
         request: Request<RevokePrivilegeRequest>,
diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs
index ed6ad289a5a68..d39dde51399d8 100644
--- a/src/meta/src/barrier/mod.rs
+++ b/src/meta/src/barrier/mod.rs
@@ -626,7 +626,7 @@ impl GlobalBarrierManager {
             let paused = self.take_pause_on_bootstrap().await.unwrap_or(false);
             let paused_reason = paused.then_some(PausedReason::Manual);
 
-            self.recovery(prev_epoch, paused_reason, true)
+            self.recovery(prev_epoch, paused_reason)
                 .instrument(span)
                 .await
         };
@@ -981,10 +981,7 @@ impl GlobalBarrierManager {
 
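-            // No need to clean dirty tables for barrier recovery,
-            // The foreground stream job should cleanup their own tables.
+            // `recovery` now always runs `clean_dirty_tables` and `clean_dirty_fragments`,
+            // so no separate dirty-table cleanup is needed at this call site.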
-            *state = self
-                .recovery(prev_epoch, None, false)
-                .instrument(span)
-                .await;
+            *state = self.recovery(prev_epoch, None).instrument(span).await;
             self.set_status(BarrierManagerStatus::Running).await;
         } else {
             panic!("failed to execute barrier: {:?}", err);
diff --git a/src/meta/src/barrier/recovery.rs b/src/meta/src/barrier/recovery.rs
index 21197a8df98d4..3e319f0e69a52 100644
--- a/src/meta/src/barrier/recovery.rs
+++ b/src/meta/src/barrier/recovery.rs
@@ -219,7 +219,6 @@ impl GlobalBarrierManager {
         &self,
         prev_epoch: TracedEpoch,
         paused_reason: Option<PausedReason>,
-        bootstrap_recovery: bool,
     ) -> BarrierManagerState {
         // Mark blocked and abort buffered schedules, they might be dirty already.
         self.scheduled_barriers
@@ -227,11 +226,9 @@ impl GlobalBarrierManager {
             .await;
 
         tracing::info!("recovery start!");
-        if bootstrap_recovery {
-            self.clean_dirty_tables()
-                .await
-                .expect("clean dirty tables should not fail");
-        }
+        self.clean_dirty_tables()
+            .await
+            .expect("clean dirty tables should not fail");
         self.clean_dirty_fragments()
             .await
             .expect("clean dirty fragments");
diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs
index c0cfcf3baba59..daaa9b684850c 100644
--- a/src/meta/src/controller/catalog.rs
+++ b/src/meta/src/controller/catalog.rs
@@ -15,11 +15,19 @@
 use std::iter;
 
 use itertools::Itertools;
+use risingwave_common::bail;
 use risingwave_common::catalog::{DEFAULT_SCHEMA_NAME, SYSTEM_SCHEMAS};
+use risingwave_meta_model_v2::object::ObjectType;
+use risingwave_meta_model_v2::prelude::*;
+use risingwave_meta_model_v2::{
+    connection, database, function, index, object, object_dependency, schema, sink, source, table,
+    view, ConnectionId, DatabaseId, FunctionId, ObjectId, PrivateLinkService, SchemaId, SourceId,
+    TableId, UserId,
+};
 use risingwave_pb::catalog::{
     PbConnection, PbDatabase, PbFunction, PbIndex, PbSchema, PbSink, PbSource, PbTable, PbView,
 };
-use risingwave_pb::meta::relation::{PbRelationInfo, RelationInfo};
+use risingwave_pb::meta::relation::PbRelationInfo;
 use risingwave_pb::meta::subscribe_response::{
     Info as NotificationInfo, Operation as NotificationOperation,
 };
@@ -30,20 +38,15 @@ use sea_orm::{
 };
 use tokio::sync::RwLock;
 
+use crate::controller::rename::{alter_relation_rename, alter_relation_rename_refs};
 use crate::controller::utils::{
     check_connection_name_duplicate, check_function_signature_duplicate,
     check_relation_name_duplicate, check_schema_name_duplicate, ensure_object_id,
-    ensure_object_not_refer, ensure_schema_empty, ensure_user_id, list_used_by, PartialObject,
+    ensure_object_not_refer, ensure_schema_empty, ensure_user_id, get_referring_objects,
+    get_referring_objects_cascade, PartialObject,
 };
 use crate::controller::ObjectModel;
 use crate::manager::{MetaSrvEnv, NotificationVersion};
-use crate::model_v2::connection::PrivateLinkService;
-use crate::model_v2::object::ObjectType;
-use crate::model_v2::prelude::*;
-use crate::model_v2::{
-    connection, database, function, index, object, object_dependency, schema, table, view,
-    ConnectionId, DatabaseId, FunctionId, ObjectId, SchemaId, SourceId, TableId, UserId,
-};
 use crate::rpc::ddl_controller::DropMode;
 use crate::{MetaError, MetaResult};
 
@@ -503,7 +506,7 @@ impl CatalogController {
         assert_eq!(obj.obj_type, object_type);
 
         let mut to_drop_objects = match drop_mode {
-            DropMode::Cascade => list_used_by(object_id, &txn).await?,
+            DropMode::Cascade => get_referring_objects_cascade(object_id, &txn).await?,
             DropMode::Restrict => {
                 ensure_object_not_refer(object_type, object_id, &txn).await?;
                 vec![]
@@ -589,7 +592,7 @@ impl CatalogController {
             .into_iter()
             .map(|obj| match obj.obj_type {
                 ObjectType::Table => PbRelation {
-                    relation_info: Some(RelationInfo::Table(PbTable {
+                    relation_info: Some(PbRelationInfo::Table(PbTable {
                         id: obj.oid,
                         schema_id: obj.schema_id.unwrap(),
                         database_id: obj.database_id.unwrap(),
@@ -597,7 +600,7 @@ impl CatalogController {
                     })),
                 },
                 ObjectType::Source => PbRelation {
-                    relation_info: Some(RelationInfo::Source(PbSource {
+                    relation_info: Some(PbRelationInfo::Source(PbSource {
                         id: obj.oid,
                         schema_id: obj.schema_id.unwrap(),
                         database_id: obj.database_id.unwrap(),
@@ -605,7 +608,7 @@ impl CatalogController {
                     })),
                 },
                 ObjectType::Sink => PbRelation {
-                    relation_info: Some(RelationInfo::Sink(PbSink {
+                    relation_info: Some(PbRelationInfo::Sink(PbSink {
                         id: obj.oid,
                         schema_id: obj.schema_id.unwrap(),
                         database_id: obj.database_id.unwrap(),
@@ -613,7 +616,7 @@ impl CatalogController {
                     })),
                 },
                 ObjectType::View => PbRelation {
-                    relation_info: Some(RelationInfo::View(PbView {
+                    relation_info: Some(PbRelationInfo::View(PbView {
                         id: obj.oid,
                         schema_id: obj.schema_id.unwrap(),
                         database_id: obj.database_id.unwrap(),
@@ -621,7 +624,7 @@ impl CatalogController {
                     })),
                 },
                 ObjectType::Index => PbRelation {
-                    relation_info: Some(RelationInfo::Index(PbIndex {
+                    relation_info: Some(PbRelationInfo::Index(PbIndex {
                         id: obj.oid,
                         schema_id: obj.schema_id.unwrap(),
                         database_id: obj.database_id.unwrap(),
@@ -647,6 +650,142 @@ impl CatalogController {
             version,
         ))
     }
+
+    pub async fn alter_relation_name(
+        &self,
+        object_type: ObjectType,
+        object_id: ObjectId,
+        object_name: &str,
+    ) -> MetaResult<NotificationVersion> { // Renames a relation and fixes dependants' SQL.
+        let inner = self.inner.write().await; // write guard is held until the function returns
+        let txn = inner.db.begin().await?; // every read/update below runs in this transaction
+        let obj: PartialObject = Object::find_by_id(object_id)
+            .into_partial_model()
+            .one(&txn)
+            .await?
+            .ok_or_else(|| MetaError::catalog_id_not_found(object_type.as_str(), object_id))?;
+        assert_eq!(obj.obj_type, object_type); // panics when the stored type differs
+        check_relation_name_duplicate(
+            object_name,
+            obj.database_id.unwrap(),
+            obj.schema_id.unwrap(),
+            &txn,
+        )
+        .await?; // NOTE(review): presumably fails if the name is already used — confirm in utils
+
+        let mut to_update_relations = vec![]; // sent to frontends after the commit
+        // rename relation: persist new name + rewritten definition; evaluates to the old name.
+        macro_rules! rename_relation {
+            ($entity:ident, $table:ident, $identity:ident, $object_id:expr) => {{
+                let (mut relation, obj) = $entity::find_by_id($object_id)
+                    .find_also_related(Object)
+                    .one(&txn)
+                    .await?
+                    .unwrap(); // panics if no row exists for the id
+                let old_name = relation.name.clone();
+                relation.name = object_name.into();
+                relation.definition = alter_relation_rename(&relation.definition, object_name);
+                let active_model = $table::ActiveModel {
+                    $identity: ActiveValue::Set(relation.$identity),
+                    name: ActiveValue::Set(object_name.into()),
+                    definition: ActiveValue::Set(relation.definition.clone()),
+                    ..Default::default()
+                };
+                active_model.update(&txn).await?;
+                to_update_relations.push(PbRelation {
+                    relation_info: Some(PbRelationInfo::$entity(
+                        ObjectModel(relation, obj.unwrap()).into(), // panics without an object row
+                    )),
+                });
+                old_name
+            }};
+        }
+
+        let old_name = match object_type {
+            ObjectType::Table => rename_relation!(Table, table, table_id, object_id),
+            ObjectType::Source => rename_relation!(Source, source, source_id, object_id),
+            ObjectType::Sink => rename_relation!(Sink, sink, sink_id, object_id),
+            ObjectType::View => rename_relation!(View, view, view_id, object_id),
+            ObjectType::Index => {
+                let (mut index, obj) = Index::find_by_id(object_id)
+                    .find_also_related(Object)
+                    .one(&txn)
+                    .await?
+                    .unwrap(); // panics if the index row does not exist
+                index.name = object_name.into();
+                let index_table_id = index.index_table_id;
+
+                // an index and its index table share one name: rename the index row, then the table.
+                let active_model = index::ActiveModel {
+                    index_id: ActiveValue::Set(index.index_id),
+                    name: ActiveValue::Set(object_name.into()),
+                    ..Default::default()
+                };
+                active_model.update(&txn).await?;
+                to_update_relations.push(PbRelation {
+                    relation_info: Some(PbRelationInfo::Index(
+                        ObjectModel(index, obj.unwrap()).into(),
+                    )),
+                });
+                rename_relation!(Table, table, table_id, index_table_id) // old name of index table
+            }
+            _ => unreachable!("only relation name can be altered."),
+        };
+
+        // rewrite references to the old name in the definition of a relation that depends on it.
+        macro_rules! rename_relation_ref {
+            ($entity:ident, $table:ident, $identity:ident, $object_id:expr) => {{
+                let (mut relation, obj) = $entity::find_by_id($object_id)
+                    .find_also_related(Object)
+                    .one(&txn)
+                    .await?
+                    .unwrap(); // panics if no row exists for the id
+                relation.definition =
+                    alter_relation_rename_refs(&relation.definition, &old_name, object_name);
+                let active_model = $table::ActiveModel {
+                    $identity: ActiveValue::Set(relation.$identity),
+                    definition: ActiveValue::Set(relation.definition.clone()),
+                    ..Default::default()
+                };
+                active_model.update(&txn).await?;
+                to_update_relations.push(PbRelation {
+                    relation_info: Some(PbRelationInfo::$entity(
+                        ObjectModel(relation, obj.unwrap()).into(),
+                    )),
+                });
+            }};
+        }
+        let objs = get_referring_objects(object_id, &txn).await?;
+        for obj in objs {
+            match obj.obj_type {
+                ObjectType::Table => rename_relation_ref!(Table, table, table_id, obj.oid),
+                ObjectType::Sink => rename_relation_ref!(Sink, sink, sink_id, obj.oid),
+                ObjectType::View => rename_relation_ref!(View, view, view_id, obj.oid),
+                ObjectType::Index => { // the definition that gets rewritten is the index table's
+                    let index_table_id: Option<TableId> = Index::find_by_id(obj.oid)
+                        .select_only()
+                        .column(index::Column::IndexTableId)
+                        .into_tuple()
+                        .one(&txn)
+                        .await?;
+                    rename_relation_ref!(Table, table, table_id, index_table_id.unwrap());
+                }
+                _ => bail!("only table, sink, view and index depend on other objects."),
+            }
+        }
+        txn.commit().await?; // frontends are notified only after a successful commit
+
+        let version = self
+            .notify_frontend(
+                NotificationOperation::Update,
+                NotificationInfo::RelationGroup(PbRelationGroup {
+                    relations: to_update_relations,
+                }),
+            )
+            .await;
+
+        Ok(version)
+    }
 }
 
 #[cfg(test)]
diff --git a/src/meta/src/controller/cluster.rs b/src/meta/src/controller/cluster.rs
index ca29380a49fca..392a0def5d53f 100644
--- a/src/meta/src/controller/cluster.rs
+++ b/src/meta/src/controller/cluster.rs
@@ -22,6 +22,9 @@ use std::time::{Duration, SystemTime};
 use itertools::Itertools;
 use risingwave_common::hash::ParallelUnitId;
 use risingwave_hummock_sdk::HummockSstableObjectId;
+use risingwave_meta_model_v2::prelude::{Worker, WorkerProperty};
+use risingwave_meta_model_v2::worker::{WorkerStatus, WorkerType};
+use risingwave_meta_model_v2::{worker, worker_property, I32Array, TransactionId, WorkerId};
 use risingwave_pb::common::worker_node::{PbProperty, PbState};
 use risingwave_pb::common::{
     HostAddress, ParallelUnit, PbHostAddress, PbParallelUnit, PbWorkerNode, PbWorkerType,
@@ -39,10 +42,7 @@ use tokio::sync::oneshot::Sender;
 use tokio::sync::{RwLock, RwLockReadGuard};
 use tokio::task::JoinHandle;
 
-use crate::manager::prelude::{Worker, WorkerProperty};
 use crate::manager::{LocalNotification, MetaSrvEnv, WorkerKey};
-use crate::model_v2::worker::{WorkerStatus, WorkerType};
-use crate::model_v2::{worker, worker_property, I32Array, TransactionId, WorkerId};
 use crate::{MetaError, MetaResult};
 
 pub type ClusterControllerRef = Arc<ClusterController>;
@@ -89,64 +89,6 @@ impl From<WorkerInfo> for PbWorkerNode {
     }
 }
 
-impl From<PbWorkerType> for WorkerType {
-    fn from(worker_type: PbWorkerType) -> Self {
-        match worker_type {
-            PbWorkerType::Unspecified => unreachable!("unspecified worker type"),
-            PbWorkerType::Frontend => Self::Frontend,
-            PbWorkerType::ComputeNode => Self::ComputeNode,
-            PbWorkerType::RiseCtl => Self::RiseCtl,
-            PbWorkerType::Compactor => Self::Compactor,
-            PbWorkerType::Meta => Self::Meta,
-        }
-    }
-}
-
-impl From<WorkerType> for PbWorkerType {
-    fn from(worker_type: WorkerType) -> Self {
-        match worker_type {
-            WorkerType::Frontend => Self::Frontend,
-            WorkerType::ComputeNode => Self::ComputeNode,
-            WorkerType::RiseCtl => Self::RiseCtl,
-            WorkerType::Compactor => Self::Compactor,
-            WorkerType::Meta => Self::Meta,
-        }
-    }
-}
-
-impl From<PbState> for WorkerStatus {
-    fn from(state: PbState) -> Self {
-        match state {
-            PbState::Unspecified => unreachable!("unspecified worker status"),
-            PbState::Starting => Self::Starting,
-            PbState::Running => Self::Running,
-        }
-    }
-}
-
-impl From<WorkerStatus> for PbState {
-    fn from(status: WorkerStatus) -> Self {
-        match status {
-            WorkerStatus::Starting => Self::Starting,
-            WorkerStatus::Running => Self::Running,
-        }
-    }
-}
-
-impl From<&PbWorkerNode> for worker::ActiveModel {
-    fn from(worker: &PbWorkerNode) -> Self {
-        let host = worker.host.clone().unwrap();
-        Self {
-            worker_id: ActiveValue::Set(worker.id),
-            worker_type: ActiveValue::Set(worker.r#type().into()),
-            host: ActiveValue::Set(host.host),
-            port: ActiveValue::Set(host.port),
-            status: ActiveValue::Set(worker.state().into()),
-            ..Default::default()
-        }
-    }
-}
-
 impl ClusterController {
     pub async fn new(env: MetaSrvEnv, max_heartbeat_interval: Duration) -> MetaResult<Self> {
         let meta_store = env
diff --git a/src/meta/src/controller/mod.rs b/src/meta/src/controller/mod.rs
index 5b0ff4ab99bef..d9193acd5591f 100644
--- a/src/meta/src/controller/mod.rs
+++ b/src/meta/src/controller/mod.rs
@@ -13,16 +13,25 @@
 // limitations under the License.
 
 use anyhow::anyhow;
+use risingwave_common::util::epoch::Epoch;
+use risingwave_meta_model_v2::{
+    connection, database, index, object, schema, sink, source, table, view,
+};
 use risingwave_pb::catalog::connection::PbInfo as PbConnectionInfo;
-use risingwave_pb::catalog::{PbConnection, PbDatabase, PbSchema};
-use sea_orm::{ActiveValue, DatabaseConnection, ModelTrait};
+use risingwave_pb::catalog::source::PbOptionalAssociatedTableId;
+use risingwave_pb::catalog::table::{PbOptionalAssociatedSourceId, PbTableType};
+use risingwave_pb::catalog::{
+    PbConnection, PbCreateType, PbDatabase, PbHandleConflictBehavior, PbIndex, PbSchema, PbSink,
+    PbSinkType, PbSource, PbStreamJobStatus, PbTable, PbView,
+};
+use sea_orm::{DatabaseConnection, ModelTrait};
 
-use crate::model_v2::{connection, database, object, schema};
 use crate::MetaError;
 
 #[allow(dead_code)]
 pub mod catalog;
 pub mod cluster;
+pub mod rename;
 pub mod system_param;
 pub mod utils;
 
@@ -49,7 +58,7 @@ impl SqlMetaStore {
     #[cfg(any(test, feature = "test"))]
     #[cfg(not(madsim))]
     pub async fn for_test() -> Self {
-        use model_migration::{Migrator, MigratorTrait};
+        use risingwave_meta_model_migration::{Migrator, MigratorTrait};
         let conn = sea_orm::Database::connect("sqlite::memory:").await.unwrap();
         Migrator::up(&conn, None).await.unwrap();
         Self { conn }
@@ -61,38 +70,169 @@ pub struct ObjectModel<M: ModelTrait>(M, object::Model);
 impl From<ObjectModel<database::Model>> for PbDatabase {
     fn from(value: ObjectModel<database::Model>) -> Self {
         Self {
-            id: value.0.database_id as _,
+            id: value.0.database_id, // `value.0` is the database row
             name: value.0.name,
-            owner: value.1.owner_id as _,
+            owner: value.1.owner_id, // `value.1` is the matching object row
         }
     }
 }
 
-impl From<PbDatabase> for database::ActiveModel {
-    fn from(db: PbDatabase) -> Self {
+impl From<ObjectModel<schema::Model>> for PbSchema {
+    fn from(value: ObjectModel<schema::Model>) -> Self { // value.0: schema row, value.1: object row
         Self {
-            database_id: ActiveValue::Set(db.id as _),
-            name: ActiveValue::Set(db.name),
+            id: value.0.schema_id,
+            name: value.0.name,
+            database_id: value.1.database_id.unwrap(), // panics if unset on the object row
+            owner: value.1.owner_id,
         }
     }
 }
 
-impl From<PbSchema> for schema::ActiveModel {
-    fn from(schema: PbSchema) -> Self {
+impl From<ObjectModel<table::Model>> for PbTable {
+    fn from(value: ObjectModel<table::Model>) -> Self { // value.0: table row, value.1: object row
         Self {
-            schema_id: ActiveValue::Set(schema.id as _),
-            name: ActiveValue::Set(schema.name),
+            id: value.0.table_id,
+            schema_id: value.1.schema_id.unwrap(), // panics if unset on the object row
+            database_id: value.1.database_id.unwrap(), // panics if unset on the object row
+            name: value.0.name,
+            columns: value.0.columns.0,
+            pk: value.0.pk.0,
+            dependent_relations: vec![], // always empty here; todo: deprecate it.
+            table_type: PbTableType::from(value.0.table_type) as _,
+            distribution_key: value.0.distribution_key.0,
+            stream_key: value.0.stream_key.0,
+            append_only: value.0.append_only,
+            owner: value.1.owner_id,
+            properties: value.0.properties.0,
+            fragment_id: value.0.fragment_id as u32,
+            vnode_col_index: value.0.vnode_col_index,
+            row_id_index: value.0.row_id_index,
+            value_indices: value.0.value_indices.0,
+            definition: value.0.definition,
+            handle_pk_conflict_behavior: PbHandleConflictBehavior::from(
+                value.0.handle_pk_conflict_behavior,
+            ) as _,
+            read_prefix_len_hint: value.0.read_prefix_len_hint,
+            watermark_indices: value.0.watermark_indices.0,
+            dist_key_in_pk: value.0.dist_key_in_pk.0,
+            dml_fragment_id: value.0.dml_fragment_id.map(|id| id as u32),
+            cardinality: value.0.cardinality.map(|cardinality| cardinality.0),
+            initialized_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.initialized_at.timestamp_millis() as _).0,
+            ),
+            created_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.created_at.timestamp_millis() as _).0,
+            ),
+            cleaned_by_watermark: value.0.cleaned_by_watermark,
+            stream_job_status: PbStreamJobStatus::from(value.0.job_status) as _,
+            create_type: PbCreateType::from(value.0.create_type) as _,
+            version: Some(value.0.version.0),
+            optional_associated_source_id: value
+                .0
+                .optional_associated_source_id
+                .map(PbOptionalAssociatedSourceId::AssociatedSourceId),
         }
     }
 }
 
-impl From<ObjectModel<schema::Model>> for PbSchema {
-    fn from(value: ObjectModel<schema::Model>) -> Self {
+impl From<ObjectModel<source::Model>> for PbSource {
+    fn from(value: ObjectModel<source::Model>) -> Self { // value.0: source row, value.1: object row
+        Self {
+            id: value.0.source_id,
+            schema_id: value.1.schema_id.unwrap(), // panics if unset on the object row
+            database_id: value.1.database_id.unwrap(), // panics if unset on the object row
+            name: value.0.name,
+            row_id_index: value.0.row_id_index,
+            columns: value.0.columns.0,
+            pk_column_ids: value.0.pk_column_ids.0,
+            properties: value.0.properties.0,
+            owner: value.1.owner_id,
+            info: value.0.source_info.map(|info| info.0),
+            watermark_descs: value.0.watermark_descs.0,
+            definition: value.0.definition,
+            connection_id: value.0.connection_id,
+            // todo: use the timestamp from the database directly.
+            initialized_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.initialized_at.timestamp_millis() as _).0,
+            ),
+            created_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.created_at.timestamp_millis() as _).0,
+            ),
+            version: value.0.version,
+            optional_associated_table_id: value
+                .0
+                .optional_associated_table_id
+                .map(PbOptionalAssociatedTableId::AssociatedTableId),
+        }
+    }
+}
+
+impl From<ObjectModel<sink::Model>> for PbSink {
+    fn from(value: ObjectModel<sink::Model>) -> Self { // value.0: sink row, value.1: object row
+        Self {
+            id: value.0.sink_id,
+            schema_id: value.1.schema_id.unwrap(), // panics if unset on the object row
+            database_id: value.1.database_id.unwrap(), // panics if unset on the object row
+            name: value.0.name,
+            columns: value.0.columns.0,
+            plan_pk: value.0.plan_pk.0,
+            dependent_relations: vec![], // always empty here; todo: deprecate it.
+            distribution_key: value.0.distribution_key.0,
+            downstream_pk: value.0.downstream_pk.0,
+            sink_type: PbSinkType::from(value.0.sink_type) as _,
+            owner: value.1.owner_id,
+            properties: value.0.properties.0,
+            definition: value.0.definition,
+            connection_id: value.0.connection_id,
+            initialized_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.initialized_at.timestamp_millis() as _).0,
+            ),
+            created_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.created_at.timestamp_millis() as _).0,
+            ),
+            db_name: value.0.db_name,
+            sink_from_name: value.0.sink_from_name,
+            stream_job_status: PbStreamJobStatus::from(value.0.job_status) as _,
+            format_desc: value.0.sink_format_desc.map(|desc| desc.0),
+        }
+    }
+}
+
+impl From<ObjectModel<index::Model>> for PbIndex {
+    fn from(value: ObjectModel<index::Model>) -> Self { // value.0: index row, value.1: object row
+        Self {
+            id: value.0.index_id,
+            schema_id: value.1.schema_id.unwrap(), // panics if unset on the object row
+            database_id: value.1.database_id.unwrap(), // panics if unset on the object row
+            name: value.0.name,
+            owner: value.1.owner_id,
+            index_table_id: value.0.index_table_id,
+            primary_table_id: value.0.primary_table_id,
+            index_item: value.0.index_items.0,
+            original_columns: value.0.original_columns.0,
+            initialized_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.initialized_at.timestamp_millis() as _).0,
+            ),
+            created_at_epoch: Some( // epoch built from the object row's timestamp in millis
+                Epoch::from_unix_millis(value.1.created_at.timestamp_millis() as _).0,
+            ),
+            stream_job_status: PbStreamJobStatus::from(value.0.job_status) as _,
+        }
+    }
+}
+
+impl From<ObjectModel<view::Model>> for PbView {
+    fn from(value: ObjectModel<view::Model>) -> Self { // value.0: view row, value.1: object row
         Self {
-            id: value.0.schema_id as _,
+            id: value.0.view_id,
+            schema_id: value.1.schema_id.unwrap(), // panics if unset on the object row
+            database_id: value.1.database_id.unwrap(), // panics if unset on the object row
             name: value.0.name,
-            database_id: value.1.database_id.unwrap() as _,
-            owner: value.1.owner_id as _,
+            owner: value.1.owner_id,
+            properties: value.0.properties.0,
+            sql: value.0.definition, // the stored definition is exposed as the view's `sql`
+            dependent_relations: vec![], // always empty here; todo: deprecate it.
+            columns: value.0.columns.0,
         }
     }
 }
@@ -100,11 +240,11 @@ impl From<ObjectModel<schema::Model>> for PbSchema {
 impl From<ObjectModel<connection::Model>> for PbConnection {
     fn from(value: ObjectModel<connection::Model>) -> Self {
         Self {
-            id: value.1.oid as _,
-            schema_id: value.1.schema_id.unwrap() as _,
-            database_id: value.1.database_id.unwrap() as _,
+            id: value.1.oid, // id comes from the object row (`value.1`), not the connection row
+            schema_id: value.1.schema_id.unwrap(), // panics if unset on the object row
+            database_id: value.1.database_id.unwrap(), // panics if unset on the object row
             name: value.0.name,
-            owner: value.1.owner_id as _,
+            owner: value.1.owner_id,
             info: Some(PbConnectionInfo::PrivateLinkService(value.0.info.0)),
         }
     }
 }
diff --git a/src/meta/src/controller/rename.rs b/src/meta/src/controller/rename.rs
new file mode 100644
index 0000000000000..254565efb391c
--- /dev/null
+++ b/src/meta/src/controller/rename.rs
@@ -0,0 +1,430 @@
+// Copyright 2023 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use itertools::Itertools;
+use risingwave_common::util::column_index_mapping::ColIndexMapping;
+use risingwave_pb::expr::expr_node::RexNode;
+use risingwave_pb::expr::{ExprNode, FunctionCall, UserDefinedFunction};
+use risingwave_sqlparser::ast::{
+    Array, CreateSink, CreateSinkStatement, CreateSourceStatement, Distinct, Expr, Function,
+    FunctionArg, FunctionArgExpr, Ident, ObjectName, Query, SelectItem, SetExpr, Statement,
+    TableAlias, TableFactor, TableWithJoins,
+};
+use risingwave_sqlparser::parser::Parser;
+
+/// `alter_relation_rename` renames a relation in its `Create` statement and returns the updated
+/// SQL. `definition` must be a single `Create` statement (panics otherwise; empty input is
+/// returned as is) and `new_name` must be a valid identifier validated by the caller. To update
+/// relations that depend on the renamed one, use `alter_relation_rename_refs`.
+pub fn alter_relation_rename(definition: &str, new_name: &str) -> String {
+    // This happens when we try to rename a table that's created by `CREATE TABLE AS`. Remove it
+    // when we support `SHOW CREATE TABLE` for `CREATE TABLE AS`.
+    if definition.is_empty() {
+        tracing::warn!("found empty definition when renaming relation, ignored.");
+        return definition.into();
+    }
+    let ast = Parser::parse_sql(definition).expect("failed to parse relation definition");
+    let mut stmt = ast
+        .into_iter()
+        .exactly_one() // zero or several statements make the `expect` below panic
+        .expect("should contains only one statement");
+
+    match &mut stmt {
+        Statement::CreateTable { name, .. }
+        | Statement::CreateView { name, .. }
+        | Statement::CreateIndex { name, .. }
+        | Statement::CreateSource {
+            stmt: CreateSourceStatement {
+                source_name: name, ..
+            },
+        }
+        | Statement::CreateSink {
+            stmt: CreateSinkStatement {
+                sink_name: name, ..
+            },
+        } => replace_table_name(name, new_name), // only the last ident of the name is replaced
+        _ => unreachable!(), // any other statement kind panics
+    };
+
+    stmt.to_string() // the statement is re-serialized from the modified AST
+}
+
+/// `alter_relation_rename_refs` rewrites references to a renamed relation (`from` -> `to`) in the
+/// `Create` statement of a dependent relation and returns the new SQL; panics on unparsable input.
+pub fn alter_relation_rename_refs(definition: &str, from: &str, to: &str) -> String {
+    let ast = Parser::parse_sql(definition).expect("failed to parse relation definition");
+    let mut stmt = ast
+        .into_iter()
+        .exactly_one() // zero or several statements make the `expect` below panic
+        .expect("should contains only one statement");
+
+    match &mut stmt {
+        Statement::CreateTable {
+            query: Some(query), ..
+        }
+        | Statement::CreateView { query, .. }
+        | Statement::Query(query) // A view stores a bare query as its definition.
+        | Statement::CreateSink {
+            stmt:
+            CreateSinkStatement {
+                sink_from: CreateSink::AsQuery(query),
+                ..
+            },
+        } => {
+            QueryRewriter::rewrite_query(query, from, to);
+        }
+        Statement::CreateIndex { table_name, .. }
+        | Statement::CreateSink {
+            stmt:
+            CreateSinkStatement {
+                sink_from: CreateSink::From(table_name),
+                ..
+            },
+        } => replace_table_name(table_name, to), // replaced without comparing against `from`
+        _ => unreachable!(), // e.g. a `CreateTable` without a query panics here
+    };
+    stmt.to_string()
+}
+
+/// Replace the last ident of `table_name` with `to`, e.g. the `table` part of `schema.table` or
+/// `database.schema.table`. The object name must be non-empty; an empty name would panic here.
+fn replace_table_name(table_name: &mut ObjectName, to: &str) {
+    let idx = table_name.0.len() - 1;
+    table_name.0[idx] = Ident::new_unchecked(to);
+}
+
+/// `QueryRewriter` is a visitor that updates all references of relation named `from` to `to` in the
+/// given query, which is part of the `Create` statement of a dependent relation.
+struct QueryRewriter<'a> {
+    from: &'a str, // old relation name, compared against the last ident of each table name
+    to: &'a str, // new relation name written in its place
+}
+
+impl QueryRewriter<'_> {
+    fn rewrite_query(query: &mut Query, from: &str, to: &str) {
+        let rewriter = QueryRewriter { from, to };
+        rewriter.visit_query(query)
+    }
+
+    /// Visit the query and update all references of relation named `from` to `to`.
+    fn visit_query(&self, query: &mut Query) {
+        if let Some(with) = &mut query.with {
+            for cte_table in &mut with.cte_tables {
+                self.visit_query(&mut cte_table.query);
+            }
+        }
+        self.visit_set_expr(&mut query.body);
+        for expr in &mut query.order_by {
+            self.visit_expr(&mut expr.expr);
+        }
+    }
+
+    /// Visit table factor and update all references of relation named `from` to `to`.
+    /// Rewrite idents(i.e. `schema.table`, `table`) that contains the old name in the
+    /// following pattern:
+    /// 1. `FROM a` to `FROM new_a AS a`
+    /// 2. `FROM a AS b` to `FROM new_a AS b`
+    ///
+    /// So that we DON'T have to:
+    /// 1. rewrite the select and expr part like `schema.table.column`, `table.column`,
+    /// `alias.column` etc.
+    /// 2. handle the case that the old name is used as alias.
+    /// 3. handle the case that the new name is used as alias.
+    fn visit_table_factor(&self, table_factor: &mut TableFactor) {
+        match table_factor {
+            TableFactor::Table { name, alias, .. } => {
+                let idx = name.0.len() - 1;
+                if name.0[idx].real_value() == self.from {
+                    if alias.is_none() {
+                        *alias = Some(TableAlias {
+                            name: Ident::new_unchecked(self.from),
+                            columns: vec![],
+                        });
+                    }
+                    name.0[idx] = Ident::new_unchecked(self.to);
+                }
+            }
+            TableFactor::Derived { subquery, .. } => self.visit_query(subquery),
+            TableFactor::TableFunction { args, .. } => {
+                for arg in args {
+                    self.visit_function_args(arg);
+                }
+            }
+            TableFactor::NestedJoin(table_with_joins) => {
+                self.visit_table_with_joins(table_with_joins);
+            }
+        }
+    }
+
+    /// Visit table with joins and update all references of relation named `from` to `to`.
+    fn visit_table_with_joins(&self, table_with_joins: &mut TableWithJoins) {
+        self.visit_table_factor(&mut table_with_joins.relation);
+        for join in &mut table_with_joins.joins {
+            self.visit_table_factor(&mut join.relation);
+        }
+    }
+
+    /// Visit query body expression and update all references.
+    fn visit_set_expr(&self, set_expr: &mut SetExpr) {
+        match set_expr {
+            SetExpr::Select(select) => {
+                if let Distinct::DistinctOn(exprs) = &mut select.distinct {
+                    for expr in exprs {
+                        self.visit_expr(expr);
+                    }
+                }
+                for select_item in &mut select.projection {
+                    self.visit_select_item(select_item);
+                }
+                for from_item in &mut select.from {
+                    self.visit_table_with_joins(from_item);
+                }
+                if let Some(where_clause) = &mut select.selection {
+                    self.visit_expr(where_clause);
+                }
+                for expr in &mut select.group_by {
+                    self.visit_expr(expr);
+                }
+                if let Some(having) = &mut select.having {
+                    self.visit_expr(having);
+                }
+            }
+            SetExpr::Query(query) => self.visit_query(query),
+            SetExpr::SetOperation { left, right, .. } => {
+                self.visit_set_expr(left);
+                self.visit_set_expr(right);
+            }
+            SetExpr::Values(_) => {}
+        }
+    }
+
+    /// Visit function arguments and update all references.
+    fn visit_function_args(&self, function_args: &mut FunctionArg) {
+        match function_args {
+            FunctionArg::Unnamed(arg) | FunctionArg::Named { arg, .. } => match arg {
+                FunctionArgExpr::Expr(expr) | FunctionArgExpr::ExprQualifiedWildcard(expr, _) => {
+                    self.visit_expr(expr)
+                }
+                FunctionArgExpr::QualifiedWildcard(_, None) | FunctionArgExpr::Wildcard(None) => {}
+                FunctionArgExpr::QualifiedWildcard(_, Some(exprs))
+                | FunctionArgExpr::Wildcard(Some(exprs)) => {
+                    for expr in exprs {
+                        self.visit_expr(expr);
+                    }
+                }
+            },
+        }
+    }
+
+    /// Visit function and update all references.
+    fn visit_function(&self, function: &mut Function) {
+        for arg in &mut function.args {
+            self.visit_function_args(arg);
+        }
+    }
+
+    /// Visit expression and update all references.
+    fn visit_expr(&self, expr: &mut Expr) {
+        match expr {
+            Expr::FieldIdentifier(expr, ..)
+            | Expr::IsNull(expr)
+            | Expr::IsNotNull(expr)
+            | Expr::IsTrue(expr)
+            | Expr::IsNotTrue(expr)
+            | Expr::IsFalse(expr)
+            | Expr::IsNotFalse(expr)
+            | Expr::IsUnknown(expr)
+            | Expr::IsNotUnknown(expr)
+            | Expr::IsJson { expr, .. }
+            | Expr::InList { expr, .. }
+            | Expr::SomeOp(expr)
+            | Expr::AllOp(expr)
+            | Expr::UnaryOp { expr, .. }
+            | Expr::Cast { expr, .. }
+            | Expr::TryCast { expr, .. }
+            | Expr::AtTimeZone {
+                timestamp: expr, ..
+            }
+            | Expr::Extract { expr, .. }
+            | Expr::Substring { expr, .. }
+            | Expr::Overlay { expr, .. }
+            | Expr::Trim { expr, .. }
+            | Expr::Nested(expr)
+            | Expr::ArrayIndex { obj: expr, .. }
+            | Expr::ArrayRangeIndex { obj: expr, .. } => self.visit_expr(expr),
+
+            Expr::Position { substring, string } => {
+                self.visit_expr(substring);
+                self.visit_expr(string);
+            }
+
+            Expr::InSubquery { expr, subquery, .. } => {
+                self.visit_expr(expr);
+                self.visit_query(subquery);
+            }
+            Expr::Between {
+                expr, low, high, ..
+            } => {
+                self.visit_expr(expr);
+                self.visit_expr(low);
+                self.visit_expr(high);
+            }
+
+            Expr::IsDistinctFrom(expr1, expr2)
+            | Expr::IsNotDistinctFrom(expr1, expr2)
+            | Expr::BinaryOp {
+                left: expr1,
+                right: expr2,
+                ..
+            } => {
+                self.visit_expr(expr1);
+                self.visit_expr(expr2);
+            }
+            Expr::Function(function) => self.visit_function(function),
+            Expr::Exists(query) | Expr::Subquery(query) | Expr::ArraySubquery(query) => {
+                self.visit_query(query)
+            }
+
+            Expr::GroupingSets(exprs_vec) | Expr::Cube(exprs_vec) | Expr::Rollup(exprs_vec) => {
+                for exprs in exprs_vec {
+                    for expr in exprs {
+                        self.visit_expr(expr);
+                    }
+                }
+            }
+
+            Expr::Row(exprs) | Expr::Array(Array { elem: exprs, .. }) => {
+                for expr in exprs {
+                    self.visit_expr(expr);
+                }
+            }
+
+            Expr::LambdaFunction { body, args: _ } => self.visit_expr(body),
+
+            // No need to visit.
+            Expr::Identifier(_)
+            | Expr::CompoundIdentifier(_)
+            | Expr::Collate { .. }
+            | Expr::Value(_)
+            | Expr::Parameter { .. }
+            | Expr::TypedString { .. }
+            | Expr::Case { .. } => {}
+        }
+    }
+
+    /// Visit a select item and update the references in every expression it carries,
+    /// delegating each expression to `visit_expr`.
+    fn visit_select_item(&self, select_item: &mut SelectItem) {
+        match select_item {
+            // Wildcards without an expression list carry nothing to visit.
+            SelectItem::QualifiedWildcard(_, None) | SelectItem::Wildcard(None) => {}
+            SelectItem::QualifiedWildcard(_, Some(exprs)) | SelectItem::Wildcard(Some(exprs)) => {
+                exprs.iter_mut().for_each(|expr| self.visit_expr(expr));
+            }
+            SelectItem::UnnamedExpr(expr)
+            | SelectItem::ExprQualifiedWildcard(expr, _)
+            | SelectItem::ExprWithAlias { expr, .. } => self.visit_expr(expr),
+        }
+    }
+}
+
+/// Rewrites `InputRef` column indices inside an expression via `table_col_index_mapping`.
+pub struct ReplaceTableExprRewriter {
+    pub table_col_index_mapping: ColIndexMapping,
+}
+
+impl ReplaceTableExprRewriter {
+    /// Remap every `InputRef` under `expr` in place, descending into the children of UDF and
+    /// function-call nodes. Panics if `expr.rex_node` is `None`.
+    pub fn rewrite_expr(&self, expr: &mut ExprNode) {
+        match expr.rex_node.as_mut().unwrap() {
+            RexNode::InputRef(input_col_idx) => {
+                *input_col_idx = self.table_col_index_mapping.map(*input_col_idx as usize) as u32
+            }
+            RexNode::Udf(udf) => self.rewrite_udf(udf),
+            RexNode::FuncCall(function_call) => self.rewrite_function_call(function_call),
+            RexNode::Constant(_) | RexNode::Now(_) => {}
+        }
+    }
+
+    fn rewrite_udf(&self, udf: &mut UserDefinedFunction) {
+        for child in &mut udf.children {
+            self.rewrite_expr(child);
+        }
+    }
+
+    fn rewrite_function_call(&self, function_call: &mut FunctionCall) {
+        for child in &mut function_call.children {
+            self.rewrite_expr(child);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_alter_table_rename() {
+        let definition = "CREATE TABLE foo (a int, b int)";
+        let new_name = "bar";
+        let expected = "CREATE TABLE bar (a INT, b INT)";
+        let actual = alter_relation_rename(definition, new_name);
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn test_rename_index_refs() {
+        let definition = "CREATE INDEX idx1 ON foo(v1 DESC, v2)";
+        let from = "foo";
+        let to = "bar";
+        let expected = "CREATE INDEX idx1 ON bar(v1 DESC, v2)";
+        let actual = alter_relation_rename_refs(definition, from, to);
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn test_rename_sink_refs() {
+        let definition =
+            "CREATE SINK sink_t FROM foo WITH (connector = 'kafka', format = 'append_only')";
+        let from = "foo";
+        let to = "bar";
+        let expected =
+            "CREATE SINK sink_t FROM bar WITH (connector = 'kafka', format = 'append_only')";
+        let actual = alter_relation_rename_refs(definition, from, to);
+        assert_eq!(expected, actual);
+    }
+
+    #[test]
+    fn test_rename_with_alias_refs() {
+        let definition =
+            "CREATE MATERIALIZED VIEW mv1 AS SELECT foo.v1 AS m1v, foo.v2 AS m2v FROM foo";
+        let from = "foo";
+        let to = "bar";
+        let expected =
+            "CREATE MATERIALIZED VIEW mv1 AS SELECT foo.v1 AS m1v, foo.v2 AS m2v FROM bar AS foo";
+        let actual = alter_relation_rename_refs(definition, from, to);
+        assert_eq!(expected, actual);
+        // Field accesses and WHERE-clause references stay as written; only FROM is rewritten.
+        let definition = "CREATE MATERIALIZED VIEW mv1 AS SELECT foo.v1 AS m1v, (foo.v2).v3 AS m2v FROM foo WHERE foo.v1 = 1 AND (foo.v2).v3 IS TRUE";
+        let expected = "CREATE MATERIALIZED VIEW mv1 AS SELECT foo.v1 AS m1v, (foo.v2).v3 AS m2v FROM bar AS foo WHERE foo.v1 = 1 AND (foo.v2).v3 IS TRUE";
+        let actual = alter_relation_rename_refs(definition, from, to);
+        assert_eq!(expected, actual);
+        // An existing alias is preserved, even when it equals the new relation name.
+        let definition = "CREATE MATERIALIZED VIEW mv1 AS SELECT bar.v1 AS m1v, (bar.v2).v3 AS m2v FROM foo AS bar WHERE bar.v1 = 1";
+        let expected = "CREATE MATERIALIZED VIEW mv1 AS SELECT bar.v1 AS m1v, (bar.v2).v3 AS m2v FROM bar AS bar WHERE bar.v1 = 1";
+        let actual = alter_relation_rename_refs(definition, from, to);
+        assert_eq!(expected, actual);
+    }
+}
diff --git a/src/meta/src/controller/system_param.rs b/src/meta/src/controller/system_param.rs
index 0656da5ea9a46..5c9761a9a119d 100644
--- a/src/meta/src/controller/system_param.rs
+++ b/src/meta/src/controller/system_param.rs
@@ -21,6 +21,8 @@ use risingwave_common::system_param::{
     check_missing_params, derive_missing_fields, set_system_param,
 };
 use risingwave_common::{for_all_params, key_of};
+use risingwave_meta_model_v2::prelude::SystemParameter;
+use risingwave_meta_model_v2::system_parameter;
 use risingwave_pb::meta::subscribe_response::{Info, Operation};
 use risingwave_pb::meta::PbSystemParams;
 use sea_orm::{ActiveModelTrait, ActiveValue, DatabaseConnection, EntityTrait, TransactionTrait};
@@ -31,8 +33,6 @@ use tracing::info;
 
 use crate::controller::SqlMetaStore;
 use crate::manager::{LocalNotification, NotificationManagerRef};
-use crate::model_v2::prelude::SystemParameter;
-use crate::model_v2::system_parameter;
 use crate::{MetaError, MetaResult};
 
 pub type SystemParamsControllerRef = Arc<SystemParamsController>;
diff --git a/src/meta/src/controller/utils.rs b/src/meta/src/controller/utils.rs
index 964ee24ae99b5..2dbd89ac92423 100644
--- a/src/meta/src/controller/utils.rs
+++ b/src/meta/src/controller/utils.rs
@@ -13,7 +13,13 @@
 // limitations under the License.
 
 use anyhow::anyhow;
-use model_migration::WithQuery;
+use risingwave_meta_model_migration::WithQuery;
+use risingwave_meta_model_v2::object::ObjectType;
+use risingwave_meta_model_v2::prelude::*;
+use risingwave_meta_model_v2::{
+    connection, function, index, object, object_dependency, schema, sink, source, table, view,
+    DataTypeArray, DatabaseId, ObjectId, SchemaId, UserId,
+};
 use risingwave_pb::catalog::{PbConnection, PbFunction};
 use sea_orm::sea_query::{
     Alias, CommonTableExpression, Expr, Query, QueryStatementBuilder, SelectStatement, UnionType,
@@ -24,12 +30,6 @@ use sea_orm::{
     Order, PaginatorTrait, QueryFilter, QuerySelect, RelationTrait, Statement,
 };
 
-use crate::model_v2::object::ObjectType;
-use crate::model_v2::prelude::*;
-use crate::model_v2::{
-    connection, function, index, object, object_dependency, schema, sink, source, table, view,
-    DataTypeArray, DatabaseId, ObjectId, SchemaId, UserId,
-};
 use crate::{MetaError, MetaResult};
 
 /// This function will construct a query using recursive cte to find all objects[(id, `obj_type`)] that are used by the given object.
@@ -115,8 +115,11 @@ pub struct PartialObject {
     pub database_id: Option<DatabaseId>,
 }
 
-/// List all objects that are using the given one. It runs a recursive CTE to find all the dependencies.
-pub async fn list_used_by<C>(obj_id: ObjectId, db: &C) -> MetaResult<Vec<PartialObject>>
+/// List all objects that use the given one, directly or transitively. It runs a recursive CTE to find all the dependencies.
+pub async fn get_referring_objects_cascade<C>(
+    obj_id: ObjectId,
+    db: &C,
+) -> MetaResult<Vec<PartialObject>>
 where
     C: ConnectionTrait,
 {
@@ -318,6 +321,24 @@ where
     Ok(())
 }
 
+/// List all objects that are using the given one, via a single non-recursive lookup.
+pub async fn get_referring_objects<C>(object_id: ObjectId, db: &C) -> MetaResult<Vec<PartialObject>>
+where
+    C: ConnectionTrait,
+{
+    let objs = ObjectDependency::find()
+        .filter(object_dependency::Column::Oid.eq(object_id))
+        .join(
+            JoinType::InnerJoin,
+            object_dependency::Relation::Object1.def(),
+        )
+        .into_partial_model()
+        .all(db)
+        .await?;
+
+    Ok(objs)
+}
+
 /// `ensure_schema_empty` ensures that the schema is empty, used by `DROP SCHEMA`.
 pub async fn ensure_schema_empty<C>(schema_id: SchemaId, db: &C) -> MetaResult<()>
 where
diff --git a/src/meta/src/error.rs b/src/meta/src/error.rs
index f8332819a4610..03323d53fa0af 100644
--- a/src/meta/src/error.rs
+++ b/src/meta/src/error.rs
@@ -20,7 +20,6 @@ use risingwave_common::error::BoxedError;
 use risingwave_connector::sink::SinkError;
 use risingwave_pb::PbFieldNotFound;
 use risingwave_rpc_client::error::RpcError;
-use sqlx::Error;
 
 use crate::hummock::error::Error as HummockError;
 use crate::manager::WorkerId;
@@ -181,12 +180,6 @@ impl From<etcd_client::Error> for MetaError {
     }
 }
 
-impl From<sqlx::Error> for MetaError {
-    fn from(value: Error) -> Self {
-        MetaErrorInner::Election(value.to_string()).into()
-    }
-}
-
 impl From<RpcError> for MetaError {
     fn from(e: RpcError) -> Self {
         MetaErrorInner::RpcError(e).into()
diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs
index 2b0c3e3db87dc..1b3a284e9ccc9 100644
--- a/src/meta/src/hummock/manager/mod.rs
+++ b/src/meta/src/hummock/manager/mod.rs
@@ -1761,7 +1761,7 @@ impl HummockManager {
     }
 
     /// Get version deltas from meta store
-    #[cfg_attr(coverage, no_coverage)]
+    #[cfg_attr(coverage, coverage(off))]
     #[named]
     pub async fn list_version_deltas(
         &self,
diff --git a/src/meta/src/lib.rs b/src/meta/src/lib.rs
index afe66d27ad8e8..95b4ce7ead72d 100644
--- a/src/meta/src/lib.rs
+++ b/src/meta/src/lib.rs
@@ -26,13 +26,12 @@
 #![feature(error_generic_member_access)]
 #![feature(assert_matches)]
 #![feature(try_blocks)]
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 #![feature(custom_test_frameworks)]
 #![test_runner(risingwave_test_runner::test_runner::run_failpont_tests)]
 #![feature(is_sorted)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(type_name_of_val)]
-#![feature(async_fn_in_trait)]
 
 pub mod backup_restore;
 pub mod barrier;
@@ -43,7 +42,6 @@ pub mod error;
 pub mod hummock;
 pub mod manager;
 pub mod model;
-pub mod model_v2;
 pub mod rpc;
 pub mod serving;
 pub mod storage;
diff --git a/src/meta/src/manager/catalog/mod.rs b/src/meta/src/manager/catalog/mod.rs
index bcac32922d180..15e74e4c2ac9e 100644
--- a/src/meta/src/manager/catalog/mod.rs
+++ b/src/meta/src/manager/catalog/mod.rs
@@ -647,7 +647,8 @@ impl CatalogManager {
                     self.start_create_table_procedure_with_source(source, table)
                         .await
                 } else {
-                    self.start_create_table_procedure(table, vec![]).await
+                    self.start_create_table_procedure(table, internal_tables)
+                        .await
                 }
             }
         }
@@ -765,7 +766,9 @@ impl CatalogManager {
     ///    2. Not belonging to a background stream job.
     ///    Clean up these hanging tables by the id.
     pub async fn clean_dirty_tables(&self, fragment_manager: FragmentManagerRef) -> MetaResult<()> {
-        let creating_tables: Vec<Table> = self.list_persisted_creating_tables().await;
+        let core = &mut *self.core.lock().await;
+        let database_core = &mut core.database;
+        let creating_tables: Vec<Table> = database_core.list_persisted_creating_tables();
         tracing::debug!(
             "creating_tables ids: {:#?}",
             creating_tables.iter().map(|t| t.id).collect_vec()
@@ -839,28 +842,31 @@ impl CatalogManager {
             }
         }
 
-        let core = &mut *self.core.lock().await;
-        let database_core = &mut core.database;
         let tables = &mut database_core.tables;
         let mut tables = BTreeMapTransaction::new(tables);
         for table in &tables_to_clean {
-            tracing::debug!("cleaning table_id: {}", table.id);
-            let table = tables.remove(table.id);
-            assert!(table.is_some())
+            let table_id = table.id;
+            tracing::debug!("cleaning table_id: {}", table_id);
+            let table = tables.remove(table_id);
+            assert!(table.is_some(), "table_id {} missing", table_id)
         }
         commit_meta!(self, tables)?;
 
         database_core.clear_creating_stream_jobs();
         let user_core = &mut core.user;
         for table in &tables_to_clean {
-            // Recovered when init database manager.
-            for relation_id in &table.dependent_relations {
-                database_core.decrease_ref_count(*relation_id);
+            // If table type is internal, no need to update the ref count OR
+            // user ref count.
+            if table.table_type != TableType::Internal as i32 {
+                // Recovered when init database manager.
+                for relation_id in &table.dependent_relations {
+                    database_core.decrease_ref_count(*relation_id);
+                }
+                // Recovered when init user manager.
+                tracing::debug!("decrease ref for {}", table.id);
+                user_core.decrease_ref(table.owner);
             }
-            // Recovered when init user manager.
-            user_core.decrease_ref(table.owner);
         }
-
         Ok(())
     }
 
@@ -919,29 +925,26 @@ impl CatalogManager {
             let database_core = &mut core.database;
             let tables = &mut database_core.tables;
             let Some(table) = tables.get(&table_id).cloned() else {
-                bail!(
-                    "table_id {} missing when attempting to cancel job",
+                tracing::warn!(
+                    "table_id {} missing when attempting to cancel job, could be cleaned on recovery",
                     table_id
-                )
+                );
+                return Ok(());
             };
-            table
-        };
-
-        tracing::trace!("cleanup tables for {}", table.id);
-        {
-            let core = &mut self.core.lock().await;
-            let database_core = &mut core.database;
 
+            tracing::trace!("cleanup tables for {}", table.id);
             let mut table_ids = vec![table.id];
             table_ids.extend(internal_table_ids);
 
             let tables = &mut database_core.tables;
             let mut tables = BTreeMapTransaction::new(tables);
             for table_id in table_ids {
-                tables.remove(table_id);
+                let res = tables.remove(table_id);
+                assert!(res.is_some(), "table_id {} missing", table_id);
             }
             commit_meta!(self, tables)?;
-        }
+            table
+        };
 
         {
             let core = &mut self.core.lock().await;
@@ -1978,9 +1981,7 @@ impl CatalogManager {
         let table_key = (table.database_id, table.schema_id, table.name.clone());
         assert!(
             !database_core.sources.contains_key(&source.id)
-                && !database_core.tables.contains_key(&table.id)
-                && database_core.has_in_progress_creation(&source_key)
-                && database_core.has_in_progress_creation(&table_key),
+                && !database_core.tables.contains_key(&table.id),
             "table and source must be in creating procedure"
         );
 
@@ -2032,8 +2033,7 @@ impl CatalogManager {
         let user_core = &mut core.user;
         let key = (index.database_id, index.schema_id, index.name.clone());
         assert!(
-            !database_core.indexes.contains_key(&index.id)
-                && database_core.has_in_progress_creation(&key),
+            !database_core.indexes.contains_key(&index.id),
             "index must be in creating procedure"
         );
 
@@ -2188,8 +2188,7 @@ impl CatalogManager {
         let user_core = &mut core.user;
         let key = (sink.database_id, sink.schema_id, sink.name.clone());
         assert!(
-            !database_core.sinks.contains_key(&sink.id)
-                && database_core.has_in_progress_creation(&key),
+            !database_core.sinks.contains_key(&sink.id),
             "sink must be in creating procedure"
         );
 
diff --git a/src/meta/src/manager/catalog/utils.rs b/src/meta/src/manager/catalog/utils.rs
index 7e26e32ee62eb..ea579867fc320 100644
--- a/src/meta/src/manager/catalog/utils.rs
+++ b/src/meta/src/manager/catalog/utils.rs
@@ -401,7 +401,7 @@ impl ReplaceTableExprRewriter {
 
 #[cfg(test)]
 mod tests {
-    use crate::manager::catalog::utils::{alter_relation_rename, alter_relation_rename_refs};
+    use super::*;
 
     #[test]
     fn test_alter_table_rename() {
diff --git a/src/meta/src/manager/env.rs b/src/meta/src/manager/env.rs
index 16a4bcb248b23..28d8200c73ea5 100644
--- a/src/meta/src/manager/env.rs
+++ b/src/meta/src/manager/env.rs
@@ -16,6 +16,7 @@ use std::ops::Deref;
 use std::sync::Arc;
 
 use risingwave_common::config::{CompactionConfig, DefaultParallelism};
+use risingwave_meta_model_v2::prelude::Cluster;
 use risingwave_pb::meta::SystemParams;
 use risingwave_rpc_client::{ConnectorClient, StreamClientPool, StreamClientPoolRef};
 use sea_orm::EntityTrait;
@@ -28,7 +29,6 @@ use crate::manager::{
     NotificationManagerRef,
 };
 use crate::model::ClusterId;
-use crate::model_v2::prelude::Cluster;
 use crate::storage::MetaStoreRef;
 #[cfg(any(test, feature = "test"))]
 use crate::storage::{MemStore, MetaStoreBoxExt};
diff --git a/src/meta/src/manager/mod.rs b/src/meta/src/manager/mod.rs
index 35642ed0ec143..e7e5208856bc3 100644
--- a/src/meta/src/manager/mod.rs
+++ b/src/meta/src/manager/mod.rs
@@ -28,7 +28,6 @@ pub use env::{MetaSrvEnv, *};
 pub use id::*;
 pub use idle::*;
 pub use notification::{LocalNotification, MessageStatus, NotificationManagerRef, *};
+pub use risingwave_meta_model_v2::prelude;
 pub use streaming_job::*;
 pub use system_param::*;
-
-pub use super::model_v2::prelude;
diff --git a/src/meta/src/model_v2/ext/hummock.rs b/src/meta/src/model_v2/ext/hummock.rs
deleted file mode 100644
index 77111e2e7d202..0000000000000
--- a/src/meta/src/model_v2/ext/hummock.rs
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2023 RisingWave Labs
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use risingwave_pb::hummock::HummockPinnedVersion;
-use sea_orm::sea_query::OnConflict;
-use sea_orm::ActiveValue::{Set, Unchanged};
-use sea_orm::EntityTrait;
-
-use crate::model::{MetadataModelResult, Transactional};
-use crate::model_v2::hummock_pinned_version;
-use crate::model_v2::trx::Transaction;
-
-#[async_trait::async_trait]
-impl Transactional<Transaction> for HummockPinnedVersion {
-    async fn upsert_in_transaction(
-        &self,
-        trx: &mut crate::model_v2::trx::Transaction,
-    ) -> MetadataModelResult<()> {
-        // TODO: error type conversion
-        // TODO: integer type conversion
-        let m = hummock_pinned_version::ActiveModel {
-            context_id: Unchanged(self.context_id.try_into().unwrap()),
-            min_pinned_id: Set(self.min_pinned_id.try_into().unwrap()),
-        };
-        hummock_pinned_version::Entity::insert(m)
-            .on_conflict(
-                OnConflict::column(hummock_pinned_version::Column::ContextId)
-                    .update_columns([hummock_pinned_version::Column::MinPinnedId])
-                    .to_owned(),
-            )
-            .exec(trx)
-            .await
-            .unwrap();
-        Ok(())
-    }
-
-    async fn delete_in_transaction(
-        &self,
-        trx: &mut crate::model_v2::trx::Transaction,
-    ) -> MetadataModelResult<()> {
-        // TODO: error type conversion
-        // TODO: integer type conversion
-        let id: i32 = self.context_id.try_into().unwrap();
-        hummock_pinned_version::Entity::delete_by_id(id)
-            .exec(trx)
-            .await
-            .unwrap();
-        Ok(())
-    }
-}
diff --git a/src/meta/src/model_v2/migration/Cargo.toml b/src/meta/src/model_v2/migration/Cargo.toml
deleted file mode 100644
index d5d51d77da909..0000000000000
--- a/src/meta/src/model_v2/migration/Cargo.toml
+++ /dev/null
@@ -1,17 +0,0 @@
-[package]
-name = "model_migration"
-version = "0.1.0"
-edition = "2021"
-publish = false
-
-[lib]
-name = "model_migration"
-path = "src/lib.rs"
-
-[dependencies]
-async-std = { version = "1", features = ["attributes", "tokio1"] }
-uuid = { version = "1", features = ["v4"] }
-
-[dependencies.sea-orm-migration]
-version = "0.12.0"
-features = ["sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", "runtime-tokio-native-tls", "with-uuid"]
diff --git a/src/meta/src/model_v2/mod.rs b/src/meta/src/model_v2/mod.rs
deleted file mode 100644
index d799a608933ac..0000000000000
--- a/src/meta/src/model_v2/mod.rs
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2023 RisingWave Labs
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use std::collections::HashMap;
-
-use sea_orm::FromJsonQueryResult;
-use serde::{Deserialize, Serialize};
-
-pub mod prelude;
-
-pub mod actor;
-pub mod cluster;
-pub mod compaction_config;
-pub mod compaction_status;
-pub mod compaction_task;
-pub mod connection;
-pub mod database;
-pub mod ext;
-pub mod fragment;
-pub mod function;
-pub mod hummock_pinned_snapshot;
-pub mod hummock_pinned_version;
-pub mod hummock_version_delta;
-pub mod hummock_version_stats;
-pub mod index;
-pub mod object;
-pub mod object_dependency;
-pub mod schema;
-pub mod sink;
-pub mod source;
-pub mod system_parameter;
-pub mod table;
-pub mod trx;
-pub mod user;
-pub mod user_privilege;
-pub mod view;
-pub mod worker;
-pub mod worker_property;
-
-pub type WorkerId = u32;
-pub type TransactionId = u32;
-
-pub type ObjectId = u32;
-pub type DatabaseId = ObjectId;
-pub type SchemaId = ObjectId;
-pub type TableId = ObjectId;
-pub type SourceId = ObjectId;
-pub type SinkId = ObjectId;
-pub type IndexId = ObjectId;
-pub type ViewId = ObjectId;
-pub type FunctionId = ObjectId;
-pub type ConnectionId = ObjectId;
-pub type UserId = u32;
-
-#[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Eq, Serialize, Deserialize, Default)]
-pub struct I32Array(pub Vec<i32>);
-
-#[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Eq, Serialize, Deserialize, Default)]
-pub struct DataType(pub risingwave_pb::data::DataType);
-
-#[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Eq, Serialize, Deserialize, Default)]
-pub struct DataTypeArray(pub Vec<risingwave_pb::data::DataType>);
-
-#[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Serialize, Deserialize, Default)]
-pub struct FieldArray(pub Vec<risingwave_pb::plan_common::Field>);
-
-impl Eq for FieldArray {}
-
-#[derive(Clone, Debug, PartialEq, FromJsonQueryResult, Eq, Serialize, Deserialize, Default)]
-pub struct Property(pub HashMap<String, String>);
diff --git a/src/meta/src/model_v2/trx.rs b/src/meta/src/model_v2/trx.rs
deleted file mode 100644
index 4bfe6d0261de4..0000000000000
--- a/src/meta/src/model_v2/trx.rs
+++ /dev/null
@@ -1,276 +0,0 @@
-// Copyright 2023 RisingWave Labs
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-pub type Transaction = sea_orm::DatabaseTransaction;
-
-#[cfg(not(madsim))]
-#[cfg(test)]
-mod tests {
-    use std::collections::BTreeMap;
-
-    use risingwave_pb::hummock::HummockPinnedVersion;
-    use sea_orm::{EntityTrait, TransactionTrait};
-
-    use crate::controller::SqlMetaStore;
-    use crate::model::{BTreeMapTransaction, ValTransaction, VarTransaction};
-    use crate::model_v2::hummock_pinned_version::Model as HummockPinnedVersionModel;
-    use crate::model_v2::prelude::HummockPinnedVersion as HummockPinnedVersionEntity;
-    use crate::model_v2::trx::Transaction;
-
-    #[tokio::test]
-    async fn test_simple_var_transaction_commit() {
-        let store = SqlMetaStore::for_test().await;
-        let db = &store.conn;
-        let mut kv = HummockPinnedVersion {
-            context_id: 1,
-            min_pinned_id: 2,
-        };
-        let mut num_txn = VarTransaction::<'_, Transaction, _>::new(&mut kv);
-        num_txn.min_pinned_id = 3;
-        assert_eq!(num_txn.min_pinned_id, 3);
-        let mut txn = db.begin().await.unwrap();
-        num_txn.apply_to_txn(&mut txn).await.unwrap();
-        txn.commit().await.unwrap();
-        let db_val = HummockPinnedVersionEntity::find_by_id(1)
-            .one(db)
-            .await
-            .unwrap()
-            .unwrap();
-        assert_eq!(db_val.min_pinned_id, 3);
-        num_txn.commit();
-        assert_eq!(kv.min_pinned_id, 3);
-    }
-
-    #[test]
-    fn test_simple_var_transaction_abort() {
-        let mut kv = HummockPinnedVersion {
-            context_id: 1,
-            min_pinned_id: 11,
-        };
-        let mut num_txn = VarTransaction::<'_, Transaction, _>::new(&mut kv);
-        num_txn.min_pinned_id = 2;
-        num_txn.abort();
-        assert_eq!(11, kv.min_pinned_id);
-    }
-
-    #[tokio::test]
-    async fn test_tree_map_transaction_commit() {
-        let mut map: BTreeMap<u32, HummockPinnedVersion> = BTreeMap::new();
-        // to remove
-        map.insert(
-            1,
-            HummockPinnedVersion {
-                context_id: 1,
-                min_pinned_id: 11,
-            },
-        );
-        // to-remove-after-modify
-        map.insert(
-            2,
-            HummockPinnedVersion {
-                context_id: 2,
-                min_pinned_id: 22,
-            },
-        );
-        // first
-        map.insert(
-            3,
-            HummockPinnedVersion {
-                context_id: 3,
-                min_pinned_id: 33,
-            },
-        );
-
-        let mut map_copy = map.clone();
-        let mut map_txn = BTreeMapTransaction::new(&mut map);
-        map_txn.remove(1);
-        map_txn.insert(
-            2,
-            HummockPinnedVersion {
-                context_id: 2,
-                min_pinned_id: 0,
-            },
-        );
-        map_txn.remove(2);
-        // first
-        map_txn.insert(
-            3,
-            HummockPinnedVersion {
-                context_id: 3,
-                min_pinned_id: 333,
-            },
-        );
-        // second
-        map_txn.insert(
-            4,
-            HummockPinnedVersion {
-                context_id: 4,
-                min_pinned_id: 44,
-            },
-        );
-        assert_eq!(
-            &HummockPinnedVersion {
-                context_id: 4,
-                min_pinned_id: 44
-            },
-            map_txn.get(&4).unwrap()
-        );
-        // third
-        map_txn.insert(
-            5,
-            HummockPinnedVersion {
-                context_id: 5,
-                min_pinned_id: 55,
-            },
-        );
-        assert_eq!(
-            &HummockPinnedVersion {
-                context_id: 5,
-                min_pinned_id: 55
-            },
-            map_txn.get(&5).unwrap()
-        );
-
-        let mut third_entry = map_txn.get_mut(5).unwrap();
-        third_entry.min_pinned_id = 555;
-        assert_eq!(
-            &HummockPinnedVersion {
-                context_id: 5,
-                min_pinned_id: 555
-            },
-            map_txn.get(&5).unwrap()
-        );
-
-        let store = SqlMetaStore::for_test().await;
-        let db = &store.conn;
-        let mut txn = db.begin().await.unwrap();
-        map_txn.apply_to_txn(&mut txn).await.unwrap();
-        txn.commit().await.unwrap();
-
-        let db_rows: Vec<HummockPinnedVersionModel> =
-            HummockPinnedVersionEntity::find().all(db).await.unwrap();
-        assert_eq!(db_rows.len(), 3);
-        assert_eq!(
-            1,
-            db_rows
-                .iter()
-                .filter(|m| m.context_id == 3 && m.min_pinned_id == 333)
-                .count()
-        );
-        assert_eq!(
-            1,
-            db_rows
-                .iter()
-                .filter(|m| m.context_id == 4 && m.min_pinned_id == 44)
-                .count()
-        );
-        assert_eq!(
-            1,
-            db_rows
-                .iter()
-                .filter(|m| m.context_id == 5 && m.min_pinned_id == 555)
-                .count()
-        );
-        map_txn.commit();
-
-        // replay the change to local copy and compare
-        map_copy.remove(&1).unwrap();
-        map_copy.insert(
-            2,
-            HummockPinnedVersion {
-                context_id: 2,
-                min_pinned_id: 22,
-            },
-        );
-        map_copy.remove(&2).unwrap();
-        map_copy.insert(
-            3,
-            HummockPinnedVersion {
-                context_id: 3,
-                min_pinned_id: 333,
-            },
-        );
-        map_copy.insert(
-            4,
-            HummockPinnedVersion {
-                context_id: 4,
-                min_pinned_id: 44,
-            },
-        );
-        map_copy.insert(
-            5,
-            HummockPinnedVersion {
-                context_id: 5,
-                min_pinned_id: 555,
-            },
-        );
-        assert_eq!(map_copy, map);
-    }
-
-    #[tokio::test]
-    async fn test_tree_map_entry_update_transaction_commit() {
-        let mut map: BTreeMap<u32, HummockPinnedVersion> = BTreeMap::new();
-        map.insert(
-            1,
-            HummockPinnedVersion {
-                context_id: 1,
-                min_pinned_id: 11,
-            },
-        );
-
-        let mut map_txn = BTreeMapTransaction::new(&mut map);
-        let mut first_entry_txn = map_txn.new_entry_txn(1).unwrap();
-        first_entry_txn.min_pinned_id = 111;
-
-        let store = SqlMetaStore::for_test().await;
-        let db = &store.conn;
-        let mut txn = db.begin().await.unwrap();
-        first_entry_txn.apply_to_txn(&mut txn).await.unwrap();
-        txn.commit().await.unwrap();
-        first_entry_txn.commit();
-
-        let db_rows: Vec<HummockPinnedVersionModel> =
-            HummockPinnedVersionEntity::find().all(db).await.unwrap();
-        assert_eq!(db_rows.len(), 1);
-        assert_eq!(
-            1,
-            db_rows
-                .iter()
-                .filter(|m| m.context_id == 1 && m.min_pinned_id == 111)
-                .count()
-        );
-        assert_eq!(111, map.get(&1).unwrap().min_pinned_id);
-    }
-
-    #[tokio::test]
-    async fn test_tree_map_entry_insert_transaction_commit() {
-        let mut map: BTreeMap<u32, HummockPinnedVersion> = BTreeMap::new();
-
-        let mut map_txn = BTreeMapTransaction::new(&mut map);
-        let first_entry_txn = map_txn.new_entry_insert_txn(
-            1,
-            HummockPinnedVersion {
-                context_id: 1,
-                min_pinned_id: 11,
-            },
-        );
-        let store = SqlMetaStore::for_test().await;
-        let db = &store.conn;
-        let mut txn = db.begin().await.unwrap();
-        first_entry_txn.apply_to_txn(&mut txn).await.unwrap();
-        txn.commit().await.unwrap();
-        first_entry_txn.commit();
-        assert_eq!(11, map.get(&1).unwrap().min_pinned_id);
-    }
-}
diff --git a/src/meta/src/model_v2/worker.rs b/src/meta/src/model_v2/worker.rs
deleted file mode 100644
index 08cdb2be34da1..0000000000000
--- a/src/meta/src/model_v2/worker.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2023 RisingWave Labs
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-use sea_orm::entity::prelude::*;
-
-use crate::model_v2::{TransactionId, WorkerId};
-
-#[derive(Clone, Debug, Hash, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
-#[sea_orm(rs_type = "String", db_type = "String(None)")]
-pub enum WorkerType {
-    #[sea_orm(string_value = "FRONTEND")]
-    Frontend,
-    #[sea_orm(string_value = "COMPUTE_NODE")]
-    ComputeNode,
-    #[sea_orm(string_value = "RISE_CTL")]
-    RiseCtl,
-    #[sea_orm(string_value = "COMPACTOR")]
-    Compactor,
-    #[sea_orm(string_value = "META")]
-    Meta,
-}
-
-#[derive(Clone, Debug, PartialEq, Eq, EnumIter, DeriveActiveEnum)]
-#[sea_orm(rs_type = "String", db_type = "String(None)")]
-pub enum WorkerStatus {
-    #[sea_orm(string_value = "STARTING")]
-    Starting,
-    #[sea_orm(string_value = "RUNNING")]
-    Running,
-}
-
-#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
-#[sea_orm(table_name = "worker")]
-pub struct Model {
-    #[sea_orm(primary_key)]
-    pub worker_id: WorkerId,
-    pub worker_type: WorkerType,
-    pub host: String,
-    pub port: i32,
-    pub status: WorkerStatus,
-    pub transaction_id: Option<TransactionId>,
-}
-
-#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
-pub enum Relation {
-    #[sea_orm(has_many = "super::worker_property::Entity")]
-    WorkerProperty,
-}
-
-impl Related<super::worker_property::Entity> for Entity {
-    fn to() -> RelationDef {
-        Relation::WorkerProperty.def()
-    }
-}
-
-impl ActiveModelBehavior for ActiveModel {}
diff --git a/src/meta/src/rpc/ddl_controller.rs b/src/meta/src/rpc/ddl_controller.rs
index 04b9729c5a5b8..5f40d9a561f4e 100644
--- a/src/meta/src/rpc/ddl_controller.rs
+++ b/src/meta/src/rpc/ddl_controller.rs
@@ -15,6 +15,7 @@
 use std::cmp::Ordering;
 use std::num::NonZeroUsize;
 use std::sync::Arc;
+use std::time::Duration;
 
 use itertools::Itertools;
 use risingwave_common::config::DefaultParallelism;
@@ -29,6 +30,7 @@ use risingwave_pb::ddl_service::alter_relation_name_request::Relation;
 use risingwave_pb::ddl_service::DdlProgress;
 use risingwave_pb::stream_plan::StreamFragmentGraph as StreamFragmentGraphProto;
 use tokio::sync::Semaphore;
+use tokio::time::sleep;
 use tracing::log::warn;
 use tracing::Instrument;
 
@@ -429,6 +431,7 @@ impl DdlController {
 
         let env = StreamEnvironment::from_protobuf(fragment_graph.get_env().unwrap());
 
+        // Persist tables
         tracing::debug!(id = stream_job.id(), "preparing stream job");
         let fragment_graph = self
             .prepare_stream_job(&mut stream_job, fragment_graph)
@@ -1094,4 +1097,19 @@ impl DdlController {
             }
         }
     }
+
+    pub async fn wait(&self) -> MetaResult<()> {
+        for _ in 0..30 * 60 {
+            if self
+                .catalog_manager
+                .list_creating_background_mvs()
+                .await
+                .is_empty()
+            {
+                return Ok(());
+            }
+            sleep(Duration::from_secs(1)).await;
+        }
+        Err(MetaError::cancelled("timeout".into()))
+    }
 }
diff --git a/src/meta/src/rpc/election/sql.rs b/src/meta/src/rpc/election/sql.rs
index fc985bd9a4521..a027e8bffdfd1 100644
--- a/src/meta/src/rpc/election/sql.rs
+++ b/src/meta/src/rpc/election/sql.rs
@@ -44,7 +44,7 @@ impl<T: SqlDriver> SqlBackendElectionClient<T> {
     }
 }
 
-#[derive(sqlx::FromRow, Debug, FromQueryResult)]
+#[derive(Debug, FromQueryResult)]
 pub struct ElectionRow {
     service: String,
     id: String,
@@ -191,16 +191,14 @@ DO
     }
 
     async fn leader(&self, service_name: &str) -> MetaResult<Option<ElectionRow>> {
-        let string = format!(
-            r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = $1;"#,
-            table = Self::election_table_name()
-        );
-
         let query_result = self
             .conn
             .query_one(Statement::from_sql_and_values(
                 DatabaseBackend::Sqlite,
-                string,
+                format!(
+                    r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = $1;"#,
+                    table = Self::election_table_name()
+                ),
                 vec![Value::from(service_name)],
             ))
             .await?;
@@ -285,19 +283,17 @@ impl SqlDriver for MySqlDriver {
     }
 
     async fn update_heartbeat(&self, service_name: &str, id: &str) -> MetaResult<()> {
-        let string = format!(
-            r#"INSERT INTO {table} (id, service, last_heartbeat)
-VALUES(?, ?, NOW())
-ON duplicate KEY
-   UPDATE last_heartbeat = VALUES(last_heartbeat);
-"#,
-            table = Self::member_table_name()
-        );
-
         self.conn
             .execute(Statement::from_sql_and_values(
                 DatabaseBackend::MySql,
-                string,
+                format!(
+                    r#"INSERT INTO {table} (id, service, last_heartbeat)
+        VALUES(?, ?, NOW())
+        ON duplicate KEY
+           UPDATE last_heartbeat = VALUES(last_heartbeat);
+        "#,
+                    table = Self::member_table_name()
+                ),
                 vec![Value::from(id), Value::from(service_name)],
             ))
             .await?;
@@ -353,16 +349,14 @@ ON duplicate KEY
     }
 
     async fn leader(&self, service_name: &str) -> MetaResult<Option<ElectionRow>> {
-        let string = format!(
-            r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = ?;"#,
-            table = Self::election_table_name()
-        );
-
         let query_result = self
             .conn
             .query_one(Statement::from_sql_and_values(
                 DatabaseBackend::MySql,
-                string,
+                format!(
+                    r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = ?;"#,
+                    table = Self::election_table_name()
+                ),
                 vec![Value::from(service_name)],
             ))
             .await?;
@@ -375,16 +369,14 @@ ON duplicate KEY
     }
 
     async fn candidates(&self, service_name: &str) -> MetaResult<Vec<ElectionRow>> {
-        let string = format!(
-            r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = ?;"#,
-            table = Self::member_table_name()
-        );
-
         let all = self
             .conn
             .query_all(Statement::from_sql_and_values(
                 DatabaseBackend::MySql,
-                string,
+                format!(
+                    r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = ?;"#,
+                    table = Self::member_table_name()
+                ),
                 vec![Value::from(service_name)],
             ))
             .await?;
@@ -412,16 +404,14 @@ ON duplicate KEY
         ))
         .await?;
 
-        let string = format!(
-            r#"
-        DELETE FROM {table} WHERE service = ? AND id = ?;
-        "#,
-            table = Self::member_table_name()
-        );
-
         txn.execute(Statement::from_sql_and_values(
             DatabaseBackend::MySql,
-            string,
+            format!(
+                r#"
+            DELETE FROM {table} WHERE service = ? AND id = ?;
+            "#,
+                table = Self::member_table_name()
+            ),
             vec![Value::from(service_name), Value::from(id)],
         ))
         .await?;
@@ -451,20 +441,18 @@ impl SqlDriver for PostgresDriver {
     }
 
     async fn update_heartbeat(&self, service_name: &str, id: &str) -> MetaResult<()> {
-        let string = format!(
-            r#"INSERT INTO {table} (id, service, last_heartbeat)
-VALUES($1, $2, NOW())
-ON CONFLICT (id, service)
-DO
-   UPDATE SET last_heartbeat = EXCLUDED.last_heartbeat;
-"#,
-            table = Self::member_table_name()
-        );
-
         self.conn
             .execute(Statement::from_sql_and_values(
                 DatabaseBackend::Postgres,
-                string,
+                format!(
+                    r#"INSERT INTO {table} (id, service, last_heartbeat)
+        VALUES($1, $2, NOW())
+        ON CONFLICT (id, service)
+        DO
+           UPDATE SET last_heartbeat = EXCLUDED.last_heartbeat;
+        "#,
+                    table = Self::member_table_name()
+                ),
                 vec![Value::from(id), Value::from(service_name)],
             ))
             .await?;
@@ -478,30 +466,28 @@ DO
         id: &str,
         ttl: i64,
     ) -> MetaResult<ElectionRow> {
-        let string = format!(
-            r#"INSERT INTO {table} (service, id, last_heartbeat)
-VALUES ($1, $2, NOW())
-ON CONFLICT (service)
-    DO UPDATE
-    SET id             = CASE
-                             WHEN {table}.last_heartbeat < NOW() - $3::INTERVAL THEN EXCLUDED.id
-                             ELSE {table}.id
-        END,
-        last_heartbeat = CASE
-                             WHEN {table}.last_heartbeat < NOW() - $3::INTERVAL THEN EXCLUDED.last_heartbeat
-                             WHEN {table}.id = EXCLUDED.id THEN EXCLUDED.last_heartbeat
-                             ELSE {table}.last_heartbeat
-            END
-RETURNING service, id, last_heartbeat;
-"#,
-            table = Self::election_table_name()
-        );
-
         let query_result = self
             .conn
             .query_one(Statement::from_sql_and_values(
                 DatabaseBackend::Postgres,
-                string,
+                format!(
+                    r#"INSERT INTO {table} (service, id, last_heartbeat)
+        VALUES ($1, $2, NOW())
+        ON CONFLICT (service)
+            DO UPDATE
+            SET id             = CASE
+                                     WHEN {table}.last_heartbeat < NOW() - $3::INTERVAL THEN EXCLUDED.id
+                                     ELSE {table}.id
+                END,
+                last_heartbeat = CASE
+                                     WHEN {table}.last_heartbeat < NOW() - $3::INTERVAL THEN EXCLUDED.last_heartbeat
+                                     WHEN {table}.id = EXCLUDED.id THEN EXCLUDED.last_heartbeat
+                                     ELSE {table}.last_heartbeat
+                    END
+        RETURNING service, id, last_heartbeat;
+        "#,
+                    table = Self::election_table_name()
+                ),
                 vec![
                     Value::from(service_name),
                     Value::from(id),
@@ -541,16 +527,14 @@ RETURNING service, id, last_heartbeat;
     }
 
     async fn candidates(&self, service_name: &str) -> MetaResult<Vec<ElectionRow>> {
-        let string = format!(
-            r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = $1;"#,
-            table = Self::member_table_name()
-        );
-
         let all = self
             .conn
             .query_all(Statement::from_sql_and_values(
                 DatabaseBackend::Postgres,
-                string,
+                format!(
+                    r#"SELECT service, id, last_heartbeat FROM {table} WHERE service = $1;"#,
+                    table = Self::member_table_name()
+                ),
                 vec![Value::from(service_name)],
             ))
             .await?;
@@ -578,16 +562,14 @@ RETURNING service, id, last_heartbeat;
         ))
         .await?;
 
-        let string = format!(
-            r#"
-        DELETE FROM {table} WHERE service = $1 AND id = $2;
-        "#,
-            table = Self::member_table_name()
-        );
-
         txn.execute(Statement::from_sql_and_values(
             DatabaseBackend::Postgres,
-            string,
+            format!(
+                r#"
+            DELETE FROM {table} WHERE service = $1 AND id = $2;
+            "#,
+                table = Self::member_table_name()
+            ),
             vec![Value::from(service_name), Value::from(id)],
         ))
         .await?;
diff --git a/src/prost/helpers/src/lib.rs b/src/prost/helpers/src/lib.rs
index f4d1d1a45baa1..5796e14273fe9 100644
--- a/src/prost/helpers/src/lib.rs
+++ b/src/prost/helpers/src/lib.rs
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 #![feature(iterator_try_collect)]
 
 use proc_macro::TokenStream;
@@ -24,7 +24,7 @@ mod generate;
 
 /// This attribute will be placed before any pb types, including messages and enums.
 /// See `prost/helpers/README.md` for more details.
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 #[proc_macro_derive(AnyPB)]
 pub fn any_pb(input: TokenStream) -> TokenStream {
     // Parse the string representation
@@ -37,7 +37,7 @@ pub fn any_pb(input: TokenStream) -> TokenStream {
 }
 
 // Procedure macros can not be tested from the same crate.
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 fn produce(ast: &DeriveInput) -> Result<TokenStream2> {
     let name = &ast.ident;
 
diff --git a/src/rpc_client/src/lib.rs b/src/rpc_client/src/lib.rs
index 3e744bb61608d..6afa67ef88efe 100644
--- a/src/rpc_client/src/lib.rs
+++ b/src/rpc_client/src/lib.rs
@@ -19,7 +19,7 @@
 #![feature(result_option_inspect)]
 #![feature(type_alias_impl_trait)]
 #![feature(associated_type_defaults)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(iterator_try_collect)]
 #![feature(hash_extract_if)]
 #![feature(try_blocks)]
@@ -30,12 +30,11 @@ use std::any::type_name;
 use std::fmt::{Debug, Formatter};
 use std::future::Future;
 use std::iter::repeat;
-use std::pin::pin;
 use std::sync::Arc;
 
 use anyhow::anyhow;
 use async_trait::async_trait;
-use futures::future::{select, try_join_all, Either};
+use futures::future::try_join_all;
 use futures::stream::{BoxStream, Peekable};
 use futures::{Stream, StreamExt};
 use moka::future::Cache;
@@ -58,13 +57,12 @@ mod sink_coordinate_client;
 mod stream_client;
 mod tracing;
 
-use std::pin::Pin;
-
 pub use compactor_client::{CompactorClient, GrpcCompactorProxyClient};
 pub use compute_client::{ComputeClient, ComputeClientPool, ComputeClientPoolRef};
 pub use connector_client::{ConnectorClient, SinkCoordinatorStreamHandle, SinkWriterStreamHandle};
 pub use hummock_meta_client::{CompactionEventItem, HummockMetaClient};
 pub use meta_client::{MetaClient, SinkCoordinationRpcClient};
+use risingwave_common::util::await_future_with_monitor_error_stream;
 pub use sink_coordinate_client::CoordinatorStreamHandle;
 pub use stream_client::{StreamClient, StreamClientPool, StreamClientPoolRef};
 
@@ -240,25 +238,16 @@ impl<REQ: 'static, RSP: 'static> BidiStreamHandle<REQ, RSP> {
     }
 
     pub async fn send_request(&mut self, request: REQ) -> Result<()> {
-        // Poll the response stream to early see the error
-        let send_request_result = match select(
-            pin!(self.request_sender.send(request)),
-            pin!(Pin::new(&mut self.response_stream).peek()),
+        match await_future_with_monitor_error_stream(
+            &mut self.response_stream,
+            self.request_sender.send(request),
         )
         .await
         {
-            Either::Left((result, _)) => result,
-            Either::Right((response_result, send_future)) => match response_result {
-                None => {
-                    return Err(anyhow!("end of response stream").into());
-                }
-                Some(Err(e)) => {
-                    return Err(e.clone().into());
-                }
-                Some(Ok(_)) => send_future.await,
-            },
-        };
-        send_request_result
-            .map_err(|_| anyhow!("unable to send request {}", type_name::<REQ>()).into())
+            Ok(send_result) => send_result
+                .map_err(|_| anyhow!("unable to send request {}", type_name::<REQ>()).into()),
+            Err(None) => Err(anyhow!("end of response stream").into()),
+            Err(Some(e)) => Err(e.into()),
+        }
     }
 }
diff --git a/src/rpc_client/src/meta_client.rs b/src/rpc_client/src/meta_client.rs
index 827860d1af7b3..95b746ea33e6c 100644
--- a/src/rpc_client/src/meta_client.rs
+++ b/src/rpc_client/src/meta_client.rs
@@ -698,6 +698,12 @@ impl MetaClient {
         Ok(resp.snapshot.unwrap())
     }
 
+    pub async fn wait(&self) -> Result<()> {
+        let request = WaitRequest {};
+        self.inner.wait(request).await?;
+        Ok(())
+    }
+
     pub async fn cancel_creating_jobs(&self, jobs: PbJobs) -> Result<Vec<u32>> {
         let request = CancelCreatingJobsRequest { jobs: Some(jobs) };
         let resp = self.inner.cancel_creating_jobs(request).await?;
@@ -1719,6 +1725,7 @@ macro_rules! for_all_meta_rpc {
             ,{ ddl_client, list_connections, ListConnectionsRequest, ListConnectionsResponse }
             ,{ ddl_client, drop_connection, DropConnectionRequest, DropConnectionResponse }
             ,{ ddl_client, get_tables, GetTablesRequest, GetTablesResponse }
+            ,{ ddl_client, wait, WaitRequest, WaitResponse }
             ,{ hummock_client, unpin_version_before, UnpinVersionBeforeRequest, UnpinVersionBeforeResponse }
             ,{ hummock_client, get_current_version, GetCurrentVersionRequest, GetCurrentVersionResponse }
             ,{ hummock_client, replay_version_delta, ReplayVersionDeltaRequest, ReplayVersionDeltaResponse }
diff --git a/src/source/src/lib.rs b/src/source/src/lib.rs
index 1a32888cdf651..aaa045c607c95 100644
--- a/src/source/src/lib.rs
+++ b/src/source/src/lib.rs
@@ -16,7 +16,7 @@
 #![feature(trait_alias)]
 #![feature(lint_reasons)]
 #![feature(result_option_inspect)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(hash_extract_if)]
 #![feature(type_alias_impl_trait)]
 #![feature(box_patterns)]
diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs
index ecae5a9663a88..5d802bae99cdc 100644
--- a/src/sqlparser/src/ast/mod.rs
+++ b/src/sqlparser/src/ast/mod.rs
@@ -1294,6 +1294,9 @@ pub enum Statement {
     ///
     /// Note: RisingWave specific statement.
     Flush,
+    /// WAIT for ALL running stream jobs to finish.
+    /// It will block the current session until the condition is met.
+    Wait,
 }
 
 impl fmt::Display for Statement {
@@ -1787,6 +1790,9 @@ impl fmt::Display for Statement {
             Statement::Flush => {
                 write!(f, "FLUSH")
             }
+            Statement::Wait => {
+                write!(f, "WAIT")
+            }
             Statement::Begin { modes } => {
                 write!(f, "BEGIN")?;
                 if !modes.is_empty() {
diff --git a/src/sqlparser/src/ast/statement.rs b/src/sqlparser/src/ast/statement.rs
index 76de970a919a9..58fb2d50c6287 100644
--- a/src/sqlparser/src/ast/statement.rs
+++ b/src/sqlparser/src/ast/statement.rs
@@ -294,6 +294,7 @@ pub enum Encode {
     Json,     // Keyword::JSON
     Bytes,    // Keyword::BYTES
     Native,
+    Template,
 }
 
 // TODO: unify with `from_keyword`
@@ -309,6 +310,7 @@ impl fmt::Display for Encode {
                 Encode::Json => "JSON",
                 Encode::Bytes => "BYTES",
                 Encode::Native => "NATIVE",
+                Encode::Template => "TEMPLATE",
             }
         )
     }
@@ -322,13 +324,12 @@ impl Encode {
             "CSV" => Encode::Csv,
             "PROTOBUF" => Encode::Protobuf,
             "JSON" => Encode::Json,
+            "TEMPLATE" => Encode::Template,
             "NATIVE" => Encode::Native, // used internally for schema change
-            _ => {
-                return Err(ParserError::ParserError(
-                    "expected AVRO | BYTES | CSV | PROTOBUF | JSON | NATIVE after Encode"
-                        .to_string(),
-                ))
-            }
+            _ => return Err(ParserError::ParserError(
+                "expected AVRO | BYTES | CSV | PROTOBUF | JSON | NATIVE | TEMPLATE after Encode"
+                    .to_string(),
+            )),
         })
     }
 }
diff --git a/src/sqlparser/src/keywords.rs b/src/sqlparser/src/keywords.rs
index 5c2fedb0ea547..4188f06f76ae3 100644
--- a/src/sqlparser/src/keywords.rs
+++ b/src/sqlparser/src/keywords.rs
@@ -540,6 +540,7 @@ define_keywords!(
     VIEWS,
     VIRTUAL,
     VOLATILE,
+    WAIT,
     WATERMARK,
     WHEN,
     WHENEVER,
diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs
index ee054f7d17031..5cc094a204268 100644
--- a/src/sqlparser/src/parser.rs
+++ b/src/sqlparser/src/parser.rs
@@ -259,6 +259,7 @@ impl Parser {
                 Keyword::PREPARE => Ok(self.parse_prepare()?),
                 Keyword::COMMENT => Ok(self.parse_comment()?),
                 Keyword::FLUSH => Ok(Statement::Flush),
+                Keyword::WAIT => Ok(Statement::Wait),
                 _ => self.expected(
                     "an SQL statement",
                     Token::Word(w).with_location(token.location),
diff --git a/src/storage/Cargo.toml b/src/storage/Cargo.toml
index f1022ab2fd935..fc01eba294564 100644
--- a/src/storage/Cargo.toml
+++ b/src/storage/Cargo.toml
@@ -25,7 +25,7 @@ dyn-clone = "1.0.14"
 either = "1"
 enum-as-inner = "0.6"
 fail = "0.5"
-foyer = { git = "https://github.com/mrcroxx/foyer", rev = "438eec8" }
+foyer = { git = "https://github.com/MrCroxx/foyer", rev = "2261151" }
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
 futures-async-stream = { workspace = true }
 hex = "0.4"
diff --git a/src/storage/backup/src/lib.rs b/src/storage/backup/src/lib.rs
index 3e0549db188a2..1daacbf691c0d 100644
--- a/src/storage/backup/src/lib.rs
+++ b/src/storage/backup/src/lib.rs
@@ -25,7 +25,7 @@
 #![feature(lazy_cell)]
 #![feature(let_chains)]
 #![feature(error_generic_member_access)]
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
 pub mod error;
 pub mod meta_snapshot;
diff --git a/src/storage/hummock_test/Cargo.toml b/src/storage/hummock_test/Cargo.toml
index 600a5249ddf1b..8abf2f45e6855 100644
--- a/src/storage/hummock_test/Cargo.toml
+++ b/src/storage/hummock_test/Cargo.toml
@@ -20,7 +20,7 @@ bytes = { version = "1" }
 clap = { version = "4", features = ["derive"] }
 fail = "0.5"
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
-futures-async-stream = "0.2"
+futures-async-stream = "0.2.9"
 itertools = "0.11"
 parking_lot = "0.12"
 rand = "0.8"
@@ -47,7 +47,7 @@ futures = { version = "0.3", default-features = false, features = [
     "executor",
 ] }
 
-futures-async-stream = "0.2"
+futures-async-stream = "0.2.9"
 risingwave_test_runner = { workspace = true }
 serial_test = "2.0"
 sync-point = { path = "../../utils/sync-point" }
diff --git a/src/storage/hummock_test/src/bin/replay/main.rs b/src/storage/hummock_test/src/bin/replay/main.rs
index 1e9c9591bc864..ae6038d8b5d16 100644
--- a/src/storage/hummock_test/src/bin/replay/main.rs
+++ b/src/storage/hummock_test/src/bin/replay/main.rs
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 #![feature(bound_map)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(stmt_expr_attributes)]
 #![feature(proc_macro_hygiene)]
 
diff --git a/src/storage/hummock_test/src/lib.rs b/src/storage/hummock_test/src/lib.rs
index 73e1d8cd0eaad..593771435f1e0 100644
--- a/src/storage/hummock_test/src/lib.rs
+++ b/src/storage/hummock_test/src/lib.rs
@@ -17,7 +17,6 @@
 #![feature(bound_map)]
 #![feature(type_alias_impl_trait)]
 #![feature(associated_type_bounds)]
-#![feature(return_position_impl_trait_in_trait)]
 
 #[cfg(test)]
 mod compactor_tests;
diff --git a/src/storage/hummock_trace/Cargo.toml b/src/storage/hummock_trace/Cargo.toml
index 46eabf17835e4..150b35b79cda0 100644
--- a/src/storage/hummock_trace/Cargo.toml
+++ b/src/storage/hummock_trace/Cargo.toml
@@ -14,7 +14,7 @@ bincode = { version = "=2.0.0-rc.3", features = ["serde"] }
 byteorder = "1"
 bytes = { version = "1", features = ["serde"] }
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
-futures-async-stream = "0.2"
+futures-async-stream = "0.2.9"
 parking_lot = "0.12"
 prost = { workspace = true }
 risingwave_common = { workspace = true }
diff --git a/src/storage/hummock_trace/src/lib.rs b/src/storage/hummock_trace/src/lib.rs
index df757c58cc4fa..8c6c8913205ab 100644
--- a/src/storage/hummock_trace/src/lib.rs
+++ b/src/storage/hummock_trace/src/lib.rs
@@ -16,7 +16,7 @@
 #![feature(cursor_remaining)]
 #![feature(bound_map)]
 #![feature(trait_alias)]
-#![feature(generators)]
+#![feature(coroutines)]
 
 mod collector;
 mod error;
diff --git a/src/storage/src/hummock/compactor/compactor_runner.rs b/src/storage/src/hummock/compactor/compactor_runner.rs
index 583bab3d10b3c..a21016014d247 100644
--- a/src/storage/src/hummock/compactor/compactor_runner.rs
+++ b/src/storage/src/hummock/compactor/compactor_runner.rs
@@ -140,9 +140,6 @@ impl CompactorRunner {
         Ok((self.split_index, ssts, compaction_stat))
     }
 
-    // This is a clippy bug, see https://github.com/rust-lang/rust-clippy/issues/11380.
-    // TODO: remove `allow` here after the issued is closed.
-    #[expect(clippy::needless_pass_by_ref_mut)]
     pub async fn build_delete_range_iter<F: CompactionFilter>(
         sstable_infos: &Vec<SstableInfo>,
         sstable_store: &SstableStoreRef,
diff --git a/src/storage/src/hummock/compactor/mod.rs b/src/storage/src/hummock/compactor/mod.rs
index d2f36167675e7..137682d6f7825 100644
--- a/src/storage/src/hummock/compactor/mod.rs
+++ b/src/storage/src/hummock/compactor/mod.rs
@@ -325,7 +325,7 @@ impl Compactor {
 
 /// The background compaction thread that receives compaction tasks from hummock compaction
 /// manager and runs compaction tasks.
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 pub fn start_compactor(
     compactor_context: CompactorContext,
     hummock_meta_client: Arc<dyn HummockMetaClient>,
@@ -618,7 +618,7 @@ pub fn start_compactor(
 
 /// The background compaction thread that receives compaction tasks from hummock compaction
 /// manager and runs compaction tasks.
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 pub fn start_shared_compactor(
     grpc_proxy_client: GrpcCompactorProxyClient,
     mut receiver: mpsc::UnboundedReceiver<Request<DispatchCompactionTaskRequest>>,
diff --git a/src/storage/src/hummock/event_handler/uploader.rs b/src/storage/src/hummock/event_handler/uploader.rs
index 995a9d181e2f5..a07da55fb7046 100644
--- a/src/storage/src/hummock/event_handler/uploader.rs
+++ b/src/storage/src/hummock/event_handler/uploader.rs
@@ -1641,9 +1641,6 @@ mod tests {
         (buffer_tracker, uploader, new_task_notifier)
     }
 
-    // This is a clippy bug, see https://github.com/rust-lang/rust-clippy/issues/11380.
-    // TODO: remove `allow` here after the issued is closed.
-    #[expect(clippy::needless_pass_by_ref_mut)]
     async fn assert_uploader_pending(uploader: &mut HummockUploader) {
         for _ in 0..10 {
             yield_now().await;
diff --git a/src/storage/src/hummock/file_cache/store.rs b/src/storage/src/hummock/file_cache/store.rs
index 9de54552ae077..222700c8376b2 100644
--- a/src/storage/src/hummock/file_cache/store.rs
+++ b/src/storage/src/hummock/file_cache/store.rs
@@ -256,6 +256,7 @@ where
                     io_size: config.device_io_size,
                 },
                 allocator_bits: config.allocator_bits,
+                catalog_bits: 6,
                 admissions,
                 reinsertions: config.reinsertions,
                 buffer_pool_size: config.buffer_pool_size,
diff --git a/src/storage/src/lib.rs b/src/storage/src/lib.rs
index c5ffe656ab893..0f2f155f6a903 100644
--- a/src/storage/src/lib.rs
+++ b/src/storage/src/lib.rs
@@ -18,7 +18,7 @@
 #![feature(bound_map)]
 #![feature(custom_test_frameworks)]
 #![feature(extract_if)]
-#![feature(generators)]
+#![feature(coroutines)]
 #![feature(hash_extract_if)]
 #![feature(lint_reasons)]
 #![feature(proc_macro_hygiene)]
@@ -35,15 +35,13 @@
 #![feature(btree_extract_if)]
 #![feature(exact_size_is_empty)]
 #![feature(lazy_cell)]
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 #![recursion_limit = "256"]
 #![feature(error_generic_member_access)]
 #![feature(let_chains)]
 #![feature(associated_type_bounds)]
 #![feature(exclusive_range_pattern)]
 #![feature(impl_trait_in_assoc_type)]
-#![feature(async_fn_in_trait)]
-#![feature(return_position_impl_trait_in_trait)]
 
 pub mod hummock;
 pub mod memory;
diff --git a/src/storage/src/row_serde/value_serde.rs b/src/storage/src/row_serde/value_serde.rs
index 5d56cdba2d96d..9048b90c23a53 100644
--- a/src/storage/src/row_serde/value_serde.rs
+++ b/src/storage/src/row_serde/value_serde.rs
@@ -114,9 +114,10 @@ impl ValueRowSerdeNew for ColumnAwareSerde {
                     // It's okay since we previously banned impure expressions in default columns.
                     build_from_prost(&expr.expect("expr should not be none"))
                         .expect("build_from_prost error")
-                        .eval_row_infallible(&OwnedRow::empty())
+                        .eval_row(&OwnedRow::empty())
                         .now_or_never()
                         .expect("constant expression should not be async")
+                        .expect("eval_row failed")
                 };
                 Some((i, value))
             } else {
diff --git a/src/stream/clippy.toml b/src/stream/clippy.toml
index a6969d5bd607b..b7257c4acb98c 100644
--- a/src/stream/clippy.toml
+++ b/src/stream/clippy.toml
@@ -3,8 +3,8 @@ disallowed-methods = [
 
     { path = "risingwave_expr::expr::build_from_prost", reason = "Expressions in streaming must be in non-strict mode. Please use `build_non_strict_from_prost` instead." },
     { path = "risingwave_expr::expr::build_func", reason = "Expressions in streaming must be in non-strict mode. Please use `build_func_non_strict` instead." },
-    { path = "risingwave_expr::expr::Expression::eval", reason = "Please use `Expression::eval_infallible` instead." },
-    { path = "risingwave_expr::expr::Expression::eval_row", reason = "Please use `Expression::eval_row_infallible` instead." },
+    { path = "risingwave_expr::expr::Expression::eval", reason = "Please use `NonStrictExpression::eval_infallible` instead." },
+    { path = "risingwave_expr::expr::Expression::eval_row", reason = "Please use `NonStrictExpression::eval_row_infallible` instead." },
 
     { path = "risingwave_common::error::internal_err", reason = "Please use per-crate error type instead." },
     { path = "risingwave_common::error::internal_error", reason = "Please use per-crate error type instead." },
diff --git a/src/stream/src/common/table/state_table_cache.rs b/src/stream/src/common/table/state_table_cache.rs
index 156637a41a1a4..b458ef52537e4 100644
--- a/src/stream/src/common/table/state_table_cache.rs
+++ b/src/stream/src/common/table/state_table_cache.rs
@@ -67,9 +67,9 @@ type WatermarkCacheKey = DefaultOrdered<OwnedRow>;
 ///        Issue delete ranges.
 ///
 ///    B. Refreshing the cache:
-///        On barrier, do table scan from most_recently_cleaned_watermark (inclusive) to +inf.
+///        On barrier, do table scan from `most_recently_cleaned_watermark` (inclusive) to +inf.
 ///        Take the Top N rows and insert into cache.
-///        This has to be implemented in state_table.
+///        This has to be implemented in `state_table`.
 ///        We do not need to store any values, just the keys.
 ///
 /// TODO(kwannoel):
diff --git a/src/stream/src/executor/aggregation/mod.rs b/src/stream/src/executor/aggregation/mod.rs
index dd0ce9d01c544..9bb1113152962 100644
--- a/src/stream/src/executor/aggregation/mod.rs
+++ b/src/stream/src/executor/aggregation/mod.rs
@@ -21,6 +21,7 @@ use risingwave_common::bail;
 use risingwave_common::buffer::Bitmap;
 use risingwave_common::catalog::{Field, Schema};
 use risingwave_expr::aggregate::{AggCall, AggKind};
+use risingwave_expr::expr::{LogReport, NonStrictExpression};
 use risingwave_storage::StateStore;
 
 use crate::common::table::state_table::StateTable;
@@ -74,7 +75,12 @@ pub async fn agg_call_filter_res(
     }
 
     if let Some(ref filter) = agg_call.filter {
-        if let Bool(filter_res) = filter.eval_infallible(chunk).await.as_ref() {
+        // TODO: should we build `filter` in non-strict mode?
+        if let Bool(filter_res) = NonStrictExpression::new_topmost(&**filter, LogReport)
+            .eval_infallible(chunk)
+            .await
+            .as_ref()
+        {
             vis &= filter_res.to_bitmap();
         } else {
             bail!("Filter can only receive bool array");
diff --git a/src/stream/src/executor/backfill/utils.rs b/src/stream/src/executor/backfill/utils.rs
index 259b67d5f202b..663f9be94cf5e 100644
--- a/src/stream/src/executor/backfill/utils.rs
+++ b/src/stream/src/executor/backfill/utils.rs
@@ -309,9 +309,6 @@ pub(crate) async fn get_progress_per_vnode<S: StateStore, const IS_REPLICATED: b
 }
 
 /// Flush the data
-// This is a clippy bug, see https://github.com/rust-lang/rust-clippy/issues/11380.
-// TODO: remove `allow` here after the issued is closed.
-#[expect(clippy::needless_pass_by_ref_mut)]
 pub(crate) async fn flush_data<S: StateStore, const IS_REPLICATED: bool>(
     table: &mut StateTableInner<S, BasicSerde, IS_REPLICATED>,
     epoch: EpochPair,
diff --git a/src/stream/src/executor/dynamic_filter.rs b/src/stream/src/executor/dynamic_filter.rs
index e8eb4da545f2e..ccb55b75c24fc 100644
--- a/src/stream/src/executor/dynamic_filter.rs
+++ b/src/stream/src/executor/dynamic_filter.rs
@@ -26,7 +26,7 @@ use risingwave_common::row::{self, once, OwnedRow, OwnedRow as RowData, Row};
 use risingwave_common::types::{DataType, Datum, DefaultOrd, ScalarImpl, ToDatumRef, ToOwnedDatum};
 use risingwave_common::util::iter_util::ZipEqDebug;
 use risingwave_expr::expr::{
-    build_func_non_strict, BoxedExpression, InputRefExpression, LiteralExpression,
+    build_func_non_strict, InputRefExpression, LiteralExpression, NonStrictExpression,
 };
 use risingwave_pb::expr::expr_node::Type as ExprNodeType;
 use risingwave_pb::expr::expr_node::Type::{
@@ -97,7 +97,7 @@ impl<S: StateStore, const USE_WATERMARK_CACHE: bool> DynamicFilterExecutor<S, US
     async fn apply_batch(
         &mut self,
         chunk: &StreamChunk,
-        condition: Option<BoxedExpression>,
+        condition: Option<NonStrictExpression>,
     ) -> Result<(Vec<Op>, Bitmap), StreamExecutorError> {
         let mut new_ops = Vec::with_capacity(chunk.capacity());
         let mut new_visibility = BitmapBuilder::with_capacity(chunk.capacity());
@@ -265,7 +265,7 @@ impl<S: StateStore, const USE_WATERMARK_CACHE: bool> DynamicFilterExecutor<S, US
         let dynamic_cond = {
             let eval_error_report = ActorEvalErrorReport {
                 actor_context: self.ctx.clone(),
-                identity: self.identity.as_str().into(),
+                identity: Arc::from(self.identity.as_str()),
             };
             move |literal: Datum| {
                 literal.map(|scalar| {
diff --git a/src/stream/src/executor/filter.rs b/src/stream/src/executor/filter.rs
index ef593f8734284..1a1e645e44e6d 100644
--- a/src/stream/src/executor/filter.rs
+++ b/src/stream/src/executor/filter.rs
@@ -19,7 +19,7 @@ use risingwave_common::array::{Array, ArrayImpl, Op, StreamChunk};
 use risingwave_common::buffer::BitmapBuilder;
 use risingwave_common::catalog::Schema;
 use risingwave_common::util::iter_util::ZipEqFast;
-use risingwave_expr::expr::BoxedExpression;
+use risingwave_expr::expr::NonStrictExpression;
 
 use super::*;
 
@@ -34,14 +34,14 @@ pub struct FilterExecutor {
 
     /// Expression of the current filter, note that the filter must always have the same output for
     /// the same input.
-    expr: BoxedExpression,
+    expr: NonStrictExpression,
 }
 
 impl FilterExecutor {
     pub fn new(
         ctx: ActorContextRef,
         input: Box<dyn Executor>,
-        expr: BoxedExpression,
+        expr: NonStrictExpression,
         executor_id: u64,
     ) -> Self {
         let input_info = input.info();
@@ -190,8 +190,8 @@ mod tests {
     use risingwave_common::array::StreamChunk;
     use risingwave_common::catalog::{Field, Schema};
     use risingwave_common::types::DataType;
-    use risingwave_expr::expr::build_from_pretty;
 
+    use super::super::test_utils::expr::build_from_pretty;
     use super::super::test_utils::MockSource;
     use super::super::*;
     use super::*;
diff --git a/src/stream/src/executor/hash_join.rs b/src/stream/src/executor/hash_join.rs
index 7aed840679c82..75414fe24a379 100644
--- a/src/stream/src/executor/hash_join.rs
+++ b/src/stream/src/executor/hash_join.rs
@@ -28,7 +28,7 @@ use risingwave_common::row::{OwnedRow, Row};
 use risingwave_common::types::{DataType, DefaultOrd, ToOwnedDatum};
 use risingwave_common::util::epoch::EpochPair;
 use risingwave_common::util::iter_util::ZipEqDebug;
-use risingwave_expr::expr::BoxedExpression;
+use risingwave_expr::expr::NonStrictExpression;
 use risingwave_expr::ExprError;
 use risingwave_storage::StateStore;
 use tokio::time::Instant;
@@ -202,11 +202,11 @@ impl<K: HashKey, S: StateStore> std::fmt::Debug for JoinSide<K, S> {
 
 impl<K: HashKey, S: StateStore> JoinSide<K, S> {
     // WARNING: Please do not call this until we implement it.
-    #[expect(dead_code)]
     fn is_dirty(&self) -> bool {
         unimplemented!()
     }
 
+    #[expect(dead_code)]
     fn clear_cache(&mut self) {
         assert!(
             !self.is_dirty(),
@@ -242,9 +242,9 @@ pub struct HashJoinExecutor<K: HashKey, S: StateStore, const T: JoinTypePrimitiv
     /// The parameters of the right join executor
     side_r: JoinSide<K, S>,
     /// Optional non-equi join conditions
-    cond: Option<BoxedExpression>,
+    cond: Option<NonStrictExpression>,
     /// Column indices of watermark output and offset expression of each inequality, respectively.
-    inequality_pairs: Vec<(Vec<usize>, Option<BoxedExpression>)>,
+    inequality_pairs: Vec<(Vec<usize>, Option<NonStrictExpression>)>,
     /// The output watermark of each inequality condition and its value is the minimum of the
     /// calculation result of both side. It will be used to generate watermark into downstream
     /// and do state cleaning if `clean_state` field of that inequality is `true`.
@@ -313,7 +313,7 @@ struct EqJoinArgs<'a, K: HashKey, S: StateStore> {
     side_l: &'a mut JoinSide<K, S>,
     side_r: &'a mut JoinSide<K, S>,
     actual_output_data_types: &'a [DataType],
-    cond: &'a mut Option<BoxedExpression>,
+    cond: &'a mut Option<NonStrictExpression>,
     inequality_watermarks: &'a [Option<Watermark>],
     chunk: StreamChunk,
     append_only_optimize: bool,
@@ -448,8 +448,8 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
         pk_indices: PkIndices,
         output_indices: Vec<usize>,
         executor_id: u64,
-        cond: Option<BoxedExpression>,
-        inequality_pairs: Vec<(usize, usize, bool, Option<BoxedExpression>)>,
+        cond: Option<NonStrictExpression>,
+        inequality_pairs: Vec<(usize, usize, bool, Option<NonStrictExpression>)>,
         op_info: String,
         state_table_l: StateTable<S>,
         degree_state_table_l: StateTable<S>,
@@ -912,7 +912,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> HashJoinExecutor<K,
                 // allow since we will handle error manually.
                 #[allow(clippy::disallowed_methods)]
                 let eval_result = delta_expression
-                    .eval_row(&OwnedRow::new(vec![Some(input_watermark.val)]))
+                    .inner().eval_row(&OwnedRow::new(vec![Some(input_watermark.val)]))
                     .await;
                 match eval_result {
                     Ok(value) => input_watermark.val = value.unwrap(),
@@ -1275,11 +1275,11 @@ mod tests {
     use risingwave_common::hash::{Key128, Key64};
     use risingwave_common::types::ScalarImpl;
     use risingwave_common::util::sort_util::OrderType;
-    use risingwave_expr::expr::build_from_pretty;
     use risingwave_storage::memory::MemoryStateStore;
 
     use super::*;
     use crate::common::table::state_table::StateTable;
+    use crate::executor::test_utils::expr::build_from_pretty;
     use crate::executor::test_utils::{MessageSender, MockSource, StreamExecutorTestExt};
     use crate::executor::{ActorContext, Barrier, EpochPair};
 
@@ -1327,7 +1327,7 @@ mod tests {
         (state_table, degree_state_table)
     }
 
-    fn create_cond(condition_text: Option<String>) -> BoxedExpression {
+    fn create_cond(condition_text: Option<String>) -> NonStrictExpression {
         build_from_pretty(
             condition_text
                 .as_deref()
@@ -1339,7 +1339,7 @@ mod tests {
         with_condition: bool,
         null_safe: bool,
         condition_text: Option<String>,
-        inequality_pairs: Vec<(usize, usize, bool, Option<BoxedExpression>)>,
+        inequality_pairs: Vec<(usize, usize, bool, Option<NonStrictExpression>)>,
     ) -> (MessageSender, MessageSender, BoxedMessageStream) {
         let schema = Schema {
             fields: vec![
diff --git a/src/stream/src/executor/hop_window.rs b/src/stream/src/executor/hop_window.rs
index c6fffcd94896d..42d13d790da88 100644
--- a/src/stream/src/executor/hop_window.rs
+++ b/src/stream/src/executor/hop_window.rs
@@ -19,7 +19,7 @@ use futures_async_stream::try_stream;
 use itertools::Itertools;
 use risingwave_common::array::{DataChunk, Op};
 use risingwave_common::types::Interval;
-use risingwave_expr::expr::BoxedExpression;
+use risingwave_expr::expr::NonStrictExpression;
 use risingwave_expr::ExprError;
 
 use super::error::StreamExecutorError;
@@ -33,8 +33,8 @@ pub struct HopWindowExecutor {
     pub time_col_idx: usize,
     pub window_slide: Interval,
     pub window_size: Interval,
-    window_start_exprs: Vec<BoxedExpression>,
-    window_end_exprs: Vec<BoxedExpression>,
+    window_start_exprs: Vec<NonStrictExpression>,
+    window_end_exprs: Vec<NonStrictExpression>,
     pub output_indices: Vec<usize>,
     chunk_size: usize,
 }
@@ -48,8 +48,8 @@ impl HopWindowExecutor {
         time_col_idx: usize,
         window_slide: Interval,
         window_size: Interval,
-        window_start_exprs: Vec<BoxedExpression>,
-        window_end_exprs: Vec<BoxedExpression>,
+        window_start_exprs: Vec<NonStrictExpression>,
+        window_end_exprs: Vec<NonStrictExpression>,
         output_indices: Vec<usize>,
         chunk_size: usize,
     ) -> Self {
@@ -251,6 +251,7 @@ mod tests {
     use risingwave_common::types::test_utils::IntervalTestExt;
     use risingwave_common::types::{DataType, Interval};
     use risingwave_expr::expr::test_utils::make_hop_window_expression;
+    use risingwave_expr::expr::NonStrictExpression;
 
     use crate::executor::test_utils::MockSource;
     use crate::executor::{ActorContext, Executor, ExecutorInfo, StreamChunk};
@@ -302,8 +303,14 @@ mod tests {
             2,
             window_slide,
             window_size,
-            window_start_exprs,
-            window_end_exprs,
+            window_start_exprs
+                .into_iter()
+                .map(NonStrictExpression::for_test)
+                .collect(),
+            window_end_exprs
+                .into_iter()
+                .map(NonStrictExpression::for_test)
+                .collect(),
             output_indices,
             CHUNK_SIZE,
         )
diff --git a/src/stream/src/executor/integration_tests.rs b/src/stream/src/executor/integration_tests.rs
index a9c219a25641f..cd505093294f1 100644
--- a/src/stream/src/executor/integration_tests.rs
+++ b/src/stream/src/executor/integration_tests.rs
@@ -152,7 +152,7 @@ async fn test_merger_sum_aggr() {
         vec![],
         vec![
             // TODO: use the new streaming_if_null expression here, and add `None` tests
-            Box::new(InputRefExpression::new(DataType::Int64, 1)),
+            NonStrictExpression::for_test(InputRefExpression::new(DataType::Int64, 1)),
         ],
         3,
         MultiMap::new(),
diff --git a/src/stream/src/executor/mod.rs b/src/stream/src/executor/mod.rs
index 99b090e21a240..c28d6ec8564d9 100644
--- a/src/stream/src/executor/mod.rs
+++ b/src/stream/src/executor/mod.rs
@@ -31,7 +31,7 @@ use risingwave_common::util::epoch::{Epoch, EpochPair};
 use risingwave_common::util::tracing::TracingContext;
 use risingwave_common::util::value_encoding::{DatumFromProtoExt, DatumToProtoExt};
 use risingwave_connector::source::SplitImpl;
-use risingwave_expr::expr::BoxedExpression;
+use risingwave_expr::expr::{Expression, NonStrictExpression};
 use risingwave_pb::data::PbEpoch;
 use risingwave_pb::expr::PbInputRef;
 use risingwave_pb::stream_plan::barrier::{BarrierKind, PbMutation};
@@ -641,7 +641,7 @@ impl Watermark {
 
     pub async fn transform_with_expr(
         self,
-        expr: &BoxedExpression,
+        expr: &NonStrictExpression<impl Expression>,
         new_col_idx: usize,
     ) -> Option<Self> {
         let Self { col_idx, val, .. } = self;
@@ -651,7 +651,7 @@ impl Watermark {
             OwnedRow::new(row)
         };
         let val = expr.eval_row_infallible(&row).await?;
-        Some(Self::new(new_col_idx, expr.return_type(), val))
+        Some(Self::new(new_col_idx, expr.inner().return_type(), val))
     }
 
     /// Transform the watermark with the given output indices. If this watermark is not in the
diff --git a/src/stream/src/executor/project.rs b/src/stream/src/executor/project.rs
index 56a31bde901b9..8cfebfecd3f33 100644
--- a/src/stream/src/executor/project.rs
+++ b/src/stream/src/executor/project.rs
@@ -21,7 +21,7 @@ use risingwave_common::catalog::{Field, Schema};
 use risingwave_common::row::{Row, RowExt};
 use risingwave_common::types::ToOwnedDatum;
 use risingwave_common::util::iter_util::ZipEqFast;
-use risingwave_expr::expr::BoxedExpression;
+use risingwave_expr::expr::NonStrictExpression;
 
 use super::*;
 
@@ -38,7 +38,7 @@ struct Inner {
     info: ExecutorInfo,
 
     /// Expressions of the current projection.
-    exprs: Vec<BoxedExpression>,
+    exprs: Vec<NonStrictExpression>,
     /// All the watermark derivations, (input_column_index, output_column_index). And the
     /// derivation expression is the project's expression itself.
     watermark_derivations: MultiMap<usize, usize>,
@@ -58,7 +58,7 @@ impl ProjectExecutor {
         ctx: ActorContextRef,
         input: Box<dyn Executor>,
         pk_indices: PkIndices,
-        exprs: Vec<BoxedExpression>,
+        exprs: Vec<NonStrictExpression>,
         executor_id: u64,
         watermark_derivations: MultiMap<usize, usize>,
         nondecreasing_expr_indices: Vec<usize>,
@@ -233,11 +233,12 @@ mod tests {
     use risingwave_common::array::{DataChunk, StreamChunk};
     use risingwave_common::catalog::{Field, Schema};
     use risingwave_common::types::{DataType, Datum};
-    use risingwave_expr::expr::{self, build_from_pretty, Expression, ValueImpl};
+    use risingwave_expr::expr::{self, Expression, ValueImpl};
 
     use super::super::test_utils::MockSource;
     use super::super::*;
     use super::*;
+    use crate::executor::test_utils::expr::build_from_pretty;
     use crate::executor::test_utils::StreamExecutorTestExt;
 
     #[tokio::test]
@@ -345,7 +346,7 @@ mod tests {
 
         let a_expr = build_from_pretty("(add:int8 $0:int8 1:int8)");
         let b_expr = build_from_pretty("(subtract:int8 $0:int8 1:int8)");
-        let c_expr = DummyNondecreasingExpr.boxed();
+        let c_expr = NonStrictExpression::for_test(DummyNondecreasingExpr);
 
         let project = Box::new(ProjectExecutor::new(
             ActorContext::create(123),
diff --git a/src/stream/src/executor/project_set.rs b/src/stream/src/executor/project_set.rs
index 6867e3d55bfde..ff3214db88eaa 100644
--- a/src/stream/src/executor/project_set.rs
+++ b/src/stream/src/executor/project_set.rs
@@ -24,6 +24,7 @@ use risingwave_common::catalog::{Field, Schema};
 use risingwave_common::row::{Row, RowExt};
 use risingwave_common::types::{DataType, Datum, DatumRef, ToOwnedDatum};
 use risingwave_common::util::iter_util::ZipEqFast;
+use risingwave_expr::expr::{LogReport, NonStrictExpression};
 use risingwave_expr::table_function::ProjectSetSelectItem;
 
 use super::error::StreamExecutorError;
@@ -260,7 +261,11 @@ impl Inner {
                 ProjectSetSelectItem::Expr(expr) => {
                     watermark
                         .clone()
-                        .transform_with_expr(expr, expr_idx + PROJ_ROW_ID_OFFSET)
+                        .transform_with_expr(
+                            // TODO: should we build `expr` in non-strict mode?
+                            &NonStrictExpression::new_topmost(expr, LogReport),
+                            expr_idx + PROJ_ROW_ID_OFFSET,
+                        )
                         .await
                 }
                 ProjectSetSelectItem::TableFunction(_) => {
diff --git a/src/stream/src/executor/temporal_join.rs b/src/stream/src/executor/temporal_join.rs
index 3c8cde63c4ca9..82c1e56649672 100644
--- a/src/stream/src/executor/temporal_join.rs
+++ b/src/stream/src/executor/temporal_join.rs
@@ -32,7 +32,7 @@ use risingwave_common::hash::{HashKey, NullBitmap};
 use risingwave_common::row::{OwnedRow, Row, RowExt};
 use risingwave_common::types::DataType;
 use risingwave_common::util::iter_util::ZipEqDebug;
-use risingwave_expr::expr::BoxedExpression;
+use risingwave_expr::expr::NonStrictExpression;
 use risingwave_hummock_sdk::{HummockEpoch, HummockReadEpoch};
 use risingwave_storage::store::PrefetchOptions;
 use risingwave_storage::table::batch_table::storage_table::StorageTable;
@@ -57,7 +57,7 @@ pub struct TemporalJoinExecutor<K: HashKey, S: StateStore, const T: JoinTypePrim
     left_join_keys: Vec<usize>,
     right_join_keys: Vec<usize>,
     null_safe: Vec<bool>,
-    condition: Option<BoxedExpression>,
+    condition: Option<NonStrictExpression>,
     output_indices: Vec<usize>,
     pk_indices: PkIndices,
     schema: Schema,
@@ -338,7 +338,7 @@ impl<K: HashKey, S: StateStore, const T: JoinTypePrimitive> TemporalJoinExecutor
         left_join_keys: Vec<usize>,
         right_join_keys: Vec<usize>,
         null_safe: Vec<bool>,
-        condition: Option<BoxedExpression>,
+        condition: Option<NonStrictExpression>,
         pk_indices: PkIndices,
         output_indices: Vec<usize>,
         table_output_indices: Vec<usize>,
diff --git a/src/stream/src/executor/test_utils.rs b/src/stream/src/executor/test_utils.rs
index bb4864ac04ef8..13a9237cf0159 100644
--- a/src/stream/src/executor/test_utils.rs
+++ b/src/stream/src/executor/test_utils.rs
@@ -34,11 +34,11 @@ pub mod prelude {
     pub use risingwave_common::test_prelude::StreamChunkTestExt;
     pub use risingwave_common::types::DataType;
     pub use risingwave_common::util::sort_util::OrderType;
-    pub use risingwave_expr::expr::build_from_pretty;
     pub use risingwave_storage::memory::MemoryStateStore;
     pub use risingwave_storage::StateStore;
 
     pub use crate::common::table::state_table::StateTable;
+    pub use crate::executor::test_utils::expr::build_from_pretty;
     pub use crate::executor::test_utils::{MessageSender, MockSource, StreamExecutorTestExt};
     pub use crate::executor::{ActorContext, BoxedMessageStream, Executor, PkIndices};
 }
@@ -263,6 +263,14 @@ pub trait StreamExecutorTestExt: MessageStream + Unpin {
 // FIXME: implement on any `impl MessageStream` if the analyzer works well.
 impl StreamExecutorTestExt for BoxedMessageStream {}
 
+pub mod expr {
+    use risingwave_expr::expr::NonStrictExpression;
+    /// Wraps `risingwave_expr::expr::build_from_pretty(s)` with `NonStrictExpression::for_test`.
+    pub fn build_from_pretty(s: impl AsRef<str>) -> NonStrictExpression {
+        NonStrictExpression::for_test(risingwave_expr::expr::build_from_pretty(s))
+    }
+}
+
 pub mod agg_executor {
     use std::sync::atomic::AtomicU64;
     use std::sync::Arc;
diff --git a/src/stream/src/executor/values.rs b/src/stream/src/executor/values.rs
index 624b2531bf7bd..8c09b56aa3551 100644
--- a/src/stream/src/executor/values.rs
+++ b/src/stream/src/executor/values.rs
@@ -21,7 +21,7 @@ use risingwave_common::array::{DataChunk, Op, StreamChunk};
 use risingwave_common::catalog::Schema;
 use risingwave_common::ensure;
 use risingwave_common::util::iter_util::ZipEqFast;
-use risingwave_expr::expr::BoxedExpression;
+use risingwave_expr::expr::NonStrictExpression;
 use tokio::sync::mpsc::UnboundedReceiver;
 
 use super::{
@@ -40,7 +40,7 @@ pub struct ValuesExecutor {
     barrier_receiver: UnboundedReceiver<Barrier>,
     progress: CreateMviewProgress,
 
-    rows: vec::IntoIter<Vec<BoxedExpression>>,
+    rows: vec::IntoIter<Vec<NonStrictExpression>>,
     pk_indices: PkIndices,
     identity: String,
     schema: Schema,
@@ -51,7 +51,7 @@ impl ValuesExecutor {
     pub fn new(
         ctx: ActorContextRef,
         progress: CreateMviewProgress,
-        rows: Vec<Vec<BoxedExpression>>,
+        rows: Vec<Vec<NonStrictExpression>>,
         schema: Schema,
         barrier_receiver: UnboundedReceiver<Barrier>,
         executor_id: u64,
@@ -167,7 +167,7 @@ mod tests {
     };
     use risingwave_common::catalog::{Field, Schema};
     use risingwave_common::types::{DataType, ScalarImpl, StructType};
-    use risingwave_expr::expr::{BoxedExpression, LiteralExpression};
+    use risingwave_expr::expr::{BoxedExpression, LiteralExpression, NonStrictExpression};
     use tokio::sync::mpsc::unbounded_channel;
 
     use super::ValuesExecutor;
@@ -202,11 +202,11 @@ mod tests {
                     vec![],
                 ),
                 Some(ScalarImpl::Struct(value)),
-            )) as BoxedExpression,
+            )),
             Box::new(LiteralExpression::new(
                 DataType::Int64,
                 Some(ScalarImpl::Int64(0)),
-            )) as BoxedExpression,
+            )),
         ];
         let fields = exprs
             .iter() // for each column
@@ -215,7 +215,10 @@ mod tests {
         let values_executor_struct = ValuesExecutor::new(
             ActorContext::create(actor_id),
             progress,
-            vec![exprs],
+            vec![exprs
+                .into_iter()
+                .map(NonStrictExpression::for_test)
+                .collect()],
             Schema { fields },
             barrier_receiver,
             10005,
diff --git a/src/stream/src/executor/watermark_filter.rs b/src/stream/src/executor/watermark_filter.rs
index ad332112ef269..5e5454cecff93 100644
--- a/src/stream/src/executor/watermark_filter.rs
+++ b/src/stream/src/executor/watermark_filter.rs
@@ -23,7 +23,8 @@ use risingwave_common::row::{OwnedRow, Row};
 use risingwave_common::types::{DataType, DefaultOrd, ScalarImpl};
 use risingwave_common::{bail, row};
 use risingwave_expr::expr::{
-    build_func_non_strict, BoxedExpression, Expression, InputRefExpression, LiteralExpression,
+    build_func_non_strict, ExpressionBoxExt, InputRefExpression, LiteralExpression,
+    NonStrictExpression,
 };
 use risingwave_expr::Result as ExprResult;
 use risingwave_pb::expr::expr_node::Type;
@@ -44,7 +45,7 @@ use crate::task::ActorEvalErrorReport;
 pub struct WatermarkFilterExecutor<S: StateStore> {
     input: BoxedExecutor,
     /// The expression used to calculate the watermark value.
-    watermark_expr: BoxedExpression,
+    watermark_expr: NonStrictExpression,
     /// The column we should generate watermark and filter on.
     event_time_col_idx: usize,
     ctx: ActorContextRef,
@@ -55,7 +56,7 @@ pub struct WatermarkFilterExecutor<S: StateStore> {
 impl<S: StateStore> WatermarkFilterExecutor<S> {
     pub fn new(
         input: BoxedExecutor,
-        watermark_expr: BoxedExpression,
+        watermark_expr: NonStrictExpression,
         event_time_col_idx: usize,
         ctx: ActorContextRef,
         table: StateTable<S>,
@@ -298,7 +299,7 @@ impl<S: StateStore> WatermarkFilterExecutor<S> {
         event_time_col_idx: usize,
         watermark: ScalarImpl,
         eval_error_report: ActorEvalErrorReport,
-    ) -> ExprResult<BoxedExpression> {
+    ) -> ExprResult<NonStrictExpression> {
         build_func_non_strict(
             Type::GreaterThanOrEqual,
             DataType::Boolean,
@@ -350,11 +351,11 @@ mod tests {
     use risingwave_common::test_prelude::StreamChunkTestExt;
     use risingwave_common::types::Date;
     use risingwave_common::util::sort_util::OrderType;
-    use risingwave_expr::expr::build_from_pretty;
     use risingwave_storage::memory::MemoryStateStore;
     use risingwave_storage::table::Distribution;
 
     use super::*;
+    use crate::executor::test_utils::expr::build_from_pretty;
     use crate::executor::test_utils::{MessageSender, MockSource};
     use crate::executor::ActorContext;
 
diff --git a/src/stream/src/from_proto/hash_join.rs b/src/stream/src/from_proto/hash_join.rs
index 44799af9405c2..87174282e517a 100644
--- a/src/stream/src/from_proto/hash_join.rs
+++ b/src/stream/src/from_proto/hash_join.rs
@@ -18,7 +18,7 @@ use std::sync::Arc;
 use risingwave_common::hash::{HashKey, HashKeyDispatcher};
 use risingwave_common::types::DataType;
 use risingwave_expr::expr::{
-    build_func_non_strict, build_non_strict_from_prost, BoxedExpression, InputRefExpression,
+    build_func_non_strict, build_non_strict_from_prost, InputRefExpression, NonStrictExpression,
 };
 pub use risingwave_pb::expr::expr_node::Type as ExprType;
 use risingwave_pb::plan_common::JoinType as JoinTypeProto;
@@ -109,7 +109,8 @@ impl ExecutorBuilder for HashJoinExecutorBuilder {
                             build_non_strict_from_prost(
                                 delta_expression.delta.as_ref().unwrap(),
                                 params.eval_error_report.clone(),
-                            )?,
+                            )?
+                            .into_inner(),
                         ],
                         params.eval_error_report.clone(),
                     )?)
@@ -175,8 +176,8 @@ struct HashJoinExecutorDispatcherArgs<S: StateStore> {
     pk_indices: PkIndices,
     output_indices: Vec<usize>,
     executor_id: u64,
-    cond: Option<BoxedExpression>,
-    inequality_pairs: Vec<(usize, usize, bool, Option<BoxedExpression>)>,
+    cond: Option<NonStrictExpression>,
+    inequality_pairs: Vec<(usize, usize, bool, Option<NonStrictExpression>)>,
     op_info: String,
     state_table_l: StateTable<S>,
     degree_state_table_l: StateTable<S>,
diff --git a/src/stream/src/from_proto/source/fs_fetch.rs b/src/stream/src/from_proto/source/fs_fetch.rs
index ecf242f044b04..b6df84c8560e4 100644
--- a/src/stream/src/from_proto/source/fs_fetch.rs
+++ b/src/stream/src/from_proto/source/fs_fetch.rs
@@ -23,7 +23,8 @@ use risingwave_storage::StateStore;
 
 use crate::error::StreamResult;
 use crate::executor::{
-    BoxedExecutor, Executor, FsFetchExecutor, SourceStateTableHandler, StreamSourceCore,
+    BoxedExecutor, Executor, FlowControlExecutor, FsFetchExecutor, SourceStateTableHandler,
+    StreamSourceCore,
 };
 use crate::from_proto::ExecutorBuilder;
 use crate::task::{ExecutorParams, LocalStreamManagerCore};
@@ -99,7 +100,7 @@ impl ExecutorBuilder for FsFetchExecutorBuilder {
             state_table_handler,
         );
 
-        Ok(FsFetchExecutor::new(
+        let executor = FsFetchExecutor::new(
             params.actor_context,
             schema,
             params.pk_indices,
@@ -109,6 +110,11 @@ impl ExecutorBuilder for FsFetchExecutorBuilder {
             source_ctrl_opts,
             params.env.connector_params(),
         )
-        .boxed())
+        .boxed();
+
+        if let Ok(rate_limit) = source.get_rate_limit() {
+            return Ok(FlowControlExecutor::new(executor, *rate_limit).boxed());
+        }
+        Ok(executor)
     }
 }
diff --git a/src/stream/src/from_proto/temporal_join.rs b/src/stream/src/from_proto/temporal_join.rs
index 8b7b3b6af1335..58699089e8c27 100644
--- a/src/stream/src/from_proto/temporal_join.rs
+++ b/src/stream/src/from_proto/temporal_join.rs
@@ -18,7 +18,7 @@ use risingwave_common::catalog::{ColumnDesc, TableId, TableOption};
 use risingwave_common::hash::{HashKey, HashKeyDispatcher};
 use risingwave_common::types::DataType;
 use risingwave_common::util::sort_util::OrderType;
-use risingwave_expr::expr::{build_non_strict_from_prost, BoxedExpression};
+use risingwave_expr::expr::{build_non_strict_from_prost, NonStrictExpression};
 use risingwave_pb::plan_common::{JoinType as JoinTypeProto, StorageTableDesc};
 use risingwave_storage::table::batch_table::storage_table::StorageTable;
 use risingwave_storage::table::Distribution;
@@ -190,7 +190,7 @@ struct TemporalJoinExecutorDispatcherArgs<S: StateStore> {
     left_join_keys: Vec<usize>,
     right_join_keys: Vec<usize>,
     null_safe: Vec<bool>,
-    condition: Option<BoxedExpression>,
+    condition: Option<NonStrictExpression>,
     pk_indices: PkIndices,
     output_indices: Vec<usize>,
     table_output_indices: Vec<usize>,
diff --git a/src/stream/src/lib.rs b/src/stream/src/lib.rs
index 5a68b1b712b26..389dfae7b8c0c 100644
--- a/src/stream/src/lib.rs
+++ b/src/stream/src/lib.rs
@@ -21,8 +21,8 @@
 #![feature(let_chains)]
 #![feature(hash_extract_if)]
 #![feature(extract_if)]
-#![feature(generators)]
-#![feature(iter_from_generator)]
+#![feature(coroutines)]
+#![feature(iter_from_coroutine)]
 #![feature(proc_macro_hygiene)]
 #![feature(stmt_expr_attributes)]
 #![feature(allocator_api)]
@@ -36,13 +36,11 @@
 #![feature(bound_map)]
 #![feature(iter_order_by)]
 #![feature(exact_size_is_empty)]
-#![feature(return_position_impl_trait_in_trait)]
 #![feature(impl_trait_in_assoc_type)]
 #![feature(test)]
 #![feature(is_sorted)]
 #![feature(btree_cursors)]
 #![feature(assert_matches)]
-#![feature(async_fn_in_trait)]
 
 #[macro_use]
 extern crate tracing;
diff --git a/src/stream/tests/integration_tests/hash_agg.rs b/src/stream/tests/integration_tests/hash_agg.rs
index 1b61bc5cd1d7f..9f4908f252532 100644
--- a/src/stream/tests/integration_tests/hash_agg.rs
+++ b/src/stream/tests/integration_tests/hash_agg.rs
@@ -284,7 +284,7 @@ async fn test_hash_agg_emit_on_window_close() {
     };
 
     check_with_script(
-        || create_executor(),
+        create_executor,
         &format!(
             r###"
             - !barrier 1
diff --git a/src/stream/tests/integration_tests/hop_window.rs b/src/stream/tests/integration_tests/hop_window.rs
index 167857cc7d9fc..9d6d879240fc0 100644
--- a/src/stream/tests/integration_tests/hop_window.rs
+++ b/src/stream/tests/integration_tests/hop_window.rs
@@ -15,6 +15,7 @@
 use risingwave_common::types::test_utils::IntervalTestExt;
 use risingwave_common::types::{Interval, Timestamp};
 use risingwave_expr::expr::test_utils::make_hop_window_expression;
+use risingwave_expr::expr::NonStrictExpression;
 use risingwave_stream::executor::{ExecutorInfo, HopWindowExecutor};
 
 use crate::prelude::*;
@@ -55,8 +56,14 @@ fn create_executor(output_indices: Vec<usize>) -> (MessageSender, BoxedMessageSt
             TIME_COL_IDX,
             window_slide,
             window_size,
-            window_start_exprs,
-            window_end_exprs,
+            window_start_exprs
+                .into_iter()
+                .map(NonStrictExpression::for_test)
+                .collect(),
+            window_end_exprs
+                .into_iter()
+                .map(NonStrictExpression::for_test)
+                .collect(),
             output_indices,
             CHUNK_SIZE,
         )
diff --git a/src/stream/tests/integration_tests/project_set.rs b/src/stream/tests/integration_tests/project_set.rs
index bf1354c25b83b..61a879256108d 100644
--- a/src/stream/tests/integration_tests/project_set.rs
+++ b/src/stream/tests/integration_tests/project_set.rs
@@ -29,10 +29,10 @@ fn create_executor() -> (MessageSender, BoxedMessageStream) {
     };
     let (tx, source) = MockSource::channel(schema, PkIndices::new());
 
-    let test_expr = build_from_pretty("(add:int8 $0:int8 $1:int8)");
-    let test_expr_watermark = build_from_pretty("(add:int8 $0:int8 1:int8)");
-    let tf1 = repeat(build_from_pretty("1:int4"), 1);
-    let tf2 = repeat(build_from_pretty("2:int4"), 2);
+    let test_expr = build_from_pretty("(add:int8 $0:int8 $1:int8)").into_inner();
+    let test_expr_watermark = build_from_pretty("(add:int8 $0:int8 1:int8)").into_inner();
+    let tf1 = repeat(build_from_pretty("1:int4").into_inner(), 1);
+    let tf2 = repeat(build_from_pretty("2:int4").into_inner(), 2);
 
     let project_set = Box::new(ProjectSetExecutor::new(
         ActorContext::create(123),
diff --git a/src/tests/compaction_test/src/bin/compaction.rs b/src/tests/compaction_test/src/bin/compaction.rs
index 443b79ad625b8..d9ba16f7437b8 100644
--- a/src/tests/compaction_test/src/bin/compaction.rs
+++ b/src/tests/compaction_test/src/bin/compaction.rs
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 fn main() {
     use clap::Parser;
 
diff --git a/src/tests/compaction_test/src/bin/delete_range.rs b/src/tests/compaction_test/src/bin/delete_range.rs
index 348a71dc3cce5..592f61a3db4fa 100644
--- a/src/tests/compaction_test/src/bin/delete_range.rs
+++ b/src/tests/compaction_test/src/bin/delete_range.rs
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#![cfg_attr(coverage, feature(no_coverage))]
+#![cfg_attr(coverage, feature(coverage_attribute))]
 
-#[cfg_attr(coverage, no_coverage)]
+#[cfg_attr(coverage, coverage(off))]
 fn main() {
     use clap::Parser;
 
diff --git a/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs b/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs
index 1fd5c90e59e4b..89df82d4c21a0 100644
--- a/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs
+++ b/src/tests/simulation/tests/integration_tests/recovery/background_ddl.rs
@@ -61,6 +61,9 @@ async fn test_background_mv_barrier_recovery() -> Result<()> {
         .run("create materialized view m1 as select * from t1;")
         .await?;
 
+    // If the CN is killed before the first barrier passes for the MV, the MV will be dropped.
+    // This is because its table fragments will NOT be committed until the first barrier passes.
+    sleep(Duration::from_secs(5)).await;
     kill_cn_and_wait_recover(&cluster).await;
 
     // Send some upstream updates.
diff --git a/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs b/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs
index c05e52c927424..776692b2fab90 100644
--- a/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs
+++ b/src/tests/simulation/tests/integration_tests/scale/cascade_materialized_view.rs
@@ -25,7 +25,7 @@ const ROOT_TABLE_CREATE: &str = "create table t1 (v1 int);";
 const MV1: &str = "create materialized view m1 as select * from t1 where v1 > 5;";
 const MV2: &str = "create materialized view m2 as select * from t1 where v1 > 10;";
 const MV3: &str = "create materialized view m3 as select * from m2 where v1 < 15;";
-const MV4: &str = "create materialized view m4 as select m1.v1 as m1v, m3.v1 as m3v from m1 join m3 on m1.v1 = m3.v1;";
+const MV4: &str = "create materialized view m4 as select m1.v1 as m1v, m3.v1 as m3v from m1 join m3 on m1.v1 = m3.v1 limit 100;";
 const MV5: &str = "create materialized view m5 as select * from m4;";
 
 #[tokio::test]
@@ -40,6 +40,7 @@ async fn test_simple_cascade_materialized_view() -> Result<()> {
         .locate_one_fragment([
             identity_contains("materialize"),
             no_identity_contains("chain"),
+            no_identity_contains("topn"),
             no_identity_contains("hashjoin"),
         ])
         .await?;
@@ -129,6 +130,7 @@ async fn test_diamond_cascade_materialized_view() -> Result<()> {
         .locate_one_fragment([
             identity_contains("materialize"),
             no_identity_contains("chain"),
+            no_identity_contains("topn"),
             no_identity_contains("hashjoin"),
         ])
         .await?;
diff --git a/src/tests/simulation/tests/integration_tests/scale/plan.rs b/src/tests/simulation/tests/integration_tests/scale/plan.rs
index c7244dc826b42..8b62a58998a3f 100644
--- a/src/tests/simulation/tests/integration_tests/scale/plan.rs
+++ b/src/tests/simulation/tests/integration_tests/scale/plan.rs
@@ -39,10 +39,7 @@ async fn test_resize_normal() -> Result<()> {
         .await?;
 
     let join_fragment = cluster
-        .locate_one_fragment([
-            identity_contains("hashJoin"),
-            identity_contains("materialize"),
-        ])
+        .locate_one_fragment([identity_contains("hashJoin")])
         .await?;
 
     let join_fragment_id = join_fragment.inner.fragment_id;
@@ -270,7 +267,7 @@ async fn test_resize_no_shuffle() -> Result<()> {
     session
         .run(
             "create materialized view mv7 as select mv1.v as mv1v, mv5.v as mv5v from mv1
-join mv5 on mv1.v = mv5.v;",
+join mv5 on mv1.v = mv5.v limit 1;",
         )
         .await?;
 
@@ -316,6 +313,7 @@ join mv5 on mv1.v = mv5.v;",
     let top_materialize_fragment = cluster
         .locate_one_fragment([
             identity_contains("materialize"),
+            no_identity_contains("topn"),
             no_identity_contains("chain"),
             no_identity_contains("hashJoin"),
         ])
diff --git a/src/utils/pgwire/src/lib.rs b/src/utils/pgwire/src/lib.rs
index 1cda373ee9568..84a17d9907879 100644
--- a/src/utils/pgwire/src/lib.rs
+++ b/src/utils/pgwire/src/lib.rs
@@ -17,8 +17,6 @@
 #![feature(result_option_inspect)]
 #![feature(iterator_try_collect)]
 #![feature(trusted_len)]
-#![feature(async_fn_in_trait)]
-#![feature(return_position_impl_trait_in_trait)]
 #![feature(lazy_cell)]
 #![expect(clippy::doc_markdown, reason = "FIXME: later")]
 
diff --git a/src/utils/pgwire/src/pg_response.rs b/src/utils/pgwire/src/pg_response.rs
index 29ea77f83b71b..eeec929732f50 100644
--- a/src/utils/pgwire/src/pg_response.rs
+++ b/src/utils/pgwire/src/pg_response.rs
@@ -92,6 +92,7 @@ pub enum StatementType {
     ROLLBACK,
     SET_TRANSACTION,
     CANCEL_COMMAND,
+    WAIT,
 }
 
 impl std::fmt::Display for StatementType {
@@ -278,6 +279,7 @@ impl StatementType {
             },
             Statement::Explain { .. } => Ok(StatementType::EXPLAIN),
             Statement::Flush => Ok(StatementType::FLUSH),
+            Statement::Wait => Ok(StatementType::WAIT),
             _ => Err("unsupported statement type".to_string()),
         }
     }
diff --git a/src/utils/workspace-config/Cargo.toml b/src/utils/workspace-config/Cargo.toml
index d8b2dd800ab1b..df70a2c6d0054 100644
--- a/src/utils/workspace-config/Cargo.toml
+++ b/src/utils/workspace-config/Cargo.toml
@@ -25,5 +25,10 @@ zstd-sys = { version = "2", optional = true, default-features = false, features
 # workspace-hack = { path = "../../workspace-hack" }
 # Don't add workspace-hack into this crate!
 
+# FIXME(xxchan): This is a temporary fix due to how cargo and hakari work. See the related PR for more details.
+# We will revisit how to handle workspace-hack and build-dependency issues later.
+[build-dependencies]
+openssl-sys = { version = "=0.9.92", optional = true, features = ["vendored"] }
+
 [lints]
 workspace = true
diff --git a/src/workspace-hack/Cargo.toml b/src/workspace-hack/Cargo.toml
index 2ae671ca2de93..67b218c787652 100644
--- a/src/workspace-hack/Cargo.toml
+++ b/src/workspace-hack/Cargo.toml
@@ -37,7 +37,7 @@ combine = { version = "4", features = ["tokio"] }
 crossbeam-epoch = { version = "0.9" }
 crossbeam-queue = { version = "0.3" }
 crossbeam-utils = { version = "0.8" }
-deranged = { version = "0.3", default-features = false, features = ["serde", "std"] }
+deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] }
 digest = { version = "0.10", features = ["mac", "oid", "std"] }
 either = { version = "1", features = ["serde"] }
 fail = { version = "0.5", default-features = false, features = ["failpoints"] }
@@ -53,7 +53,6 @@ futures-task = { version = "0.3" }
 futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
 hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14", features = ["nightly", "raw"] }
 hashbrown-5ef9efb8ec2df382 = { package = "hashbrown", version = "0.12", features = ["nightly", "raw"] }
-heck = { version = "0.4", features = ["unicode"] }
 hyper = { version = "0.14", features = ["full"] }
 indexmap = { version = "1", default-features = false, features = ["serde", "std"] }
 itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10" }
@@ -113,11 +112,11 @@ sha1 = { version = "0.10" }
 sha2 = { version = "0.10" }
 signature = { version = "2", default-features = false, features = ["digest", "rand_core", "std"] }
 smallvec = { version = "1", default-features = false, features = ["serde", "union", "write"] }
-sqlx = { version = "0.7", features = ["bigdecimal", "chrono", "mysql", "postgres", "runtime-tokio-native-tls", "rust_decimal", "sqlite", "time", "uuid"] }
-sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-native-tls", "any", "bigdecimal", "chrono", "json", "migrate", "offline", "rust_decimal", "time", "uuid"] }
-sqlx-mysql = { version = "0.7", default-features = false, features = ["any", "bigdecimal", "chrono", "json", "migrate", "offline", "rust_decimal", "time", "uuid"] }
-sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "bigdecimal", "chrono", "json", "migrate", "offline", "rust_decimal", "time", "uuid"] }
-sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "chrono", "json", "migrate", "offline", "time", "uuid"] }
+sqlx = { version = "0.7", default-features = false, features = ["bigdecimal", "chrono", "json", "mysql", "postgres", "runtime-tokio-native-tls", "rust_decimal", "sqlite", "time", "uuid"] }
+sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-native-tls", "bigdecimal", "chrono", "json", "migrate", "offline", "rust_decimal", "time", "uuid"] }
+sqlx-mysql = { version = "0.7", default-features = false, features = ["bigdecimal", "chrono", "json", "rust_decimal", "time", "uuid"] }
+sqlx-postgres = { version = "0.7", default-features = false, features = ["bigdecimal", "chrono", "json", "rust_decimal", "time", "uuid"] }
+sqlx-sqlite = { version = "0.7", default-features = false, features = ["chrono", "json", "time", "uuid"] }
 strum = { version = "0.25", features = ["derive"] }
 subtle = { version = "2" }
 time = { version = "0.3", features = ["local-offset", "macros", "serde-well-known"] }
@@ -145,47 +144,26 @@ ahash = { version = "0.8" }
 allocator-api2 = { version = "0.2", default-features = false, features = ["alloc", "nightly"] }
 anyhow = { version = "1", features = ["backtrace"] }
 auto_enums = { version = "0.8", features = ["futures03"] }
-base64 = { version = "0.21", features = ["alloc"] }
 bitflags = { version = "2", default-features = false, features = ["serde", "std"] }
-byteorder = { version = "1" }
 bytes = { version = "1", features = ["serde"] }
 cc = { version = "1", default-features = false, features = ["parallel"] }
-chrono = { version = "0.4", features = ["serde"] }
-crossbeam-queue = { version = "0.3" }
-crossbeam-utils = { version = "0.8" }
-deranged = { version = "0.3", default-features = false, features = ["serde", "std"] }
-digest = { version = "0.10", features = ["mac", "oid", "std"] }
+deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] }
 either = { version = "1", features = ["serde"] }
 fixedbitset = { version = "0.4" }
 frunk_core = { version = "0.4", default-features = false, features = ["std"] }
-futures-channel = { version = "0.3", features = ["sink"] }
-futures-core = { version = "0.3" }
-futures-io = { version = "0.3" }
-futures-sink = { version = "0.3" }
-futures-task = { version = "0.3" }
-futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
 hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14", features = ["nightly", "raw"] }
-heck = { version = "0.4", features = ["unicode"] }
 itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10" }
 itertools-a6292c17cd707f01 = { package = "itertools", version = "0.11" }
 lazy_static = { version = "1", default-features = false, features = ["spin_no_std"] }
 libc = { version = "0.2", features = ["extra_traits"] }
-lock_api = { version = "0.4", features = ["arc_lock"] }
 log = { version = "0.4", default-features = false, features = ["kv_unstable", "std"] }
-madsim-tokio = { version = "0.2", default-features = false, features = ["fs", "io-util", "macros", "net", "process", "rt", "rt-multi-thread", "signal", "sync", "time", "tracing"] }
-md-5 = { version = "0.10" }
-mio = { version = "0.8", features = ["net", "os-ext"] }
 nom = { version = "7" }
 num-bigint = { version = "0.4" }
 num-integer = { version = "0.1", features = ["i128"] }
-num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
 num-traits = { version = "0.2", features = ["i128", "libm"] }
-parking_lot = { version = "0.12", features = ["arc_lock", "deadlock_detection"] }
-parking_lot_core = { version = "0.9", default-features = false, features = ["deadlock_detection"] }
 petgraph = { version = "0.6" }
 phf = { version = "0.11", features = ["uncased"] }
 phf_shared = { version = "0.11", features = ["uncased"] }
-postgres-types = { version = "0.2", default-features = false, features = ["derive", "with-chrono-0_4", "with-serde_json-1"] }
 proc-macro2 = { version = "1", features = ["span-locations"] }
 prost-5ef9efb8ec2df382 = { package = "prost", version = "0.12", features = ["no-recursion-limit"] }
 prost-a6292c17cd707f01 = { package = "prost", version = "0.11" }
@@ -196,36 +174,13 @@ rand_core = { version = "0.6", default-features = false, features = ["std"] }
 regex = { version = "1" }
 regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
 regex-syntax = { version = "0.8" }
-rust_decimal = { version = "1", features = ["db-postgres", "maths"] }
-scopeguard = { version = "1" }
 serde = { version = "1", features = ["alloc", "derive", "rc"] }
 serde_json = { version = "1", features = ["alloc", "raw_value"] }
-sha1 = { version = "0.10" }
-sha2 = { version = "0.10" }
-signature = { version = "2", default-features = false, features = ["digest", "rand_core", "std"] }
-smallvec = { version = "1", default-features = false, features = ["serde", "union", "write"] }
-sqlx-core = { version = "0.7", features = ["_rt-tokio", "_tls-native-tls", "any", "bigdecimal", "chrono", "json", "migrate", "offline", "rust_decimal", "time", "uuid"] }
-sqlx-mysql = { version = "0.7", default-features = false, features = ["any", "bigdecimal", "chrono", "json", "migrate", "offline", "rust_decimal", "time", "uuid"] }
-sqlx-postgres = { version = "0.7", default-features = false, features = ["any", "bigdecimal", "chrono", "json", "migrate", "offline", "rust_decimal", "time", "uuid"] }
-sqlx-sqlite = { version = "0.7", default-features = false, features = ["any", "chrono", "json", "migrate", "offline", "time", "uuid"] }
-subtle = { version = "2" }
 syn-dff4ba8e3ae991db = { package = "syn", version = "1", features = ["extra-traits", "full", "visit", "visit-mut"] }
 syn-f595c2ba2a3f28df = { package = "syn", version = "2", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] }
 time = { version = "0.3", features = ["local-offset", "macros", "serde-well-known"] }
 time-macros = { version = "0.2", default-features = false, features = ["formatting", "parsing", "serde"] }
-tinyvec = { version = "1", features = ["alloc", "grab_spare_slice", "rustc_1_55"] }
-tokio = { version = "1", features = ["full", "stats", "tracing"] }
-tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "ac00d88", features = ["with-chrono-0_4"] }
-tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "fe39bb8e", features = ["fs", "net"] }
-tokio-util = { version = "0.7", features = ["codec", "io"] }
 toml_datetime = { version = "0.6", default-features = false, features = ["serde"] }
 toml_edit = { version = "0.19", features = ["serde"] }
-tracing = { version = "0.1", features = ["log"] }
-tracing-core = { version = "0.1" }
-unicode-bidi = { version = "0.3" }
-unicode-normalization = { version = "0.1" }
-url = { version = "2", features = ["serde"] }
-uuid = { version = "1", features = ["fast-rng", "serde", "v4"] }
-whoami = { version = "1" }
 
 ### END HAKARI SECTION