diff --git a/Cargo.lock b/Cargo.lock index 51381e82037a4..7183c990bfce9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1751,7 +1751,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.1.0", + "hyper 1.4.1", "hyper-util", "itoa", "matchit", @@ -2791,22 +2791,22 @@ dependencies = [ [[package]] name = "console-api" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a257c22cd7e487dd4a13d413beabc512c5052f0bc048db0da6a84c3d8a6142fd" +checksum = "86ed14aa9c9f927213c6e4f3ef75faaad3406134efe84ba2cb7983431d5f0931" dependencies = [ "futures-core", - "prost 0.12.1", - "prost-types 0.12.1", - "tonic 0.11.0", + "prost 0.13.1", + "prost-types 0.13.1", + "tonic 0.12.1", "tracing-core", ] [[package]] name = "console-subscriber" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c4cc54bae66f7d9188996404abdf7fdfa23034ef8e43478c8810828abad758" +checksum = "e2e3a111a37f3333946ebf9da370ba5c5577b18eb342ec683eb488dd21980302" dependencies = [ "console-api", "crossbeam-channel", @@ -2814,14 +2814,15 @@ dependencies = [ "futures-task", "hdrhistogram", "humantime", - "prost 0.12.1", - "prost-types 0.12.1", + "hyper-util", + "prost 0.13.1", + "prost-types 0.13.1", "serde", "serde_json", "thread_local", "tokio", - "tokio-stream", - "tonic 0.11.0", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", + "tonic 0.12.1", "tracing", "tracing-core", "tracing-subscriber", @@ -4436,15 +4437,15 @@ dependencies = [ [[package]] name = "etcd-client" -version = "0.12.4" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae697f3928e8c89ae6f4dcf788059f49fd01a76dc53e63628f5a33881f5715e" +checksum = "39bde3ce50a626efeb1caa9ab1083972d178bebb55ca627639c8ded507dfcbde" dependencies = [ - "http 0.2.9", - "prost 0.12.1", + "http 1.1.0", + "prost 0.13.1", "tokio", - "tokio-stream", - "tonic 0.10.2", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", + "tonic 0.12.1", "tonic-build", "tower", "tower-service", @@ -5185,7 +5186,7 @@ dependencies = [ "thiserror", "time", "tokio", - "tokio-stream", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "url", "yup-oauth2", ] @@ -5301,9 +5302,9 @@ dependencies = [ [[package]] name = "google-cloud-auth" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e09ed5b2998bc8d0d3df09c859028210d4961b8fe779cfda8dc8ca4e83d5def2" +checksum = "1112c453c2e155b3e683204ffff52bcc6d6495d04b68d9e90cd24161270c5058" dependencies = [ "async-trait", "base64 0.21.7", @@ -5323,9 +5324,9 @@ dependencies = [ [[package]] name = "google-cloud-bigquery" -version = "0.9.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e321c127945bb44a5cf5129c37530e2494b97afefe7f334a983ac754e40914e" +checksum = "305cb7214d11b719e9f00f982c1ee1304c674f7a8dfc44a43b8bad3c909750c2" dependencies = [ "anyhow", "arrow 50.0.0", @@ -5350,29 +5351,29 @@ dependencies = [ [[package]] name = "google-cloud-gax" -version = "0.17.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cb60314136e37de9e2a05ddb427b9c5a39c3d188de2e2f026c6af74425eef44" +checksum = "9c3eaaad103912825594d674a4b1e556ccbb05a13a6cac17dcfd871997fb760a" dependencies = [ "google-cloud-token", - "http 0.2.9", + "http 1.1.0", "thiserror", 
"tokio", "tokio-retry", - "tonic 0.10.2", + "tonic 0.12.1", "tower", "tracing", ] [[package]] name = "google-cloud-googleapis" -version = "0.13.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cd184c52aa2619ac1b16ad8b5a752e91d25be88a8cf08eaec19777dfacbe54" +checksum = "0ae8ab26ef7c7c3f7dfb9cc3982293d031d8e78c85d00ddfb704b5c35aeff7c8" dependencies = [ - "prost 0.12.1", - "prost-types 0.12.1", - "tonic 0.10.2", + "prost 0.13.1", + "prost-types 0.13.1", + "tonic 0.12.1", ] [[package]] @@ -5388,9 +5389,9 @@ dependencies = [ [[package]] name = "google-cloud-pubsub" -version = "0.25.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a35e4a008db5cf01a5c03d3c67bd90b3cad77427ca949f3c8eddd90c4a3c932" +checksum = "55ef73601dcec5ea144e59969e921d35d66000211603fee8023b7947af09248f" dependencies = [ "async-channel 1.9.0", "async-stream", @@ -5398,7 +5399,7 @@ dependencies = [ "google-cloud-gax", "google-cloud-googleapis", "google-cloud-token", - "prost-types 0.12.1", + "prost-types 0.13.1", "thiserror", "tokio", "tokio-util", @@ -5407,9 +5408,9 @@ dependencies = [ [[package]] name = "google-cloud-token" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fcd62eb34e3de2f085bcc33a09c3e17c4f65650f36d53eb328b00d63bcb536a" +checksum = "8f49c12ba8b21d128a2ce8585955246977fbce4415f680ebf9199b6f9d6d725f" dependencies = [ "async-trait", ] @@ -5595,9 +5596,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.3.2" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -5760,9 +5761,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.1.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5aa53871fc917b1a9ed87b683a5d86db645e23acb32c2e0785a353e522fb75" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" dependencies = [ "bytes", "futures-channel", @@ -5774,6 +5775,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", + "smallvec", "tokio", "want", ] @@ -5803,7 +5805,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.1.0", + "hyper 1.4.1", "hyper-util", "rustls 0.22.4", "rustls-pki-types", @@ -5824,6 +5826,19 @@ dependencies = [ "tokio-io-timeout", ] +[[package]] +name = "hyper-timeout" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793" +dependencies = [ + "hyper 1.4.1", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-tls" version = "0.5.0" @@ -5845,7 +5860,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.1.0", + "hyper 1.4.1", "hyper-util", "native-tls", "tokio", @@ -5855,16 +5870,16 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.3" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +checksum = 
"3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.1.0", "http-body 1.0.0", - "hyper 1.1.0", + "hyper 1.4.1", "pin-project-lite", "socket2 0.5.6", "tokio", @@ -5977,8 +5992,8 @@ dependencies = [ [[package]] name = "icelake" -version = "0.0.10" -source = "git+https://github.com/icelake-io/icelake?rev=07d53893d7788b4e41fc11efad8a6be828405c31#07d53893d7788b4e41fc11efad8a6be828405c31" +version = "0.3.141592654" +source = "git+https://github.com/risingwavelabs/icelake.git?rev=1860eb315183a5f3f72b4097c1e40d49407f8373#1860eb315183a5f3f72b4097c1e40d49407f8373" dependencies = [ "anyhow", "apache-avro 0.17.0", @@ -6814,13 +6829,13 @@ dependencies = [ [[package]] name = "madsim-etcd-client" -version = "0.4.0+0.12.1" +version = "0.6.0+0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b4b5de48bb7f3f7eae0bca62b3ed0b7d714b1b273d7347329b92c3a2eef113" +checksum = "8edcf23498cb590e415ce2ba6c7f186c7aa3340e7aa716ddddb34faf0a9ffdfb" dependencies = [ "etcd-client", "futures-util", - "http 0.2.9", + "http 1.1.0", "madsim", "serde", "serde_with 3.8.0", @@ -6828,7 +6843,7 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.12", - "tonic 0.10.2", + "tonic 0.12.1", "tracing", ] @@ -6881,29 +6896,29 @@ dependencies = [ [[package]] name = "madsim-tonic" -version = "0.4.1+0.10.0" +version = "0.5.1+0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "813977c7870103e113a0332d97731f961bc48aaa8860edd318ef7d7754214436" +checksum = "61c668c82f0c2aca7ffed3235047f2539e6e41278c7c47a822999f3b7a067887" dependencies = [ "async-stream", "chrono", "futures-util", "madsim", "tokio", - "tonic 0.10.2", + "tonic 0.12.1", "tower", "tracing", ] [[package]] name = "madsim-tonic-build" -version = "0.4.2+0.10.0" +version = "0.5.0+0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a2ad2776ba20221ccbe4e136e2fa0f7ab90eebd608373177f3e74a198a288ec" +checksum = "f271a476bbaa9d2139e1e1a5beb869c6119e805a0b67ad2b2857e4a8785b111a" dependencies = [ "prettyplease 0.2.15", "proc-macro2", - "prost-build 0.12.1", + "prost-build 0.13.1", "quote", "syn 2.0.66", "tonic-build", @@ -7851,15 +7866,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "openssl-src" -version = "300.3.1+3.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91" -dependencies = [ - "cc", -] - [[package]] name = "openssl-sys" version = "0.9.103" @@ -7868,7 +7874,6 @@ checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" dependencies = [ "cc", "libc", - "openssl-src", "pkg-config", "vcpkg", ] @@ -7942,21 +7947,7 @@ dependencies = [ "rand", "thiserror", "tokio", - "tokio-stream", -] - -[[package]] -name = "opentls" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f561874f8d6ecfb674fc08863414040c93cc90c0b6963fe679895fab8b65560" -dependencies = [ - "futures-util", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "url", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -8009,20 +8000,20 @@ dependencies = [ [[package]] name = "otlp-embedded" version = "0.0.1" -source = 
"git+https://github.com/risingwavelabs/otlp-embedded?rev=492c244e0be91feb659c0cd48a624bbd96045a33#492c244e0be91feb659c0cd48a624bbd96045a33" +source = "git+https://github.com/risingwavelabs/otlp-embedded?rev=e6cd165b9bc85783b42c106e99186b86b73e3507#e6cd165b9bc85783b42c106e99186b86b73e3507" dependencies = [ "axum 0.7.4", "datasize", "hex", - "itertools 0.12.1", - "madsim-tonic", - "madsim-tonic-build", - "prost 0.12.1", + "itertools 0.13.0", + "prost 0.13.1", "rust-embed", "schnellru", "serde", "serde_json", "tokio", + "tonic 0.12.1", + "tonic-build", "tracing", ] @@ -8959,6 +8950,16 @@ dependencies = [ "prost-derive 0.12.1", ] +[[package]] +name = "prost" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc" +dependencies = [ + "bytes", + "prost-derive 0.13.1", +] + [[package]] name = "prost-build" version = "0.11.9" @@ -9003,6 +9004,27 @@ dependencies = [ "which", ] +[[package]] +name = "prost-build" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb182580f71dd070f88d01ce3de9f4da5021db7115d2e1c3605a754153b77c1" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.13.0", + "log", + "multimap 0.10.0", + "once_cell", + "petgraph", + "prettyplease 0.2.15", + "prost 0.13.1", + "prost-types 0.13.1", + "regex", + "syn 2.0.66", + "tempfile", +] + [[package]] name = "prost-derive" version = "0.11.9" @@ -9029,6 +9051,19 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "prost-derive" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "prost-helpers" version = "0.1.0" @@ -9040,13 +9075,13 @@ dependencies = [ [[package]] name = "prost-reflect" -version = "0.13.0" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae9372e3227f3685376a0836e5c248611eafc95a0be900d44bc6cdf225b700f" +checksum = "55a6a9143ae25c25fa7b6a48d6cc08b10785372060009c25140a4e7c340e95af" dependencies = [ "once_cell", - "prost 0.12.1", - "prost-types 0.12.1", + "prost 0.13.1", + "prost-types 0.13.1", ] [[package]] @@ -9067,6 +9102,15 @@ dependencies = [ "prost 0.12.1", ] +[[package]] +name = "prost-types" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2" +dependencies = [ + "prost 0.13.1", +] + [[package]] name = "protobuf" version = "2.28.0" @@ -9184,7 +9228,7 @@ dependencies = [ "indoc", "libc", "memoffset", - "parking_lot 0.11.2", + "parking_lot 0.12.1", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -9647,7 +9691,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.1.0", + "hyper 1.4.1", "hyper-rustls 0.26.0", "hyper-tls 0.6.0", "hyper-util", @@ -9837,7 +9881,7 @@ dependencies = [ "bytes", "itertools 0.12.1", "parking_lot 0.12.1", - "prost 0.12.1", + "prost 0.13.1", "risingwave_common", "risingwave_hummock_sdk", "risingwave_meta_model_v2", @@ -9879,7 +9923,7 @@ dependencies = [ "parquet 52.0.0", "paste", "prometheus", - "prost 0.12.1", + "prost 0.13.1", "rand", "risingwave_common", "risingwave_common_estimate_size", @@ -9899,7 +9943,7 @@ dependencies = [ "thiserror-ext", "tikv-jemallocator", "tokio-metrics", - "tokio-stream", + 
"tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tokio-util", "tracing", "twox-hash", @@ -9942,7 +9986,7 @@ dependencies = [ "serde", "serde_yaml", "thiserror-ext", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "toml 0.8.12", "tracing", "tracing-subscriber", @@ -10048,7 +10092,7 @@ dependencies = [ "governor", "hashbrown 0.14.3", "hex", - "http 0.2.9", + "http 1.1.0", "http-body 0.4.5", "humantime", "hytra", @@ -10074,7 +10118,7 @@ dependencies = [ "pretty_assertions", "procfs 0.16.0", "prometheus", - "prost 0.12.1", + "prost 0.13.1", "rand", "regex", "reqwest 0.12.4", @@ -10156,14 +10200,18 @@ dependencies = [ name = "risingwave_common_metrics" version = "1.11.0-alpha" dependencies = [ + "auto_impl", "bytes", "clap", "darwin-libproc", "easy-ext", "futures", "http 0.2.9", - "http-body 0.4.5", + "http 1.1.0", + "http-body 1.0.0", "hyper 0.14.27", + "hyper 1.4.1", + "hyper-util", "hytra", "itertools 0.12.1", "libc", @@ -10204,7 +10252,7 @@ dependencies = [ "anyhow", "bincode 1.3.3", "parking_lot 0.12.1", - "prost 0.12.1", + "prost 0.13.1", "risingwave_pb", "serde", "thiserror", @@ -10219,7 +10267,7 @@ dependencies = [ "async-trait", "axum 0.7.4", "futures", - "hyper 0.14.27", + "http 1.1.0", "madsim-tokio", "madsim-tonic", "prometheus", @@ -10273,7 +10321,7 @@ dependencies = [ "madsim-tokio", "madsim-tonic", "parking_lot 0.12.1", - "prost 0.12.1", + "prost 0.13.1", "risingwave_common", "risingwave_common_heap_profiling", "risingwave_common_service", @@ -10299,14 +10347,15 @@ dependencies = [ "foyer", "futures", "futures-async-stream", - "hyper 0.14.27", + "http 1.1.0", + "hyper 1.4.1", "itertools 0.12.1", "madsim-tokio", "madsim-tonic", "maplit", "pprof", "prometheus", - "prost 0.12.1", + "prost 0.13.1", "rand", "risingwave_batch", "risingwave_common", @@ -10325,7 +10374,7 @@ dependencies = [ "tempfile", "thiserror-ext", "tikv-jemalloc-ctl", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tower", "tracing", "uuid", @@ -10412,10 +10461,10 @@ dependencies = [ "postgres-openssl", "pretty_assertions", "prometheus", - "prost 0.12.1", + "prost 0.13.1", "prost-build 0.12.1", "prost-reflect", - "prost-types 0.12.1", + "prost-types 0.13.1", "protobuf-native", "protobuf-src", "pulsar", @@ -10455,7 +10504,7 @@ dependencies = [ "time", "tokio-postgres", "tokio-retry", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tokio-util", "tracing", "tracing-subscriber", @@ -10513,7 +10562,7 @@ dependencies = [ "madsim-tokio", "madsim-tonic", "memcomparable", - "prost 0.12.1", + "prost 0.13.1", "regex", "risingwave_common", "risingwave_connector", @@ -10753,7 +10802,7 @@ dependencies = [ "pretty-xmlish", "pretty_assertions", "prometheus", - "prost 0.12.1", + "prost 0.13.1", "rand", "risingwave_batch", "risingwave_common", @@ -10780,7 +10829,7 @@ dependencies = [ "tempfile", "thiserror", "thiserror-ext", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tracing", "uuid", "workspace-hack", @@ -10805,7 +10854,7 @@ dependencies = [ "hex", "itertools 0.12.1", "parse-display", - "prost 0.12.1", + "prost 0.13.1", "risingwave_common", "risingwave_common_estimate_size", "risingwave_pb", @@ -10863,7 +10912,7 @@ dependencies = [ "madsim-tokio", "mockall", "parking_lot 0.12.1", - "prost 0.12.1", + "prost 0.13.1", "risingwave_common", "risingwave_hummock_sdk", "risingwave_pb", @@ -10882,7 
+10931,7 @@ dependencies = [ "futures", "jni", "madsim-tokio", - "prost 0.12.1", + "prost 0.13.1", "risingwave_common", "risingwave_expr", "risingwave_hummock_sdk", @@ -10912,7 +10961,7 @@ dependencies = [ "jni", "madsim-tokio", "paste", - "prost 0.12.1", + "prost 0.13.1", "risingwave_common", "risingwave_expr", "risingwave_hummock_sdk", @@ -10979,7 +11028,7 @@ dependencies = [ "function_name", "futures", "hex", - "hyper 0.14.27", + "http 1.1.0", "itertools 0.12.1", "jsonbb", "madsim-etcd-client", @@ -10994,7 +11043,7 @@ dependencies = [ "parking_lot 0.12.1", "prometheus", "prometheus-http-query", - "prost 0.12.1", + "prost 0.13.1", "rand", "risingwave_backup", "risingwave_common", @@ -11021,7 +11070,7 @@ dependencies = [ "thiserror", "thiserror-ext", "tokio-retry", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tower", "tower-http", "tracing", @@ -11067,7 +11116,7 @@ dependencies = [ name = "risingwave_meta_model_v2" version = "1.11.0-alpha" dependencies = [ - "prost 0.12.1", + "prost 0.13.1", "risingwave_common", "risingwave_hummock_sdk", "risingwave_pb", @@ -11121,7 +11170,7 @@ dependencies = [ "itertools 0.12.1", "madsim-tokio", "madsim-tonic", - "prost 0.12.1", + "prost 0.13.1", "rand", "regex", "risingwave_common", @@ -11134,7 +11183,7 @@ dependencies = [ "serde_json", "sync-point", "thiserror-ext", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tracing", "workspace-hack", ] @@ -11186,8 +11235,8 @@ dependencies = [ "madsim-tonic-build", "pbjson", "pbjson-build", - "prost 0.12.1", - "prost-build 0.12.1", + "prost 0.13.1", + "prost-build 0.13.1", "prost-helpers", "risingwave_error", "serde", @@ -11242,8 +11291,8 @@ dependencies = [ "easy-ext", "either", "futures", - "http 0.2.9", - "hyper 0.14.27", + "http 1.1.0", + "hyper 1.4.1", "itertools 0.12.1", "lru 0.7.6", "madsim-tokio", @@ -11260,7 +11309,7 @@ dependencies = [ "thiserror", "thiserror-ext", "tokio-retry", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tower", "tracing", "url", @@ -11291,6 +11340,7 @@ dependencies = [ "rlimit", "thiserror-ext", "time", + "tokio", "tracing", "tracing-opentelemetry", "tracing-subscriber", @@ -11345,7 +11395,7 @@ dependencies = [ "tempfile", "tikv-jemallocator", "tokio-postgres", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tracing", "tracing-subscriber", ] @@ -11413,7 +11463,7 @@ dependencies = [ "serde", "serde_with 3.8.0", "tokio-postgres", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "toml 0.8.12", "tracing", "workspace-hack", @@ -11460,7 +11510,7 @@ dependencies = [ "parking_lot 0.12.1", "procfs 0.16.0", "prometheus", - "prost 0.12.1", + "prost 0.13.1", "rand", "risingwave_backup", "risingwave_common", @@ -11529,7 +11579,7 @@ dependencies = [ "pin-project", "prehash", "prometheus", - "prost 0.12.1", + "prost 0.13.1", "rand", "risingwave_common", "risingwave_common_estimate_size", @@ -11554,7 +11604,7 @@ dependencies = [ "thiserror-ext", "tokio-metrics", "tokio-retry", - "tokio-stream", + "tokio-stream 0.1.15 (git+https://github.com/madsim-rs/tokio.git?rev=0dd1055)", "tracing", "tracing-test", "workspace-hack", @@ -12100,9 +12150,9 @@ dependencies = [ [[package]] name = "schnellru" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"772575a524feeb803e5b0fcbc6dd9f367e579488197c94c6e4023aad2305774d" +checksum = "c9a8ef13a93c54d20580de1e5c413e624e53121d42fc7e2c11d10ef7f8b02367" dependencies = [ "ahash 0.8.11", "cfg-if", @@ -13145,7 +13195,7 @@ dependencies = [ "thiserror", "time", "tokio", - "tokio-stream", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "tracing", "url", "uuid", @@ -13740,13 +13790,15 @@ dependencies = [ "futures-util", "num-traits", "once_cell", - "opentls", "pin-project-lite", "pretty-hex", "rust_decimal", + "rustls-native-certs 0.6.3", + "rustls-pemfile 1.0.4", "thiserror", "time", "tokio", + "tokio-rustls 0.24.1", "tokio-util", "tracing", "uuid", @@ -13849,9 +13901,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", @@ -13879,9 +13931,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", @@ -13897,7 +13949,7 @@ dependencies = [ "futures-util", "pin-project-lite", "tokio", - "tokio-stream", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -13991,8 +14043,19 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.14" -source = "git+https://github.com/madsim-rs/tokio.git?rev=fe39bb8e#fe39bb8e8ab0ed96ee1b4477ab5508c20ce017fb" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.15" +source = "git+https://github.com/madsim-rs/tokio.git?rev=0dd1055#0dd105567b323c863c29f794d2221ed588956d8d" dependencies = [ "futures-core", "madsim-tokio", @@ -14095,12 +14158,11 @@ dependencies = [ "axum 0.6.20", "base64 0.21.7", "bytes", - "flate2", "h2 0.3.26", "http 0.2.9", "http-body 0.4.5", "hyper 0.14.27", - "hyper-timeout", + "hyper-timeout 0.4.1", "percent-encoding", "pin-project", "prost 0.12.1", @@ -14108,12 +14170,11 @@ dependencies = [ "rustls-pemfile 1.0.4", "tokio", "tokio-rustls 0.24.1", - "tokio-stream", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "tower", "tower-layer", "tower-service", "tracing", - "webpki-roots 0.25.2", ] [[package]] @@ -14131,27 +14192,61 @@ dependencies = [ "http 0.2.9", "http-body 0.4.5", "hyper 0.14.27", - "hyper-timeout", + "hyper-timeout 0.4.1", "percent-encoding", "pin-project", "prost 0.12.1", "tokio", - "tokio-stream", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", "tower", "tower-layer", "tower-service", "tracing", ] +[[package]] +name = "tonic" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38659f4a91aba8598d27821589f5db7dddd94601e7a01b1e485a50e5484c7401" +dependencies = [ + "async-stream", + "async-trait", + "axum 0.7.4", + "base64 0.22.0", + "bytes", + "flate2", + "h2 0.4.4", 
+ "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "hyper 1.4.1", + "hyper-timeout 0.5.1", + "hyper-util", + "percent-encoding", + "pin-project", + "prost 0.13.1", + "rustls-pemfile 2.1.1", + "socket2 0.5.6", + "tokio", + "tokio-rustls 0.26.0", + "tokio-stream 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)", + "tower", + "tower-layer", + "tower-service", + "tracing", + "webpki-roots 0.26.1", +] + [[package]] name = "tonic-build" -version = "0.10.2" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d021fc044c18582b9a2408cd0dd05b1596e3ecdb5c4df822bb0183545683889" +checksum = "568392c5a2bd0020723e3f387891176aabafe36fd9fcd074ad309dfa0c8eb964" dependencies = [ "prettyplease 0.2.15", "proc-macro2", - "prost-build 0.12.1", + "prost-build 0.13.1", "quote", "syn 2.0.66", ] diff --git a/Cargo.toml b/Cargo.toml index 53dfaef0ac595..5bfab4feb27fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,7 +121,7 @@ aws-smithy-types = { version = "1", default-features = false, features = [ aws-endpoint = "0.60" aws-types = "1" axum = "=0.7.4" # TODO: 0.7.5+ does not work with current toolchain -etcd-client = { package = "madsim-etcd-client", version = "0.4" } +etcd-client = { package = "madsim-etcd-client", version = "0.6" } futures-async-stream = "0.2.9" hytra = "0.1" rdkafka = { package = "madsim-rdkafka", version = "0.4.1", features = [ @@ -129,12 +129,12 @@ rdkafka = { package = "madsim-rdkafka", version = "0.4.1", features = [ ] } hashbrown = { version = "0.14", features = ["ahash", "inline-more", "nightly"] } criterion = { version = "0.5", features = ["async_futures"] } -tonic = { package = "madsim-tonic", version = "0.4.1" } -tonic-build = { package = "madsim-tonic-build", version = "0.4.2" } -otlp-embedded = { git = "https://github.com/risingwavelabs/otlp-embedded", rev = "492c244e0be91feb659c0cd48a624bbd96045a33" } -prost = { version = "0.12" } -prost-build = { version = "0.12" } -icelake = { git = "https://github.com/icelake-io/icelake", rev = "07d53893d7788b4e41fc11efad8a6be828405c31", features = [ +tonic = { package = "madsim-tonic", version = "0.5.1" } +tonic-build = { package = "madsim-tonic-build", version = "0.5" } +otlp-embedded = { git = "https://github.com/risingwavelabs/otlp-embedded", rev = "e6cd165b9bc85783b42c106e99186b86b73e3507" } +prost = { version = "0.13" } +prost-build = { version = "0.13" } +icelake = { git = "https://github.com/risingwavelabs/icelake.git", rev = "1860eb315183a5f3f72b4097c1e40d49407f8373", features = [ "prometheus", ] } arrow-array-iceberg = { package = "arrow-array", version = "52" } @@ -180,6 +180,7 @@ tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git" "profiling", "stats", ], rev = "64a2d9" } +# TODO(http-bump): bump to use tonic 0.12 once minitrace-opentelemetry is updated opentelemetry = "0.23" opentelemetry-otlp = "0.16" opentelemetry_sdk = { version = "0.23", default-features = false } @@ -195,6 +196,7 @@ sea-orm = { version = "0.12.14", features = [ "runtime-tokio-native-tls", ] } sqlx = "0.7" +tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "0dd1055", features = ["net", "fs"] } tokio-util = "0.7" tracing-opentelemetry = "0.24" rand = { version = "0.8", features = ["small_rng"] } @@ -335,7 +337,9 @@ opt-level = 2 # Patch third-party crates for deterministic simulation. 
quanta = { git = "https://github.com/madsim-rs/quanta.git", rev = "948bdc3" } getrandom = { git = "https://github.com/madsim-rs/getrandom.git", rev = "e79a7ae" } -tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "fe39bb8e" } +# Don't patch `tokio-stream`, but only use the madsim version for **direct** dependencies. +# Imagine an unpatched dependency depends on the original `tokio` and the patched `tokio-stream`. +# tokio-stream = { git = "https://github.com/madsim-rs/tokio.git", rev = "0dd1055" } tokio-retry = { git = "https://github.com/madsim-rs/rust-tokio-retry.git", rev = "95e2fd3" } tokio-postgres = { git = "https://github.com/madsim-rs/rust-postgres.git", rev = "ac00d88" } futures-timer = { git = "https://github.com/madsim-rs/futures-timer.git", rev = "05b33b4" } diff --git a/README.md b/README.md index bf50ae208a972..07d12e99223ef 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ RisingWave is a Postgres-compatible SQL engine engineered to provide the simplest and most cost-efficient approach for processing, analyzing, and managing real-time event streaming data. -![RisingWave](https://github.com/risingwavelabs/risingwave/assets/41638002/10c44404-f78b-43ce-bbd9-3646690acc59) +![RisingWave](./docs/dev/src/images/architecture_20240814.png) ## When to use RisingWave? RisingWave can ingest millions of events per second, continuously join live data streams with historical tables, and serve ad-hoc queries in real-time. Typical use cases include, but are not limited to: diff --git a/ci/Dockerfile b/ci/Dockerfile index b14454859a791..95dbb5205c754 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -11,7 +11,7 @@ ENV LANG en_US.utf8 # Use AWS ubuntu mirror RUN sed -i 's|http://archive.ubuntu.com/ubuntu|http://us-east-2.ec2.archive.ubuntu.com/ubuntu/|g' /etc/apt/sources.list RUN apt-get update -yy && \ - DEBIAN_FRONTEND=noninteractive apt-get -y install sudo make build-essential cmake protobuf-compiler curl parallel python3 python3-pip python3-venv software-properties-common \ + DEBIAN_FRONTEND=noninteractive apt-get -y install sudo make build-essential cmake protobuf-compiler curl parallel python3 python3-pip python3-venv software-properties-common psmisc \ openssl libssl-dev libsasl2-dev libcurl4-openssl-dev pkg-config bash openjdk-17-jdk wget unzip git tmux lld postgresql-client kcat netcat-openbsd mysql-client \ maven zstd libzstd-dev locales \ python3.12 python3.12-dev \ diff --git a/ci/build-ci-image.sh b/ci/build-ci-image.sh index 6602509824e05..88542b4aa5f12 100755 --- a/ci/build-ci-image.sh +++ b/ci/build-ci-image.sh @@ -10,7 +10,7 @@ cat ../rust-toolchain # shellcheck disable=SC2155 # REMEMBER TO ALSO UPDATE ci/docker-compose.yml -export BUILD_ENV_VERSION=v20240731 +export BUILD_ENV_VERSION=v20240812 export BUILD_TAG="public.ecr.aws/w1p7b4n3/rw-build-env:${BUILD_ENV_VERSION}" diff --git a/ci/docker-compose.yml b/ci/docker-compose.yml index 78ad69c0995ab..4b1954ff5ae2c 100644 --- a/ci/docker-compose.yml +++ b/ci/docker-compose.yml @@ -71,7 +71,7 @@ services: retries: 5 source-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240731 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 depends_on: - mysql - sqlserver-server @@ -85,7 +85,7 @@ services: - ..:/risingwave sink-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240731 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 depends_on: - mysql - db @@ -108,12 +108,12 @@ services: rw-build-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240731 + image: 
public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 volumes: - ..:/risingwave ci-flamegraph-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240731 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 # NOTE(kwannoel): This is used in order to permit # syscalls for `nperf` (perf_event_open), # so it can do CPU profiling. @@ -124,7 +124,7 @@ services: - ..:/risingwave regress-test-env: - image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240731 + image: public.ecr.aws/w1p7b4n3/rw-build-env:v20240812 depends_on: db: condition: service_healthy @@ -266,7 +266,6 @@ services: SCHEMA_REGISTRY_HOST_NAME: schemaregistry SCHEMA_REGISTRY_LISTENERS: http://schemaregistry:8082 SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: message_queue:29092 - SCHEMA_REGISTRY_DEBUG: 'true' pulsar-server: container_name: pulsar-server diff --git a/ci/scripts/common.sh b/ci/scripts/common.sh index ac64d1a7a89cc..176c10b4ebc4f 100755 --- a/ci/scripts/common.sh +++ b/ci/scripts/common.sh @@ -97,7 +97,6 @@ function filter_stack_trace() { touch tmp cat "$1" \ | sed -E '/ [1-9][0-9]+:/d' \ - | sed -E '/ [3-9]+:/d' \ | sed -E '/ at .rustc/d' \ | sed -E '/ at ...cargo/d' > tmp cp tmp "$1" diff --git a/ci/scripts/deterministic-recovery-test.sh b/ci/scripts/deterministic-recovery-test.sh index 1a400d4ade9e0..2afe2f03b956b 100755 --- a/ci/scripts/deterministic-recovery-test.sh +++ b/ci/scripts/deterministic-recovery-test.sh @@ -9,7 +9,7 @@ echo "--- Download artifacts" download-and-decompress-artifact risingwave_simulation . chmod +x ./risingwave_simulation -export RUST_LOG="risingwave_meta::barrier::recovery=debug,\ +export RUST_LOG="info,risingwave_meta::barrier::recovery=debug,\ risingwave_meta::manager::catalog=debug,\ risingwave_meta::rpc::ddl_controller=debug,\ risingwave_meta::barrier::mod=debug,\ diff --git a/ci/scripts/gen-integration-test-yaml.py b/ci/scripts/gen-integration-test-yaml.py index c778205cfbb3e..e3a8e632709c1 100644 --- a/ci/scripts/gen-integration-test-yaml.py +++ b/ci/scripts/gen-integration-test-yaml.py @@ -65,6 +65,7 @@ def gen_pipeline_steps(): env: GHCR_USERNAME: ghcr-username GHCR_TOKEN: ghcr-token + RW_LICENSE_KEY: rw-license-key - ./ci/plugins/docker-compose-logs """ return pipeline_steps diff --git a/ci/scripts/run-e2e-test.sh b/ci/scripts/run-e2e-test.sh index b4c00cec53fe8..c0f8e9f387d61 100755 --- a/ci/scripts/run-e2e-test.sh +++ b/ci/scripts/run-e2e-test.sh @@ -77,7 +77,7 @@ mv target/debug/risingwave_e2e_extended_mode_test-"$profile" target/debug/rising chmod +x ./target/debug/risingwave_e2e_extended_mode_test echo "--- e2e, $mode, streaming" -RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ +RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info,risingwave_stream::common::table::state_table=warn" \ cluster_start # Please make sure the regression is expected before increasing the timeout. 
sqllogictest -p 4566 -d dev './e2e_test/streaming/**/*.slt' --junit "streaming-${profile}" diff --git a/ci/scripts/single-node-utils.sh b/ci/scripts/single-node-utils.sh index f882084197af8..852cd2099c2fe 100755 --- a/ci/scripts/single-node-utils.sh +++ b/ci/scripts/single-node-utils.sh @@ -19,7 +19,7 @@ start_single_node() { } stop_single_node() { - pkill risingwave + killall --wait risingwave rm -rf "$HOME/.risingwave/state_store" rm -rf "$HOME/.risingwave/meta_store" } @@ -47,7 +47,6 @@ wait_single_node() { restart_single_node() { stop_single_node - sleep 5 start_single_node "$PREFIX_LOG"/single-node-restarted.log & wait_single_node } diff --git a/ci/scripts/sql/nexmark/q0-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q0-temporal-filter.drop.sql new file mode 100644 index 0000000000000..ab23d53d1251d --- /dev/null +++ b/ci/scripts/sql/nexmark/q0-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q0_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q0-temporal-filter.sql b/ci/scripts/sql/nexmark/q0-temporal-filter.sql new file mode 100644 index 0000000000000..0cdbb4062caec --- /dev/null +++ b/ci/scripts/sql/nexmark/q0-temporal-filter.sql @@ -0,0 +1,7 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q0_temporal_filter +AS +SELECT auction, bidder, price, date_time +FROM bid_filtered +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q0.sql b/ci/scripts/sql/nexmark/q0.sql index c07afe673e834..1b90c9391f37c 100644 --- a/ci/scripts/sql/nexmark/q0.sql +++ b/ci/scripts/sql/nexmark/q0.sql @@ -4,4 +4,4 @@ CREATE SINK nexmark_q0 AS SELECT auction, bidder, price, date_time FROM bid -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q1-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q1-temporal-filter.drop.sql new file mode 100644 index 0000000000000..368b0be99ddf3 --- /dev/null +++ b/ci/scripts/sql/nexmark/q1-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q1_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q1-temporal-filter.sql b/ci/scripts/sql/nexmark/q1-temporal-filter.sql new file mode 100644 index 0000000000000..20594f6a15347 --- /dev/null +++ b/ci/scripts/sql/nexmark/q1-temporal-filter.sql @@ -0,0 +1,10 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q1_temporal_filter +AS +SELECT auction, + bidder, + 0.908 * price as price, + date_time +FROM bid_filtered +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q1.sql b/ci/scripts/sql/nexmark/q1.sql index 4e38b643928dd..503485046696a 100644 --- a/ci/scripts/sql/nexmark/q1.sql +++ b/ci/scripts/sql/nexmark/q1.sql @@ -7,4 +7,4 @@ SELECT auction, 0.908 * price as price, date_time FROM bid -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q10-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q10-temporal-filter.drop.sql new file mode 100644 index 0000000000000..994438f15a6a2 --- /dev/null +++ b/ci/scripts/sql/nexmark/q10-temporal-filter.drop.sql @@ 
-0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q10_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q10-temporal-filter.sql b/ci/scripts/sql/nexmark/q10-temporal-filter.sql new file mode 100644 index 0000000000000..3914ec575180b --- /dev/null +++ b/ci/scripts/sql/nexmark/q10-temporal-filter.sql @@ -0,0 +1,11 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q10_temporal_filter AS +SELECT auction, + bidder, + price, + date_time, + TO_CHAR(date_time, 'YYYY-MM-DD') as date, + TO_CHAR(date_time, 'HH:MI') as time +FROM bid_filtered +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q10.sql b/ci/scripts/sql/nexmark/q10.sql index e20a590ecf108..e155dce0a62b3 100644 --- a/ci/scripts/sql/nexmark/q10.sql +++ b/ci/scripts/sql/nexmark/q10.sql @@ -8,4 +8,4 @@ SELECT auction, TO_CHAR(date_time, 'YYYY-MM-DD') as date, TO_CHAR(date_time, 'HH:MI') as time FROM bid -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q101-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q101-temporal-filter.drop.sql new file mode 100644 index 0000000000000..bac9715316da1 --- /dev/null +++ b/ci/scripts/sql/nexmark/q101-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q101_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q101-temporal-filter.sql b/ci/scripts/sql/nexmark/q101-temporal-filter.sql new file mode 100644 index 0000000000000..0f7bb7c048220 --- /dev/null +++ b/ci/scripts/sql/nexmark/q101-temporal-filter.sql @@ -0,0 +1,16 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q101_temporal_filter AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + b.max_price AS current_highest_bid +FROM auction a +LEFT OUTER JOIN ( + SELECT + b1.auction, + MAX(b1.price) max_price + FROM bid_filtered b1 + GROUP BY b1.auction +) b ON a.id = b.auction +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q102-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q102-temporal-filter.drop.sql new file mode 100644 index 0000000000000..748a1d388600d --- /dev/null +++ b/ci/scripts/sql/nexmark/q102-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q102_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q102-temporal-filter.sql b/ci/scripts/sql/nexmark/q102-temporal-filter.sql new file mode 100644 index 0000000000000..d517aacc1c383 --- /dev/null +++ b/ci/scripts/sql/nexmark/q102-temporal-filter.sql @@ -0,0 +1,14 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q102_temporal_filter AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count +FROM auction a +JOIN bid_filtered b ON a.id = b.auction +GROUP BY a.id, a.item_name +HAVING COUNT(b.auction) >= ( + SELECT COUNT(*) / COUNT(DISTINCT auction) FROM bid_filtered +) +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q103-temporal-filter.drop.sql 
b/ci/scripts/sql/nexmark/q103-temporal-filter.drop.sql new file mode 100644 index 0000000000000..ebae2ca09a33e --- /dev/null +++ b/ci/scripts/sql/nexmark/q103-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q103_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q103-temporal-filter.sql b/ci/scripts/sql/nexmark/q103-temporal-filter.sql new file mode 100644 index 0000000000000..e415e914b9720 --- /dev/null +++ b/ci/scripts/sql/nexmark/q103-temporal-filter.sql @@ -0,0 +1,13 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q103_temporal_filter AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name +FROM auction a +WHERE a.id IN ( + SELECT b.auction FROM bid_filtered b + GROUP BY b.auction + HAVING COUNT(*) >= 20 +) +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q104-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q104-temporal-filter.drop.sql new file mode 100644 index 0000000000000..f2a066f42c770 --- /dev/null +++ b/ci/scripts/sql/nexmark/q104-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q104_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q104-temporal-filter.sql b/ci/scripts/sql/nexmark/q104-temporal-filter.sql new file mode 100644 index 0000000000000..6c6145a03f2cc --- /dev/null +++ b/ci/scripts/sql/nexmark/q104-temporal-filter.sql @@ -0,0 +1,13 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q104_temporal_filter AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name +FROM auction a +WHERE a.id NOT IN ( + SELECT b.auction FROM bid_filtered b + GROUP BY b.auction + HAVING COUNT(*) < 20 +) +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q105-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q105-temporal-filter.drop.sql new file mode 100644 index 0000000000000..e3ebe8ecc61f5 --- /dev/null +++ b/ci/scripts/sql/nexmark/q105-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q105_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q105-temporal-filter.sql b/ci/scripts/sql/nexmark/q105-temporal-filter.sql new file mode 100644 index 0000000000000..8862ecfbc33dc --- /dev/null +++ b/ci/scripts/sql/nexmark/q105-temporal-filter.sql @@ -0,0 +1,13 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q105_temporal_filter AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count +FROM auction a +JOIN bid_filtered b ON a.id = b.auction +GROUP BY a.id, a.item_name +ORDER BY bid_count DESC +LIMIT 1000 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q105-without-limit-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q105-without-limit-temporal-filter.drop.sql new file mode 100644 index 0000000000000..aff265cb37c48 --- /dev/null +++ b/ci/scripts/sql/nexmark/q105-without-limit-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q105_without_limit_temporal_filter; diff 
--git a/ci/scripts/sql/nexmark/q105-without-limit-temporal-filter.sql b/ci/scripts/sql/nexmark/q105-without-limit-temporal-filter.sql new file mode 100644 index 0000000000000..d79a78db866e1 --- /dev/null +++ b/ci/scripts/sql/nexmark/q105-without-limit-temporal-filter.sql @@ -0,0 +1,11 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q105_without_limit_temporal_filter AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count +FROM auction a +JOIN bid_filtered b ON a.id = b.auction +GROUP BY a.id, a.item_name +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q105-without-limit.drop.sql b/ci/scripts/sql/nexmark/q105-without-limit.drop.sql new file mode 100644 index 0000000000000..b43a6c4b1eedf --- /dev/null +++ b/ci/scripts/sql/nexmark/q105-without-limit.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q105_without_limit; diff --git a/ci/scripts/sql/nexmark/q105-without-limit.sql b/ci/scripts/sql/nexmark/q105-without-limit.sql new file mode 100644 index 0000000000000..ea68188ab86e8 --- /dev/null +++ b/ci/scripts/sql/nexmark/q105-without-limit.sql @@ -0,0 +1,11 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q105_without_limit AS +SELECT + a.id AS auction_id, + a.item_name AS auction_item_name, + COUNT(b.auction) AS bid_count +FROM auction a +JOIN bid b ON a.id = b.auction +GROUP BY a.id, a.item_name +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q106.drop.sql b/ci/scripts/sql/nexmark/q106.drop.sql new file mode 100644 index 0000000000000..f651e37253804 --- /dev/null +++ b/ci/scripts/sql/nexmark/q106.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q106; diff --git a/ci/scripts/sql/nexmark/q106.sql b/ci/scripts/sql/nexmark/q106.sql new file mode 100644 index 0000000000000..79c22f9345876 --- /dev/null +++ b/ci/scripts/sql/nexmark/q106.sql @@ -0,0 +1,21 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q106 +AS +SELECT + MIN(final) AS min_final +FROM + ( + SELECT + auction.id, + MAX(price) AS final + FROM + auction, + bid + WHERE + bid.auction = auction.id + AND bid.date_time BETWEEN auction.date_time AND auction.expires + GROUP BY + auction.id + ) +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q107.drop.sql b/ci/scripts/sql/nexmark/q107.drop.sql new file mode 100644 index 0000000000000..204d81cea0e49 --- /dev/null +++ b/ci/scripts/sql/nexmark/q107.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q107; diff --git a/ci/scripts/sql/nexmark/q107.sql b/ci/scripts/sql/nexmark/q107.sql new file mode 100644 index 0000000000000..f1d354fcf98a7 --- /dev/null +++ b/ci/scripts/sql/nexmark/q107.sql @@ -0,0 +1,11 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q107 AS +SELECT + approx_percentile(0.01, 0.01) within group (order by price) as p01, + approx_percentile(0.1, 0.01) within group (order by price) as p10, + approx_percentile(0.5, 0.01) within group (order by price) as p50, + 
approx_percentile(0.9, 0.01) within group (order by price) as p90, + approx_percentile(0.99, 0.01) within group (order by price) as p99 +FROM bid +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q108.drop.sql b/ci/scripts/sql/nexmark/q108.drop.sql new file mode 100644 index 0000000000000..4f47359cd7c78 --- /dev/null +++ b/ci/scripts/sql/nexmark/q108.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q108; diff --git a/ci/scripts/sql/nexmark/q108.sql b/ci/scripts/sql/nexmark/q108.sql new file mode 100644 index 0000000000000..4b471d0b78548 --- /dev/null +++ b/ci/scripts/sql/nexmark/q108.sql @@ -0,0 +1,13 @@ +-- test two-phase simple approx percentile and merge +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q108 AS +SELECT + approx_percentile(0.01, 0.01) within group (order by price) as p01, + approx_percentile(0.1, 0.01) within group (order by price) as p10, + approx_percentile(0.5, 0.01) within group (order by price) as p50, + approx_percentile(0.9, 0.01) within group (order by price) as p90, + approx_percentile(0.99, 0.01) within group (order by price) as p99 +FROM bid +GROUP BY auction +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q12-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q12-temporal-filter.drop.sql new file mode 100644 index 0000000000000..5b0a91296c748 --- /dev/null +++ b/ci/scripts/sql/nexmark/q12-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q12_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q12-temporal-filter.sql b/ci/scripts/sql/nexmark/q12-temporal-filter.sql new file mode 100644 index 0000000000000..d4874b913fe26 --- /dev/null +++ b/ci/scripts/sql/nexmark/q12-temporal-filter.sql @@ -0,0 +1,7 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q12_temporal_filter AS +SELECT bidder, count(*) as bid_count, window_start, window_end +FROM TUMBLE(bid_filtered, p_time, INTERVAL '10' SECOND) +GROUP BY bidder, window_start, window_end +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q13-by-row-id.drop.sql b/ci/scripts/sql/nexmark/q13-by-row-id.drop.sql new file mode 100644 index 0000000000000..1349e9974365d --- /dev/null +++ b/ci/scripts/sql/nexmark/q13-by-row-id.drop.sql @@ -0,0 +1,4 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q13_by_row_id; +DROP TABLE side_input; diff --git a/ci/scripts/sql/nexmark/q13-by-row-id.sql b/ci/scripts/sql/nexmark/q13-by-row-id.sql new file mode 100644 index 0000000000000..8baf39eda4651 --- /dev/null +++ b/ci/scripts/sql/nexmark/q13-by-row-id.sql @@ -0,0 +1,12 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE TABLE side_input( + key BIGINT PRIMARY KEY, + value VARCHAR +); +INSERT INTO side_input SELECT v, v::varchar FROM generate_series(0, ${BENCHMARK_NEXMARK_RISINGWAVE_Q13_SIDE_INPUT_ROW_COUNT} - 1) AS s(v); + +CREATE SINK nexmark_q13_by_row_id AS +SELECT B.auction, B.bidder, B.price, B.date_time, S.value +FROM bid B join side_input FOR SYSTEM_TIME AS OF PROCTIME() S on mod(B._row_id::bigint, 
${BENCHMARK_NEXMARK_RISINGWAVE_Q13_SIDE_INPUT_ROW_COUNT}) = S.key +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q13-non-lookup-cond.drop.sql b/ci/scripts/sql/nexmark/q13-non-lookup-cond.drop.sql new file mode 100644 index 0000000000000..f950bf5da887a --- /dev/null +++ b/ci/scripts/sql/nexmark/q13-non-lookup-cond.drop.sql @@ -0,0 +1,4 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q13_non_lookup_cond; +DROP TABLE side_input; diff --git a/ci/scripts/sql/nexmark/q13-non-lookup-cond.sql b/ci/scripts/sql/nexmark/q13-non-lookup-cond.sql new file mode 100644 index 0000000000000..4162b8ed7bea8 --- /dev/null +++ b/ci/scripts/sql/nexmark/q13-non-lookup-cond.sql @@ -0,0 +1,14 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE TABLE side_input( + key BIGINT PRIMARY KEY, + value VARCHAR +); +INSERT INTO side_input SELECT v, v::varchar FROM generate_series(0, ${BENCHMARK_NEXMARK_RISINGWAVE_Q13_SIDE_INPUT_ROW_COUNT} - 1) AS s(v); + +CREATE SINK nexmark_q13_non_lookup_cond AS +SELECT B.auction, B.bidder, B.price, B.date_time, S.value +FROM bid B join side_input FOR SYSTEM_TIME AS OF PROCTIME() S +ON mod(B.auction, ${BENCHMARK_NEXMARK_RISINGWAVE_Q13_SIDE_INPUT_ROW_COUNT}) = S.key + AND S.key % 4 != 1 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q13.sql b/ci/scripts/sql/nexmark/q13.sql index d409de228ade5..2aee5ffab4f4a 100644 --- a/ci/scripts/sql/nexmark/q13.sql +++ b/ci/scripts/sql/nexmark/q13.sql @@ -1,11 +1,12 @@ -- noinspection SqlNoDataSourceInspectionForFile -- noinspection SqlResolveForFile CREATE TABLE side_input( - key BIGINT PRIMARY KEY, - value VARCHAR + key BIGINT PRIMARY KEY, + value VARCHAR ); -INSERT INTO side_input SELECT i::bigint, i::varchar FROM(SELECT generate_series(0,9999,1) as i); +INSERT INTO side_input SELECT v, v::varchar FROM generate_series(0, ${BENCHMARK_NEXMARK_RISINGWAVE_Q13_SIDE_INPUT_ROW_COUNT} - 1) AS s(v); + CREATE SINK nexmark_q13 AS SELECT B.auction, B.bidder, B.price, B.date_time, S.value -FROM bid B join side_input FOR SYSTEM_TIME AS OF PROCTIME() S on mod(B.auction, 10000) = S.key -WITH ( connector = 'blackhole', type = 'append-only'); +FROM bid B join side_input FOR SYSTEM_TIME AS OF PROCTIME() S on mod(B.auction, ${BENCHMARK_NEXMARK_RISINGWAVE_Q13_SIDE_INPUT_ROW_COUNT}) = S.key +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q14-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q14-temporal-filter.drop.sql new file mode 100644 index 0000000000000..059ef57dee668 --- /dev/null +++ b/ci/scripts/sql/nexmark/q14-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q14_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q14-temporal-filter.sql b/ci/scripts/sql/nexmark/q14-temporal-filter.sql new file mode 100644 index 0000000000000..d0aba0a8d5c55 --- /dev/null +++ b/ci/scripts/sql/nexmark/q14-temporal-filter.sql @@ -0,0 +1,26 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q14_temporal_filter AS +SELECT auction, + bidder, + 0.908 * price as price, + CASE + WHEN + extract(hour from date_time) >= 8 AND + extract(hour from date_time) <= 18 + THEN 'dayTime' + WHEN + extract(hour from date_time) <= 6 OR + 
extract(hour from date_time) >= 20 + THEN 'nightTime' + ELSE 'otherTime' + END AS bidTimeType, + date_time + -- extra + -- TODO: count_char is an UDF, add it back when we support similar functionality. + -- https://github.com/nexmark/nexmark/blob/master/nexmark-flink/src/main/java/com/github/nexmark/flink/udf/CountChar.java + -- count_char(extra, 'c') AS c_counts +FROM bid_filtered +WHERE 0.908 * price > 1000000 + AND 0.908 * price < 50000000 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q14.drop.sql b/ci/scripts/sql/nexmark/q14.drop.sql index 6cd0e12d317a0..b4f51674fb03b 100644 --- a/ci/scripts/sql/nexmark/q14.drop.sql +++ b/ci/scripts/sql/nexmark/q14.drop.sql @@ -1,4 +1,4 @@ -- noinspection SqlNoDataSourceInspectionForFile -- noinspection SqlResolveForFile DROP SINK nexmark_q14; -DROP FUNCTION count_char; \ No newline at end of file +DROP FUNCTION count_char; diff --git a/ci/scripts/sql/nexmark/q14.sql b/ci/scripts/sql/nexmark/q14.sql index c5c174e3579ca..258f72c1ca7fb 100644 --- a/ci/scripts/sql/nexmark/q14.sql +++ b/ci/scripts/sql/nexmark/q14.sql @@ -1,16 +1,10 @@ -- noinspection SqlNoDataSourceInspectionForFile -- noinspection SqlResolveForFile - -CREATE FUNCTION count_char(s varchar, c varchar) RETURNS int LANGUAGE javascript AS $$ - var count = 0; - for (var cc of s) { - if (cc === c) { - count++; - } - } - return count; +CREATE FUNCTION count_char(s varchar, c varchar) RETURNS int LANGUAGE rust AS $$ +fn count_char(s: &str, c: &str) -> i32 { + s.matches(c).count() as i32 +} $$; - CREATE SINK nexmark_q14 AS SELECT auction, bidder, @@ -27,8 +21,9 @@ SELECT auction, ELSE 'otherTime' END AS bidTimeType, date_time, + -- extra count_char(extra, 'c') AS c_counts FROM bid WHERE 0.908 * price > 1000000 AND 0.908 * price < 50000000 -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q15-no-distinct.drop.sql b/ci/scripts/sql/nexmark/q15-no-distinct.drop.sql new file mode 100644 index 0000000000000..2a3680882ed28 --- /dev/null +++ b/ci/scripts/sql/nexmark/q15-no-distinct.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q15_no_distinct; diff --git a/ci/scripts/sql/nexmark/q15-no-distinct.sql b/ci/scripts/sql/nexmark/q15-no-distinct.sql new file mode 100644 index 0000000000000..a786285a6b8fd --- /dev/null +++ b/ci/scripts/sql/nexmark/q15-no-distinct.sql @@ -0,0 +1,20 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +SET rw_force_two_phase_agg = ${BENCHMARK_NEXMARK_RISINGWAVE_Q15_NO_DISTINCT_RW_FORCE_TWO_PHASE_AGG}; +CREATE SINK nexmark_q15_no_distinct AS +SELECT to_char(date_time, 'YYYY-MM-DD') as "day", + count(*) AS total_bids, + count(*) filter (where price < 10000) AS rank1_bids, + count(*) filter (where price >= 10000 and price < 1000000) AS rank2_bids, + count(*) filter (where price >= 1000000) AS rank3_bids, + count(bidder) AS total_bidders, + count(bidder) filter (where price < 10000) AS rank1_bidders, + count(bidder) filter (where price >= 10000 and price < 1000000) AS rank2_bidders, + count(bidder) filter (where price >= 1000000) AS rank3_bidders, + count(auction) AS total_auctions, + count(auction) filter (where price < 10000) AS rank1_auctions, + count(auction) filter (where price >= 10000 and price < 1000000) AS rank2_auctions, + count(auction) filter (where price >= 
1000000) AS rank3_auctions +FROM bid +GROUP BY to_char(date_time, 'YYYY-MM-DD') +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q15-simple.drop.sql b/ci/scripts/sql/nexmark/q15-simple.drop.sql new file mode 100644 index 0000000000000..74ba29061ab05 --- /dev/null +++ b/ci/scripts/sql/nexmark/q15-simple.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q15_simple; diff --git a/ci/scripts/sql/nexmark/q15-simple.sql b/ci/scripts/sql/nexmark/q15-simple.sql new file mode 100644 index 0000000000000..d6e9b7e279207 --- /dev/null +++ b/ci/scripts/sql/nexmark/q15-simple.sql @@ -0,0 +1,8 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q15_simple AS +SELECT to_char(date_time, 'YYYY-MM-DD') as "day", + count(*) AS total_bids +FROM bid +GROUP BY to_char(date_time, 'YYYY-MM-DD') +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q15.sql b/ci/scripts/sql/nexmark/q15.sql index cdee0bf20b83e..49c5b05e52457 100644 --- a/ci/scripts/sql/nexmark/q15.sql +++ b/ci/scripts/sql/nexmark/q15.sql @@ -1,5 +1,7 @@ -- noinspection SqlNoDataSourceInspectionForFile -- noinspection SqlResolveForFile +SET rw_force_split_distinct_agg = ${BENCHMARK_NEXMARK_RISINGWAVE_Q15_RW_FORCE_SPLIT_DISTINCT_AGG}; +SET rw_force_two_phase_agg = ${BENCHMARK_NEXMARK_RISINGWAVE_Q15_RW_FORCE_TWO_PHASE_AGG}; CREATE SINK nexmark_q15 AS SELECT to_char(date_time, 'YYYY-MM-DD') as "day", count(*) AS total_bids, diff --git a/ci/scripts/sql/nexmark/q16-no-distinct.drop.sql b/ci/scripts/sql/nexmark/q16-no-distinct.drop.sql new file mode 100644 index 0000000000000..8fc59bf972fa1 --- /dev/null +++ b/ci/scripts/sql/nexmark/q16-no-distinct.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q16_no_distinct; diff --git a/ci/scripts/sql/nexmark/q16-no-distinct.sql b/ci/scripts/sql/nexmark/q16-no-distinct.sql new file mode 100644 index 0000000000000..b1148fa66a43c --- /dev/null +++ b/ci/scripts/sql/nexmark/q16-no-distinct.sql @@ -0,0 +1,22 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +SET rw_force_two_phase_agg = ${BENCHMARK_NEXMARK_RISINGWAVE_Q16_NO_DISTINCT_RW_FORCE_TWO_PHASE_AGG}; +CREATE SINK nexmark_q16_no_distinct AS +SELECT channel, + to_char(date_time, 'YYYY-MM-DD') as "day", + max(to_char(date_time, 'HH:mm')) as "minute", + count(*) AS total_bids, + count(*) filter (where price < 10000) AS rank1_bids, + count(*) filter (where price >= 10000 and price < 1000000) AS rank2_bids, + count(*) filter (where price >= 1000000) AS rank3_bids, + count(bidder) AS total_bidders, + count(bidder) filter (where price < 10000) AS rank1_bidders, + count(bidder) filter (where price >= 10000 and price < 1000000) AS rank2_bidders, + count(bidder) filter (where price >= 1000000) AS rank3_bidders, + count(auction) AS total_auctions, + count(auction) filter (where price < 10000) AS rank1_auctions, + count(auction) filter (where price >= 10000 and price < 1000000) AS rank2_auctions, + count(auction) filter (where price >= 1000000) AS rank3_auctions +FROM bid +GROUP BY to_char(date_time, 'YYYY-MM-DD'), channel +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q16.sql b/ci/scripts/sql/nexmark/q16.sql index 
1192a1f8af805..a6dcff0fb2316 100644 --- a/ci/scripts/sql/nexmark/q16.sql +++ b/ci/scripts/sql/nexmark/q16.sql @@ -1,5 +1,7 @@ -- noinspection SqlNoDataSourceInspectionForFile -- noinspection SqlResolveForFile +SET rw_force_split_distinct_agg = ${BENCHMARK_NEXMARK_RISINGWAVE_Q16_RW_FORCE_SPLIT_DISTINCT_AGG}; +SET rw_force_two_phase_agg = ${BENCHMARK_NEXMARK_RISINGWAVE_Q16_RW_FORCE_TWO_PHASE_AGG}; CREATE SINK nexmark_q16 AS SELECT channel, to_char(date_time, 'YYYY-MM-DD') as "day", diff --git a/ci/scripts/sql/nexmark/q18-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q18-temporal-filter.drop.sql new file mode 100644 index 0000000000000..6848636f73976 --- /dev/null +++ b/ci/scripts/sql/nexmark/q18-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q18_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q18-temporal-filter.sql b/ci/scripts/sql/nexmark/q18-temporal-filter.sql new file mode 100644 index 0000000000000..c1aaec28c5e57 --- /dev/null +++ b/ci/scripts/sql/nexmark/q18-temporal-filter.sql @@ -0,0 +1,12 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q18_temporal_filter AS +SELECT auction, bidder, price, channel, url, date_time +FROM (SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY bidder, auction + ORDER BY date_time DESC + ) AS rank_number + FROM bid_filtered) +WHERE rank_number <= 1 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q19-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q19-temporal-filter.drop.sql new file mode 100644 index 0000000000000..4344479feeaea --- /dev/null +++ b/ci/scripts/sql/nexmark/q19-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q19_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q19-temporal-filter.sql b/ci/scripts/sql/nexmark/q19-temporal-filter.sql new file mode 100644 index 0000000000000..67a2e503839db --- /dev/null +++ b/ci/scripts/sql/nexmark/q19-temporal-filter.sql @@ -0,0 +1,12 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q19_temporal_filter AS +SELECT * +FROM (SELECT *, + ROW_NUMBER() OVER ( + PARTITION BY auction + ORDER BY price DESC + ) AS rank_number + FROM bid_filtered) +WHERE rank_number <= 10 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q2.sql b/ci/scripts/sql/nexmark/q2.sql index b33480bbfe020..33a1d0d6bcb17 100644 --- a/ci/scripts/sql/nexmark/q2.sql +++ b/ci/scripts/sql/nexmark/q2.sql @@ -9,4 +9,4 @@ WHERE auction = 1007 OR auction = 2001 OR auction = 2019 OR auction = 2087 -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q20-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q20-temporal-filter.drop.sql new file mode 100644 index 0000000000000..b0a0a8827b4cf --- /dev/null +++ b/ci/scripts/sql/nexmark/q20-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q20_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q20-temporal-filter.sql b/ci/scripts/sql/nexmark/q20-temporal-filter.sql new file mode 100644 index 0000000000000..095d33f05f5d6 --- /dev/null +++ 
b/ci/scripts/sql/nexmark/q20-temporal-filter.sql @@ -0,0 +1,23 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q20_temporal_filter AS +SELECT auction, + bidder, + price, + channel, + url, + B.date_time as bid_date_time, + B.extra as bid_extra, + item_name, + description, + initial_bid, + reserve, + A.date_time as auction_date_time, + expires, + seller, + category, + A.extra as auction_extra +FROM bid_filtered AS B + INNER JOIN auction AS A on B.auction = A.id +WHERE A.category = 10 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q20.sql b/ci/scripts/sql/nexmark/q20.sql index b3d46d8bae721..3f353cad0d123 100644 --- a/ci/scripts/sql/nexmark/q20.sql +++ b/ci/scripts/sql/nexmark/q20.sql @@ -20,4 +20,4 @@ SELECT auction, FROM bid AS B INNER JOIN auction AS A on B.auction = A.id WHERE A.category = 10 -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q21-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q21-temporal-filter.drop.sql new file mode 100644 index 0000000000000..637eefee2a4b1 --- /dev/null +++ b/ci/scripts/sql/nexmark/q21-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q21_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q21-temporal-filter.sql b/ci/scripts/sql/nexmark/q21-temporal-filter.sql new file mode 100644 index 0000000000000..ec64fc4242910 --- /dev/null +++ b/ci/scripts/sql/nexmark/q21-temporal-filter.sql @@ -0,0 +1,19 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q21_temporal_filter AS +SELECT auction, + bidder, + price, + channel, + CASE + WHEN LOWER(channel) = 'apple' THEN '0' + WHEN LOWER(channel) = 'google' THEN '1' + WHEN LOWER(channel) = 'facebook' THEN '2' + WHEN LOWER(channel) = 'baidu' THEN '3' + ELSE (regexp_match(url, '(&|^)channel_id=([^&]*)'))[2] + END + AS channel_id +FROM bid_filtered +WHERE (regexp_match(url, '(&|^)channel_id=([^&]*)'))[2] is not null + or LOWER(channel) in ('apple', 'google', 'facebook', 'baidu') +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q21.sql b/ci/scripts/sql/nexmark/q21.sql index 57c48d55451fa..75322978d0b78 100644 --- a/ci/scripts/sql/nexmark/q21.sql +++ b/ci/scripts/sql/nexmark/q21.sql @@ -16,4 +16,4 @@ SELECT auction, FROM bid WHERE (regexp_match(url, '(&|^)channel_id=([^&]*)'))[2] is not null or LOWER(channel) in ('apple', 'google', 'facebook', 'baidu') -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q22-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q22-temporal-filter.drop.sql new file mode 100644 index 0000000000000..7539ad8e25404 --- /dev/null +++ b/ci/scripts/sql/nexmark/q22-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q22_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q22-temporal-filter.sql b/ci/scripts/sql/nexmark/q22-temporal-filter.sql new file mode 100644 index 0000000000000..40c47c6e236fd --- /dev/null +++ b/ci/scripts/sql/nexmark/q22-temporal-filter.sql @@ -0,0 +1,12 @@ +-- noinspection 
SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q22_temporal_filter AS +SELECT auction, + bidder, + price, + channel, + split_part(url, '/', 4) as dir1, + split_part(url, '/', 5) as dir2, + split_part(url, '/', 6) as dir3 +FROM bid_filtered +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q22.sql b/ci/scripts/sql/nexmark/q22.sql index db0ece23786b5..81f861e65b7c6 100644 --- a/ci/scripts/sql/nexmark/q22.sql +++ b/ci/scripts/sql/nexmark/q22.sql @@ -9,4 +9,4 @@ SELECT auction, split_part(url, '/', 5) as dir2, split_part(url, '/', 6) as dir3 FROM bid -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q3.sql b/ci/scripts/sql/nexmark/q3.sql index c12f96705059c..9d5e0cd62dc02 100644 --- a/ci/scripts/sql/nexmark/q3.sql +++ b/ci/scripts/sql/nexmark/q3.sql @@ -10,4 +10,4 @@ FROM auction AS A INNER JOIN person AS P on A.seller = P.id WHERE A.category = 10 and (P.state = 'or' OR P.state = 'id' OR P.state = 'ca') -WITH ( connector = 'blackhole', type = 'append-only'); +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q4-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q4-temporal-filter.drop.sql new file mode 100644 index 0000000000000..6e1e0a2effd36 --- /dev/null +++ b/ci/scripts/sql/nexmark/q4-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q4_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q4-temporal-filter.sql b/ci/scripts/sql/nexmark/q4-temporal-filter.sql new file mode 100644 index 0000000000000..ba3300eb72678 --- /dev/null +++ b/ci/scripts/sql/nexmark/q4-temporal-filter.sql @@ -0,0 +1,15 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q4_temporal_filter +AS +SELECT Q.category, + AVG(Q.final) as avg +FROM (SELECT MAX(B.price) AS final, + A.category + FROM auction A, + bid_filtered B + WHERE A.id = B.auction + AND B.date_time BETWEEN A.date_time AND A.expires + GROUP BY A.id, A.category) Q +GROUP BY Q.category +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q5-many-windows-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q5-many-windows-temporal-filter.drop.sql new file mode 100644 index 0000000000000..0642187cd5eed --- /dev/null +++ b/ci/scripts/sql/nexmark/q5-many-windows-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q5_many_windows_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q5-many-windows-temporal-filter.sql b/ci/scripts/sql/nexmark/q5-many-windows-temporal-filter.sql new file mode 100644 index 0000000000000..50a9c7a20c2c0 --- /dev/null +++ b/ci/scripts/sql/nexmark/q5-many-windows-temporal-filter.sql @@ -0,0 +1,43 @@ +-- https://web.archive.org/web/20100620010601/http://datalab.cs.pdx.edu/niagaraST/NEXMark/ +-- The original q5 is `[RANGE 60 MINUTE SLIDE 1 MINUTE]`. +-- However, using 60 minute may require running a very long period to see the effect. +-- Therefore, we change it to `[RANGE 5 MINUTE SLIDE 5 SECOND]` to generate many sliding windows. +-- The percentage between window size and hop interval stays the same as the one in original nexmark. 
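A quick check of the claim in the comment above, assuming the usual hop-window semantics in which every row falls into RANGE / SLIDE overlapping windows (the "percentage" the comment refers to is this range-to-slide ratio):

```latex
\frac{60\ \text{min}}{1\ \text{min}} = 60
\qquad\text{and}\qquad
\frac{5\ \text{min}}{5\ \text{s}} = \frac{300\ \text{s}}{5\ \text{s}} = 60
```

So each bid still contributes to 60 overlapping windows after the change; only the absolute window length shrinks, which is why the benchmark can observe window results much sooner.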
+-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q5_many_windows_temporal_filter +AS +SELECT + AuctionBids.auction, AuctionBids.num +FROM ( + SELECT + bid.auction, + count(*) AS num, + window_start AS starttime + FROM + HOP(bid_filtered, date_time, INTERVAL '5' SECOND, INTERVAL '5' MINUTE) as bid + GROUP BY + bid.auction, + window_start +) AS AuctionBids +JOIN ( + SELECT + max(CountBids.num) AS maxn, + CountBids.starttime_c + FROM ( + SELECT + count(*) AS num, + window_start AS starttime_c + FROM + HOP(bid_filtered, date_time, INTERVAL '5' SECOND, INTERVAL '5' MINUTE) as bid + GROUP BY + bid.auction, + window_start + ) AS CountBids + GROUP BY + CountBids.starttime_c + ) AS MaxBids +ON + AuctionBids.starttime = MaxBids.starttime_c AND + AuctionBids.num >= MaxBids.maxn +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q5-many-windows.sql b/ci/scripts/sql/nexmark/q5-many-windows.sql index 1a0daafd4176d..6fd49d2edf682 100644 --- a/ci/scripts/sql/nexmark/q5-many-windows.sql +++ b/ci/scripts/sql/nexmark/q5-many-windows.sql @@ -24,19 +24,19 @@ JOIN ( SELECT max(CountBids.num) AS maxn, CountBids.starttime_c - FROM ( - SELECT + FROM ( + SELECT count(*) AS num, window_start AS starttime_c - FROM + FROM HOP(bid, date_time, INTERVAL '5' SECOND, INTERVAL '5' MINUTE) GROUP BY bid.auction, window_start - ) AS CountBids - GROUP BY + ) AS CountBids + GROUP BY CountBids.starttime_c - ) AS MaxBids + ) AS MaxBids ON AuctionBids.starttime = MaxBids.starttime_c AND AuctionBids.num >= MaxBids.maxn diff --git a/ci/scripts/sql/nexmark/q5-rewrite-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q5-rewrite-temporal-filter.drop.sql new file mode 100644 index 0000000000000..a13d94658f2d1 --- /dev/null +++ b/ci/scripts/sql/nexmark/q5-rewrite-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q5_rewrite_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q5-rewrite-temporal-filter.sql b/ci/scripts/sql/nexmark/q5-rewrite-temporal-filter.sql new file mode 100644 index 0000000000000..2adb38d99b0d0 --- /dev/null +++ b/ci/scripts/sql/nexmark/q5-rewrite-temporal-filter.sql @@ -0,0 +1,20 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q5_rewrite_temporal_filter AS +SELECT + B.auction, + B.num +FROM ( + SELECT + auction, + num, + /*use rank here to express top-N with ties*/ + rank() over (partition by starttime order by num desc) as num_rank + FROM ( + SELECT bid.auction, count(*) AS num, window_start AS starttime + FROM HOP(bid_filtered, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) as bid + GROUP BY window_start, bid.auction + ) +) B +WHERE num_rank <= 1 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q5-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q5-temporal-filter.drop.sql new file mode 100644 index 0000000000000..14375f59190af --- /dev/null +++ b/ci/scripts/sql/nexmark/q5-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q5_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q5-temporal-filter.sql b/ci/scripts/sql/nexmark/q5-temporal-filter.sql new file mode 100644 index 0000000000000..db19bf6988ef7 --- /dev/null +++ b/ci/scripts/sql/nexmark/q5-temporal-filter.sql 
@@ -0,0 +1,38 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q5_temporal_filter +AS +SELECT + AuctionBids.auction, AuctionBids.num +FROM ( + SELECT + bid.auction, + count(*) AS num, + window_start AS starttime + FROM + HOP(bid_filtered, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) as bid + GROUP BY + bid.auction, + window_start +) AS AuctionBids +JOIN ( + SELECT + max(CountBids.num) AS maxn, + CountBids.starttime_c + FROM ( + SELECT + count(*) AS num, + window_start AS starttime_c + FROM + HOP(bid_filtered, date_time, INTERVAL '2' SECOND, INTERVAL '10' SECOND) as bid + GROUP BY + bid.auction, + window_start + ) AS CountBids + GROUP BY + CountBids.starttime_c + ) AS MaxBids +ON + AuctionBids.starttime = MaxBids.starttime_c AND + AuctionBids.num >= MaxBids.maxn +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q6-group-top1-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q6-group-top1-temporal-filter.drop.sql new file mode 100644 index 0000000000000..22b1ecc2a1424 --- /dev/null +++ b/ci/scripts/sql/nexmark/q6-group-top1-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q6_group_top1_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q6-group-top1-temporal-filter.sql b/ci/scripts/sql/nexmark/q6-group-top1-temporal-filter.sql new file mode 100644 index 0000000000000..93c8edd37b903 --- /dev/null +++ b/ci/scripts/sql/nexmark/q6-group-top1-temporal-filter.sql @@ -0,0 +1,16 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q6_group_top1_temporal_filter +AS +SELECT + Q.seller, + AVG(Q.final) OVER + (PARTITION BY Q.seller ORDER BY Q.date_time ROWS BETWEEN 10 PRECEDING AND CURRENT ROW) + as avg +FROM ( + SELECT ROW_NUMBER() OVER (PARTITION BY A.id, A.seller ORDER BY B.price) as rank, A.seller, B.price as final, B.date_time + FROM auction AS A, bid_filtered AS B + WHERE A.id = B.auction and B.date_time between A.date_time and A.expires +) AS Q +WHERE Q.rank <= 1 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q7-rewrite-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q7-rewrite-temporal-filter.drop.sql new file mode 100644 index 0000000000000..9061af9b68af9 --- /dev/null +++ b/ci/scripts/sql/nexmark/q7-rewrite-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q7_rewrite_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q7-rewrite-temporal-filter.sql b/ci/scripts/sql/nexmark/q7-rewrite-temporal-filter.sql new file mode 100644 index 0000000000000..80066a8f29a8e --- /dev/null +++ b/ci/scripts/sql/nexmark/q7-rewrite-temporal-filter.sql @@ -0,0 +1,21 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q7_rewrite_temporal_filter AS +SELECT + B.auction, + B.price, + B.bidder, + B.date_time +FROM ( + SELECT + auction, + price, + bidder, + date_time, + /*use rank here to express top-N with ties*/ + rank() over (partition by window_end order by price desc) as price_rank + FROM + TUMBLE(bid_filtered, date_time, INTERVAL '10' SECOND) +) B +WHERE price_rank <= 1 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git 
a/ci/scripts/sql/nexmark/q7-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q7-temporal-filter.drop.sql new file mode 100644 index 0000000000000..217c0e3d41d5c --- /dev/null +++ b/ci/scripts/sql/nexmark/q7-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q7_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q7-temporal-filter.sql b/ci/scripts/sql/nexmark/q7-temporal-filter.sql new file mode 100644 index 0000000000000..e4e3dbeed3561 --- /dev/null +++ b/ci/scripts/sql/nexmark/q7-temporal-filter.sql @@ -0,0 +1,17 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q7_temporal_filter +AS +SELECT B.auction, + B.price, + B.bidder, + B.date_time +from bid_filtered B + JOIN (SELECT MAX(price) AS maxprice, + window_end as date_time + FROM + TUMBLE(bid_filtered, date_time, INTERVAL '10' SECOND) + GROUP BY window_end) B1 ON B.price = B1.maxprice +WHERE B.date_time BETWEEN B1.date_time - INTERVAL '10' SECOND + AND B1.date_time +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/sql/nexmark/q9-temporal-filter.drop.sql b/ci/scripts/sql/nexmark/q9-temporal-filter.drop.sql new file mode 100644 index 0000000000000..a0684f0dbf44c --- /dev/null +++ b/ci/scripts/sql/nexmark/q9-temporal-filter.drop.sql @@ -0,0 +1,3 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +DROP SINK nexmark_q9_temporal_filter; diff --git a/ci/scripts/sql/nexmark/q9-temporal-filter.sql b/ci/scripts/sql/nexmark/q9-temporal-filter.sql new file mode 100644 index 0000000000000..02cfc8997122b --- /dev/null +++ b/ci/scripts/sql/nexmark/q9-temporal-filter.sql @@ -0,0 +1,29 @@ +-- noinspection SqlNoDataSourceInspectionForFile +-- noinspection SqlResolveForFile +CREATE SINK nexmark_q9_temporal_filter +AS +SELECT id, + item_name, + description, + initial_bid, + reserve, + date_time, + expires, + seller, + category, + auction, + bidder, + price, + bid_date_time +FROM (SELECT A.*, + B.auction, + B.bidder, + B.price, + B.date_time AS bid_date_time, + ROW_NUMBER() OVER (PARTITION BY A.id ORDER BY B.price DESC, B.date_time ASC) AS rownum + FROM auction A, + bid_filtered B + WHERE A.id = B.auction + AND B.date_time BETWEEN A.date_time AND A.expires) tmp +WHERE rownum <= 1 +WITH ( connector = 'blackhole', type = 'append-only', force_append_only = 'true'); diff --git a/ci/scripts/standalone-utils.sh b/ci/scripts/standalone-utils.sh index c0767ddaefb62..059d11aada83f 100755 --- a/ci/scripts/standalone-utils.sh +++ b/ci/scripts/standalone-utils.sh @@ -75,7 +75,7 @@ start_standalone() { } stop_standalone() { - pkill standalone + killall --wait standalone } wait_standalone() { @@ -101,7 +101,6 @@ wait_standalone() { restart_standalone() { stop_standalone - sleep 5 start_standalone "$PREFIX_LOG"/standalone-restarted.log & wait_standalone } diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index c1681ff658765..bce57e69147f4 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -60,6 +60,7 @@ services: ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true} RW_TELEMETRY_TYPE: ${RW_TELEMETRY_TYPE:-"docker-compose"} RW_SECRET_STORE_PRIVATE_KEY_HEX: ${RW_SECRET_STORE_PRIVATE_KEY_HEX:-0123456789abcdef} + RW_LICENSE_KEY: ${RW_LICENSE_KEY:-""} container_name: risingwave-standalone healthcheck: test: diff --git a/docs/dev/book.toml b/docs/dev/book.toml index 77608409452b0..3479e428351a5 100644 
--- a/docs/dev/book.toml +++ b/docs/dev/book.toml @@ -11,5 +11,6 @@ smart-punctuation = true git-repository-url = "https://github.com/risingwavelabs/risingwave/tree/main/docs/dev/src" edit-url-template = "https://github.com/risingwavelabs/risingwave/edit/main/docs/dev/{path}" search.use-boolean-and = true +mathjax-support = true [output.linkcheck] diff --git a/docs/dev/src/design/consistent-hash.md b/docs/dev/src/design/consistent-hash.md index 43c3b973c04bc..551b7f4c9633b 100644 --- a/docs/dev/src/design/consistent-hash.md +++ b/docs/dev/src/design/consistent-hash.md @@ -20,23 +20,23 @@ First, we need to introduce a little about how we schedule the actors. Each work Here comes the main part, where we will construct a mapping that determines data distribution. -For all data $k \in U_k$, where $U_k$ is an unbounded set, we apply a hash function $v = H(k)$, where $v$ falls to a limited range. The hash function $H$ ensures that all data are hashed **uniformly** to that range. We call $v$ vnode, namely virtual node, as is shown as the squares in the figure below. +For all data \\( k \in U_k \\), where \\( U_k \\) is an unbounded set, we apply a hash function \\( v = H(k) \\), where \\( v \\) falls to a limited range. The hash function \\( H \\) ensures that all data are hashed **uniformly** to that range. We call \\( v \\) vnode, namely virtual node, as is shown as the squares in the figure below. ![initial data distribution](../images/consistent-hash/data-distribution.svg) -Then we have vnode mapping, which ensures that vnodes are mapped evenly to parallel units in the cluster. In other words, the number of vnodes that are mapped to each parallel unit should be as close as possible. This is denoted by different colors in the figure above. As is depicted, we have 3 parallel units (shown as circles), each taking $\frac{1}{3}$ of total vnodes. Vnode mapping is [constructed and maintained by meta](https://github.com/risingwavelabs/risingwave/blob/main/src/meta/src/stream/scheduler.rs). +Then we have vnode mapping, which ensures that vnodes are mapped evenly to parallel units in the cluster. In other words, the number of vnodes that are mapped to each parallel unit should be as close as possible. This is denoted by different colors in the figure above. As is depicted, we have 3 parallel units (shown as circles), each taking \\( \frac{1}{3} \\) of total vnodes. Vnode mapping is [constructed and maintained by meta](https://github.com/risingwavelabs/risingwave/blob/main/src/meta/src/stream/scheduler.rs). -As long as the hash function $H$ could ensure uniformity, the data distribution determined by this strategy would be even across physical resources. The evenness will be retained even if data in $U_k$ are skewed to a certain range, say, most students scoring over 60 in a hundred-mark system. +As long as the hash function \\( H \\) could ensure uniformity, the data distribution determined by this strategy would be even across physical resources. The evenness will be retained even if data in \\( U_k \\) are skewed to a certain range, say, most students scoring over 60 in a hundred-mark system. #### Data Redistribution -Since $v = H(k)$, the way that data are mapped to vnodes will be invariant. Therefore, when scaling occurs, we only need to modify vnode mapping (the way that vnodes are mapped to parallel units), so as to redistribute the data. +Since \\( v = H(k) \\), the way that data are mapped to vnodes will be invariant. 
Therefore, when scaling occurs, we only need to modify vnode mapping (the way that vnodes are mapped to parallel units), so as to redistribute the data. -Let's take scaling out for example. Assume that we have one more parallel unit after scaling out, as is depicted as the orange circle in the figure below. Using the optimal strategy, we modify the vnode mapping in such a way that only $\frac{1}{4}$ of the data have to be moved, as is shown in the figure below. The vnodes whose data are required to be moved are highlighted with bold border in the figure. +Let's take scaling out for example. Assume that we have one more parallel unit after scaling out, as is depicted as the orange circle in the figure below. Using the optimal strategy, we modify the vnode mapping in such a way that only \\( \frac{1}{4} \\) of the data have to be moved, as is shown in the figure below. The vnodes whose data are required to be moved are highlighted with bold border in the figure. ![optimal data redistribution](../images/consistent-hash/data-redistribution-1.svg) -To minimize data movement when scaling occurs, we should be careful when we modify the vnode mapping. Below is an opposite example. Modifying vnode mapping like this will result in $\frac{1}{2}$ of the data being moved. +To minimize data movement when scaling occurs, we should be careful when we modify the vnode mapping. Below is an opposite example. Modifying vnode mapping like this will result in \\( \frac{1}{2} \\) of the data being moved. ![worst data redistribution](../images/consistent-hash/data-redistribution-2.svg) @@ -49,9 +49,9 @@ We know that a fragment has several actors as its different parallelisms, and th In the figure, we can see that one upstream actor dispatches data to three downstream actors. The downstream actors are scheduled on the parallel units mentioned in previous example respectively. Based on our consistent hash design, the dispatcher is informed of the latest vnode mapping by meta node. It then decides how to send data by following steps: -1. Compute vnode of the data via the hash function $H$. Let the vnode be $v_k$. -2. Look up vnode mapping and find out parallel unit $p_n$ that vnode $v_k$ maps to. -3. Send data to the downstream actor that is scheduled on parallel unit $p_n$ (remember that one actor will be scheduled on exactly one parallel unit). +1. Compute vnode of the data via the hash function \\( H \\). Let the vnode be \\( v_k \\). +2. Look up vnode mapping and find out parallel unit \\( p_n \\) that vnode \\( v_k \\) maps to. +3. Send data to the downstream actor that is scheduled on parallel unit \\( p_n \\) (remember that one actor will be scheduled on exactly one parallel unit). In this way, all actors' data (i.e. actors' states) will be distributed according to the vnode mapping constructed by meta. @@ -78,7 +78,7 @@ We know that [Hummock](./state-store-overview.md#overview), our LSM-Tree-based s ``` table_id | vnode | ... ``` -where `table_id` denotes the [state table](relational-table.md), and `vnode` is computed via $H$ on key of the data. +where `table_id` denotes the [state table](relational-table.md), and `vnode` is computed via \\( H \\) on key of the data. To illustrate this, let's revisit the [previous example](#streaming). 
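To make the hashing scheme and the three dispatcher steps above concrete, here is a minimal, self-contained Rust sketch. It is illustrative only: `VNODE_COUNT`, `hash_to_vnode`, `VnodeMapping`, and `state_key_prefix` are hypothetical names, `DefaultHasher` stands in for the hash function H the system actually uses, and in the real system the vnode mapping is constructed and maintained by the meta node rather than built ad hoc.

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Number of virtual nodes; a hypothetical constant for illustration.
const VNODE_COUNT: u64 = 256;

/// Step 1: hash the key uniformly into the bounded vnode range (v = H(k)).
fn hash_to_vnode<K: Hash>(key: &K) -> u64 {
    let mut hasher = DefaultHasher::new();
    key.hash(&mut hasher);
    hasher.finish() % VNODE_COUNT
}

/// A toy vnode -> parallel-unit mapping. In the real system this table is
/// maintained by the meta node and only changes when the cluster scales.
struct VnodeMapping {
    vnode_to_parallel_unit: Vec<u32>, // indexed by vnode
}

impl VnodeMapping {
    /// Spread vnodes round-robin over `parallel_units` so each unit owns
    /// roughly the same number of vnodes.
    fn new_even(parallel_units: u32) -> Self {
        let vnode_to_parallel_unit = (0..VNODE_COUNT)
            .map(|v| (v % parallel_units as u64) as u32)
            .collect();
        Self { vnode_to_parallel_unit }
    }

    /// Steps 2-3: find the parallel unit (and hence the actor) that owns a vnode.
    fn parallel_unit(&self, vnode: u64) -> u32 {
        self.vnode_to_parallel_unit[vnode as usize]
    }
}

/// State-store key prefix in the spirit of `table_id | vnode | ...`.
fn state_key_prefix(table_id: u32, vnode: u64) -> Vec<u8> {
    let mut prefix = table_id.to_be_bytes().to_vec();
    prefix.extend_from_slice(&(vnode as u16).to_be_bytes());
    prefix
}

fn main() {
    let mapping = VnodeMapping::new_even(3); // 3 parallel units, as in the example
    let key = "some join key";
    let vnode = hash_to_vnode(&key);                      // step 1
    let target = mapping.parallel_unit(vnode);            // step 2
    println!("send to actor on parallel unit {target}");  // step 3
    println!("state key prefix: {:?}", state_key_prefix(42, vnode));
}
```

The key property is that `hash_to_vnode` never changes; scaling only rewrites the `vnode_to_parallel_unit` table, so only the vnodes whose owner changes need their data moved.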
Executors of an operator will share the same logical state table, just as is shown in the figure below: diff --git a/docs/dev/src/images/architecture_20240814.png b/docs/dev/src/images/architecture_20240814.png new file mode 100644 index 0000000000000..9d90e7bd86555 Binary files /dev/null and b/docs/dev/src/images/architecture_20240814.png differ diff --git a/e2e_test/batch/basic/func.slt.part b/e2e_test/batch/basic/func.slt.part index ebcce5ceb6a47..b5c68f86d6236 100644 --- a/e2e_test/batch/basic/func.slt.part +++ b/e2e_test/batch/basic/func.slt.part @@ -317,6 +317,11 @@ select count(current_database()); ---- 1 +query I +select count(current_catalog); +---- +1 + query T select regexp_match('abc', 'bc'); ---- diff --git a/e2e_test/batch/distribution_mode.slt b/e2e_test/batch/distribution_mode.slt index c125000a3acd4..b680796277c13 100644 --- a/e2e_test/batch/distribution_mode.slt +++ b/e2e_test/batch/distribution_mode.slt @@ -4,7 +4,7 @@ SET RW_IMPLICIT_FLUSH TO true; statement ok SET QUERY_MODE TO distributed; -include ./basic/*.slt.part +include ./basic/**/*.slt.part include ./duckdb/all.slt.part include ./order/*.slt.part include ./join/*.slt.part diff --git a/e2e_test/batch/local_mode.slt b/e2e_test/batch/local_mode.slt index c3818989443a7..68df9f0d91950 100644 --- a/e2e_test/batch/local_mode.slt +++ b/e2e_test/batch/local_mode.slt @@ -4,7 +4,7 @@ SET RW_IMPLICIT_FLUSH TO true; statement ok SET QUERY_MODE TO local; -include ./basic/*.slt.part +include ./basic/**/*.slt.part include ./duckdb/all.slt.part include ./order/*.slt.part include ./join/*.slt.part diff --git a/e2e_test/batch/types/list.slt.part b/e2e_test/batch/types/list.slt.part deleted file mode 100644 index 031a466a5a3b2..0000000000000 --- a/e2e_test/batch/types/list.slt.part +++ /dev/null @@ -1,2 +0,0 @@ -# Test cases for list don't work for now as the parser cannot recognize the cast expression. -# include list/*.slt.part diff --git a/e2e_test/batch/types/map.slt.part b/e2e_test/batch/types/map.slt.part new file mode 100644 index 0000000000000..bcdc92103e936 --- /dev/null +++ b/e2e_test/batch/types/map.slt.part @@ -0,0 +1,126 @@ +statement ok +SET RW_IMPLICIT_FLUSH TO true; + + +statement error +create table t (m map (float, float)); +---- +db error: ERROR: Failed to run the query + +Caused by: + invalid map key type: double precision + + +query error +select map_from_entries(array[1.0,2.0,3.0], array[1,2,3]); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: Failed to bind expression: map_from_entries(ARRAY[1.0, 2.0, 3.0], ARRAY[1, 2, 3]) + 2: Expr error + 3: invalid map key type: numeric + + +query error +select map_from_entries(array[1,1,3], array[1,2,3]); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: Expr error + 2: error while evaluating expression `map('{1,1,3}', '{1,2,3}')` + 3: map keys must be unique + + +query ? 
+select map_from_entries(array[1,2,3], array[1,null,3]); +---- +{"1":1,"2":NULL,"3":3} + + +query error +select map_from_entries(array[1,null,3], array[1,2,3]); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: Expr error + 2: error while evaluating expression `map('{1,NULL,3}', '{1,2,3}')` + 3: map keys must not be NULL + + +query error +select map_from_entries(array[1,3], array[1,2,3]); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: Expr error + 2: error while evaluating expression `map('{1,3}', '{1,2,3}')` + 3: map keys and values have different length + + +query error +select map_from_entries(array[1,2], array[1,2]) = map_from_entries(array[2,1], array[2,1]); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: Failed to bind expression: map_from_entries(ARRAY[1, 2], ARRAY[1, 2]) = map_from_entries(ARRAY[2, 1], ARRAY[2, 1]) + 2: function equal(map(integer,integer), map(integer,integer)) does not exist + + +statement ok +create table t ( + m1 map(varchar, float), + m2 map(int, bool), + m3 map(varchar, map(varchar, varchar)), + l map(varchar,int)[], + s struct)>, +); + + +statement ok +insert into t values ( + map_from_entries(array['a','b','c'], array[1.0,2.0,3.0]::float[]), + map_from_entries(array[1,2,3], array[true,false,true]), + map_from_entries(array['a','b'], + array[ + map_from_entries(array['a1'], array['a2']), + map_from_entries(array['b1'], array['b2']) + ] + ), + array[ + map_from_entries(array['a','b','c'], array[1,2,3]), + map_from_entries(array['d','e','f'], array[4,5,6]) + ], + row( + map_from_entries(array['a','b','c'], array[row(1),row(2),row(3)]::struct[]) + ) +); + +# cast(map(character varying,integer)) -> map(character varying,double precision) +query ? +select map_from_entries(array['a','b','c'], array[1,2,3])::map(varchar,float); +---- +{"a":1,"b":2,"c":3} + + +statement ok +insert into t(m1) values (map_from_entries(array['a','b','c'], array[1,2,3])); + +query ????? rowsort +select * from t; +---- +{"a":1,"b":2,"c":3} NULL NULL NULL NULL +{"a":1,"b":2,"c":3} {"1":t,"2":f,"3":t} {"a":{"a1":a2},"b":{"b1":b2}} {"{\"a\":1,\"b\":2,\"c\":3}","{\"d\":4,\"e\":5,\"f\":6}"} ("{""a"":(1),""b"":(2),""c"":(3)}") + +query ????? 
rowsort +select to_jsonb(m1), to_jsonb(m2), to_jsonb(m3), to_jsonb(l), to_jsonb(s) from t; +---- +{"a": 1.0, "b": 2.0, "c": 3.0} null null null null +{"a": 1.0, "b": 2.0, "c": 3.0} {"1": true, "2": false, "3": true} {"a": {"a1": "a2"}, "b": {"b1": "b2"}} [{"a": 1, "b": 2, "c": 3}, {"d": 4, "e": 5, "f": 6}] {"m": {"a": {"x": 1}, "b": {"x": 2}, "c": {"x": 3}}} + +statement ok +drop table t; diff --git a/e2e_test/batch/types/struct.slt.part b/e2e_test/batch/types/struct.slt.part deleted file mode 100644 index 396881000d48a..0000000000000 --- a/e2e_test/batch/types/struct.slt.part +++ /dev/null @@ -1 +0,0 @@ -include struct/*.slt.part diff --git a/e2e_test/s3/fs_source_batch.py b/e2e_test/s3/fs_source_batch.py index d606be36f37f0..9f8da63533a68 100644 --- a/e2e_test/s3/fs_source_batch.py +++ b/e2e_test/s3/fs_source_batch.py @@ -109,6 +109,59 @@ def _assert_eq(field, got, expect): cur.close() conn.close() +def test_empty_source(config, prefix, fmt): + conn = psycopg2.connect( + host="localhost", + port="4566", + user="root", + database="dev" + ) + + # Open a cursor to execute SQL statements + cur = conn.cursor() + + def _source(): + return f's3_test_empty_{fmt}' + + def _encode(): + if fmt == 'json': + return 'JSON' + else: + return f"CSV (delimiter = ',', without_header = {str('without' in fmt).lower()})" + + # Execute a SELECT statement + cur.execute(f'''CREATE SOURCE {_source()}( + id int, + name TEXT, + sex int, + mark int, + ) WITH ( + connector = 's3_v2', + match_pattern = '{prefix}*.{fmt}', + s3.region_name = '{config['S3_REGION']}', + s3.bucket_name = '{config['S3_BUCKET']}', + s3.credentials.access = '{config['S3_ACCESS_KEY']}', + s3.credentials.secret = '{config['S3_SECRET_KEY']}', + s3.endpoint_url = 'https://{config['S3_ENDPOINT']}' + ) FORMAT PLAIN ENCODE {_encode()};''') + + stmt = f'select count(*), sum(id), sum(sex), sum(mark) from {_source()}' + print(f'Execute {stmt}') + cur.execute(stmt) + result = cur.fetchone() + + print('Got:', result) + + def _assert_eq(field, got, expect): + assert got == expect, f'{field} assertion failed: got {got}, expect {expect}.' 
+ + _assert_eq('count(*)', result[0], 0) + + print('Empty source test pass') + + cur.execute(f'drop source {_source()}') + cur.close() + conn.close() if __name__ == "__main__": FILE_NUM = 4001 @@ -153,3 +206,5 @@ def _assert_eq(field, got, expect): # clean up s3 files for idx, _ in enumerate(formatted_files): client.remove_object(config["S3_BUCKET"], _s3(idx)) + + test_empty_source(config, run_id, fmt) \ No newline at end of file diff --git a/e2e_test/sink/kafka/protobuf.slt b/e2e_test/sink/kafka/protobuf.slt index 61a91435567da..5f032ba32f8dc 100644 --- a/e2e_test/sink/kafka/protobuf.slt +++ b/e2e_test/sink/kafka/protobuf.slt @@ -28,6 +28,18 @@ format plain encode protobuf ( schema.registry = 'http://schemaregistry:8082', message = 'test.package.MessageH.MessageI'); +system ok +rpk topic create test-rw-sink-upsert-protobuf + +statement ok +create table from_kafka_raw (kafka_value bytea) +include key as kafka_key +with ( + connector = 'kafka', + topic = 'test-rw-sink-upsert-protobuf', + properties.bootstrap.server = 'message_queue:29092') +format plain encode bytes; + statement ok create table into_kafka ( bool_field bool, @@ -84,6 +96,40 @@ format plain encode protobuf ( schema.registry = 'http://schemaregistry:8082', message = 'test.package.MessageH.MessageI'); +statement error +create sink sink_upsert from into_kafka with ( + connector = 'kafka', + topic = 'test-rw-sink-upsert-protobuf', + properties.bootstrap.server = 'message_queue:29092', + primary_key = 'string_field') +format upsert encode protobuf ( + schema.location = 'file:///risingwave/proto-recursive', + message = 'recursive.AllTypes'); +---- +db error: ERROR: Failed to run the query + +Caused by these errors (recent errors listed first): + 1: gRPC request to meta service failed: Internal error + 2: failed to validate sink + 3: config error + 4: sink format/encode/key_encode unsupported: Upsert Protobuf None + + +statement ok +create sink sink_upsert from into_kafka with ( + connector = 'kafka', + topic = 'test-rw-sink-upsert-protobuf', + properties.bootstrap.server = 'message_queue:29092', + primary_key = 'string_field') +format upsert encode protobuf ( + schema.location = 'file:///risingwave/proto-recursive', + message = 'recursive.AllTypes') +key encode text; + +# Shall be ignored by force_append_only sinks but processed by upsert sinks. 
+statement ok +delete from into_kafka where bool_field; + sleep 2s query TTTRRIIIIIITTTI @@ -119,6 +165,11 @@ select field_i from from_kafka_csr_nested order by 1; 13 24 +query T +select convert_from(kafka_key, 'utf-8') from from_kafka_raw where kafka_value is null; +---- +Rising + statement error No such file create sink sink_err from into_kafka with ( connector = 'kafka', @@ -150,16 +201,19 @@ format plain encode protobuf ( message = 'recursive.AllTypes'); statement ok -drop sink sink_csr_nested; +drop table from_kafka cascade; statement ok -drop sink sink_csr_trivial; +drop table from_kafka_csr_trivial cascade; statement ok -drop sink sink0; +drop table from_kafka_csr_nested cascade; statement ok -drop table into_kafka; +drop table from_kafka_raw cascade; statement ok -drop table from_kafka; +drop table into_kafka cascade; + +system ok +rpk topic delete test-rw-sink-upsert-protobuf diff --git a/e2e_test/source/cdc_inline/alter/cdc_backfill_rate_limit.slt b/e2e_test/source/cdc_inline/alter/cdc_backfill_rate_limit.slt new file mode 100644 index 0000000000000..ffc0fdfea102b --- /dev/null +++ b/e2e_test/source/cdc_inline/alter/cdc_backfill_rate_limit.slt @@ -0,0 +1,78 @@ +control substitution on + +# mysql env vars will be read from the `.risingwave/config/risedev-env` file + +system ok +mysql -e " + SET GLOBAL time_zone = '+00:00'; +" + +system ok +mysql -e " + DROP DATABASE IF EXISTS testdb2; + CREATE DATABASE testdb2; + USE testdb2; + CREATE TABLE orders ( + order_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + order_date DATETIME NOT NULL, + customer_name VARCHAR(255) NOT NULL, + price DECIMAL(10, 5) NOT NULL, + product_id INTEGER NOT NULL, + order_status BOOLEAN NOT NULL + ) AUTO_INCREMENT = 10001; + INSERT INTO orders + VALUES (default, '2020-07-30 10:08:22', 'Jark', 50.50, 102, false), + (default, '2020-07-30 10:11:09', 'Sally', 15.00, 105, false), + (default, '2020-07-30 12:00:30', 'Edward', 25.25, 106, false); +" + +statement ok +create source mysql_source with ( + connector = 'mysql-cdc', + hostname = '${MYSQL_HOST}', + port = '${MYSQL_TCP_PORT}', + username = 'root', + password = '${MYSQL_PWD}', + database.name = 'testdb2', + server.id = '5185' +); + +# backfill rate limit to zero +statement ok +set backfill_rate_limit=0; + +statement ok +create table my_orders ( + order_id int, + order_date timestamp, + customer_name string, + price decimal, + product_id int, + order_status smallint, + PRIMARY KEY (order_id) +) from mysql_source table 'testdb2.orders'; + +sleep 3s + +query I +select count(*) from my_orders; +---- +0 + +# alter rate limit +statement ok +ALTER TABLE my_orders SET backfill_rate_limit = 1000; + +# wait alter ddl +sleep 3s + +query I +select count(*) from my_orders; +---- +3 + +statement ok +drop table my_orders; + +statement ok +drop source mysql_source cascade; diff --git a/e2e_test/source/cdc_inline/sql_server_cdc/sql_server_cdc.slt b/e2e_test/source/cdc_inline/sql_server_cdc/sql_server_cdc.slt index ec62c3d08adf4..57275043da202 100644 --- a/e2e_test/source/cdc_inline/sql_server_cdc/sql_server_cdc.slt +++ b/e2e_test/source/cdc_inline/sql_server_cdc/sql_server_cdc.slt @@ -5,9 +5,18 @@ control substitution on system ok sqlcmd -C -d master -Q 'create database mydb;' -b +system ok +sqlcmd -C -d master -Q 'create database UpperDB COLLATE SQL_Latin1_General_CP1_CS_AS;' -b + system ok sqlcmd -C -i e2e_test/source/cdc_inline/sql_server_cdc/sql_server_cdc_prepare.sql -b +system ok +sqlcmd -C -d UpperDB -Q "CREATE SCHEMA UpperSchema;" -b + +system ok +sqlcmd -C -d UpperDB 
-Q "EXEC sys.sp_cdc_enable_db; CREATE TABLE UpperSchema.UpperTable (ID INT PRIMARY KEY, Name VARCHAR(100)); EXEC sys.sp_cdc_enable_table @source_schema = 'UpperSchema', @source_name = 'UpperTable', @role_name = NULL; INSERT INTO UpperSchema.UpperTable VALUES (1, 'Alice');" -b + # ------------ validate stage ------------ # invalid address, comment this test out because it takes long to wait for TCP connection timeout. @@ -114,6 +123,17 @@ CREATE TABLE sqlserver_all_data_types ( database.name = '${SQLCMDDBNAME}', ); +# invalid dbname +statement error does not match db_name +CREATE SOURCE upper_mssql_source WITH ( + connector = 'sqlserver-cdc', + hostname = '${SQLCMDSERVER:sqlserver-server}', + port = '${SQLCMDPORT:1433}', + username = '${SQLCMDUSER:SA}', + password = '${SQLCMDPASSWORD}', + database.name = 'upperdb', +); + # ------------ Create source/table/mv stage ------------ # create a cdc source job, which format fixed to `FORMAT PLAIN ENCODE JSON` statement ok @@ -126,6 +146,16 @@ CREATE SOURCE mssql_source WITH ( database.name = '${SQLCMDDBNAME}', ); +statement ok +CREATE SOURCE upper_mssql_source WITH ( + connector = 'sqlserver-cdc', + hostname = '${SQLCMDSERVER:sqlserver-server}', + port = '${SQLCMDPORT:1433}', + username = '${SQLCMDUSER:SA}', + password = '${SQLCMDPASSWORD}', + database.name = 'UpperDB', +); + statement error Should not create MATERIALIZED VIEW or SELECT directly on shared CDC source create materialized view mv as select * from mssql_source; @@ -250,6 +280,34 @@ CREATE TABLE shared_sqlserver_all_data_types ( PRIMARY KEY (id) ) from mssql_source table 'dbo.sqlserver_all_data_types'; +statement error Sql Server table 'UpperSchema'.'UpperTable' doesn't exist in 'mydb' +CREATE TABLE upper_table ( + "ID" INT, + "Name" VARCHAR, + PRIMARY KEY ("ID") +) from mssql_source table 'UpperSchema.UpperTable'; + +statement error Column 'name' not found in the upstream database +CREATE TABLE upper_table ( + "ID" INT, + name VARCHAR, + PRIMARY KEY ("ID") +) from upper_mssql_source table 'UpperSchema.UpperTable'; + +statement error Sql Server table 'upperSchema'.'upperTable' doesn't exist in 'UpperDB' +CREATE TABLE upper_table ( + "ID" INT, + "Name" VARCHAR, + PRIMARY KEY ("ID") +) from upper_mssql_source table 'upperSchema.upperTable'; + +statement ok +CREATE TABLE upper_table ( + "ID" INT, + "Name" VARCHAR, + PRIMARY KEY ("ID") +) from upper_mssql_source table 'UpperSchema.UpperTable'; + statement ok create materialized view shared_orders_cnt as select count(*) as cnt from shared_orders; @@ -307,6 +365,9 @@ SELECT * from shared_sqlserver_all_data_types order by id; system ok sqlcmd -C -i e2e_test/source/cdc_inline/sql_server_cdc/sql_server_cdc_insert.sql -b +system ok +sqlcmd -C -d UpperDB -Q "INSERT INTO UpperSchema.UpperTable VALUES (11, 'Alice');" -b + sleep 10s # ------------ recover cluster ------------ @@ -332,7 +393,6 @@ select cnt from shared_sqlserver_all_data_types_cnt; ---- 6 - query III select * from shared_orders order by order_id; ---- @@ -359,6 +419,15 @@ SELECT * from shared_sqlserver_all_data_types order by id; 12 t 255 -32768 -2147483648 -9223372036854775808 -10 -10000 -10000 aa \xff 1990-01-01 13:59:59.123 2000-01-01 11:00:00.123 1990-01-01 00:00:01.123+00:00 13 t 127 32767 2147483647 9223372036854775807 -10 10000 10000 zzzz \xffffffff 2999-12-31 23:59:59.999 2099-12-31 23:59:59.999 2999-12-31 23:59:59.999+00:00 +query TT +SELECT * from upper_table order by "ID"; +---- +1 Alice +11 Alice + # ------------ drop stage ------------ +statement ok +drop source 
upper_mssql_source cascade; + statement ok drop source mssql_source cascade; diff --git a/e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_stateful_agg.slt b/e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_stateful_agg.slt new file mode 100644 index 0000000000000..012b1ffffb762 --- /dev/null +++ b/e2e_test/streaming/aggregate/two_phase_approx_percentile_merge_stateful_agg.slt @@ -0,0 +1,80 @@ +# Single phase approx percentile +statement ok +create table t(p_col double, grp_col int); + +statement ok +insert into t select a, 1 from generate_series(-1000, 1000) t(a); + +statement ok +flush; + +query I +select + percentile_cont(0.01) within group (order by p_col) as p01, + min(p_col), + percentile_cont(0.5) within group (order by p_col) as p50, + count(*), + percentile_cont(0.99) within group (order by p_col) as p99 +from t; +---- +-980 -1000 0 2001 980 + +statement ok +create materialized view m1 as + select + approx_percentile(0.01, 0.01) within group (order by p_col) as p01, + min(p_col), + approx_percentile(0.5, 0.01) within group (order by p_col) as p50, + count(*), + approx_percentile(0.99, 0.01) within group (order by p_col) as p99 + from t; + +query I +select * from m1; +---- +-982.5779489474152 -1000 0 2001 982.5779489474152 + +# Test state encode / decode +onlyif can-use-recover +statement ok +recover; + +onlyif can-use-recover +sleep 10s + +query I +select * from m1; +---- +-982.5779489474152 -1000 0 2001 982.5779489474152 + +# Test 0 1000) // ------------------------ // Internal functions diff --git a/proto/meta.proto b/proto/meta.proto index 0371b5540a6da..bcb6c331549f2 100644 --- a/proto/meta.proto +++ b/proto/meta.proto @@ -273,6 +273,7 @@ enum ThrottleTarget { SOURCE = 1; MV = 2; TABLE_WITH_SOURCE = 3; + CDC_TABLE = 4; } message ApplyThrottleRequest { diff --git a/proto/stream_service.proto b/proto/stream_service.proto index 08f0ff1e7684f..ef49d4902a14a 100644 --- a/proto/stream_service.proto +++ b/proto/stream_service.proto @@ -101,7 +101,7 @@ message WaitEpochCommitResponse { message StreamingControlStreamRequest { message InitRequest { - uint64 prev_epoch = 2; + uint64 version_id = 1; } message RemovePartialGraphRequest { diff --git a/risedev.yml b/risedev.yml index db8f6fe5600e2..3c7f8e0e09be4 100644 --- a/risedev.yml +++ b/risedev.yml @@ -477,8 +477,8 @@ profile: parallelism: 8 - use: frontend - use: compactor - - use: prometheus - - use: grafana + # - use: prometheus + # - use: grafana # Do not use kafka here, we will spawn it separately, # so we don't have to re-generate data each time. # RW will still be ale to talk to it. 
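Looping back to the two-phase `approx_percentile` test added earlier in this patch: its expected numbers are consistent with reading the second argument (0.01) as a bound on the relative error of the returned value against the exact percentile. That reading is an assumption here, not something the test itself states. A small Rust check of the arithmetic, with the values copied from the test's expected output:

```rust
/// Returns true if `approx` is within `rel_err` relative error of `exact`.
fn within_relative_error(approx: f64, exact: f64, rel_err: f64) -> bool {
    (approx - exact).abs() <= rel_err * exact.abs()
}

fn main() {
    // (approx_percentile result, exact percentile_cont result) from the test.
    let cases = [
        (-982.5779489474152, -980.0), // p01
        (982.5779489474152, 980.0),   // p99
    ];
    for (approx, exact) in cases {
        assert!(within_relative_error(approx, exact, 0.01));
        println!(
            "approx {approx} vs exact {exact}: relative error {:.4}",
            ((approx - exact) / exact).abs()
        );
    }
}
```

For p99 the deviation is about 0.26%, comfortably inside the 1% budget declared in the materialized view.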
diff --git a/src/batch/Cargo.toml b/src/batch/Cargo.toml index 099ae9019afcf..403eb864229d3 100644 --- a/src/batch/Cargo.toml +++ b/src/batch/Cargo.toml @@ -63,7 +63,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "fs", ] } tokio-metrics = "0.3.0" -tokio-stream = "0.1" +tokio-stream = { workspace = true } tokio-util = { workspace = true } tonic = { workspace = true } tracing = "0.1" diff --git a/src/batch/src/executor/hash_agg.rs b/src/batch/src/executor/hash_agg.rs index d69d4fbc8b174..00073217f7ead 100644 --- a/src/batch/src/executor/hash_agg.rs +++ b/src/batch/src/executor/hash_agg.rs @@ -20,7 +20,6 @@ use bytes::Bytes; use futures_async_stream::try_stream; use hashbrown::hash_map::Entry; use itertools::Itertools; -use prost::Message; use risingwave_common::array::{DataChunk, StreamChunk}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{Field, Schema}; @@ -35,6 +34,7 @@ use risingwave_expr::aggregate::{AggCall, AggregateState, BoxedAggregateFunction use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::batch_plan::HashAggNode; use risingwave_pb::data::DataChunk as PbDataChunk; +use risingwave_pb::Message; use crate::error::{BatchError, Result}; use crate::executor::aggregation::build as build_agg; diff --git a/src/batch/src/executor/join/distributed_lookup_join.rs b/src/batch/src/executor/join/distributed_lookup_join.rs index f5ad5ab5ed984..1068ffd7f3349 100644 --- a/src/batch/src/executor/join/distributed_lookup_join.rs +++ b/src/batch/src/executor/join/distributed_lookup_join.rs @@ -354,10 +354,7 @@ impl LookupExecutorBuilder for InnerSideExecutorBuilder { let pk_prefix = OwnedRow::new(scan_range.eq_conds); if self.lookup_prefix_len == self.table.pk_indices().len() { - let row = self - .table - .get_row(&pk_prefix, self.epoch.clone().into()) - .await?; + let row = self.table.get_row(&pk_prefix, self.epoch.into()).await?; if let Some(row) = row { self.row_list.push(row); @@ -366,7 +363,7 @@ impl LookupExecutorBuilder for InnerSideExecutorBuilder { let iter = self .table .batch_iter_with_pk_bounds( - self.epoch.clone().into(), + self.epoch.into(), &pk_prefix, .., false, diff --git a/src/batch/src/executor/join/hash_join.rs b/src/batch/src/executor/join/hash_join.rs index 026f03fb65deb..3bfb583d6459d 100644 --- a/src/batch/src/executor/join/hash_join.rs +++ b/src/batch/src/executor/join/hash_join.rs @@ -20,7 +20,6 @@ use std::sync::Arc; use bytes::Bytes; use futures_async_stream::try_stream; use itertools::Itertools; -use prost::Message; use risingwave_common::array::{Array, DataChunk, RowRef}; use risingwave_common::bitmap::{Bitmap, BitmapBuilder}; use risingwave_common::catalog::Schema; @@ -34,6 +33,7 @@ use risingwave_common_estimate_size::EstimateSize; use risingwave_expr::expr::{build_from_prost, BoxedExpression, Expression}; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::data::DataChunk as PbDataChunk; +use risingwave_pb::Message; use super::{ChunkedData, JoinType, RowId}; use crate::error::{BatchError, Result}; diff --git a/src/batch/src/executor/join/local_lookup_join.rs b/src/batch/src/executor/join/local_lookup_join.rs index 7fcaba71a9c3b..a3be00fc39a22 100644 --- a/src/batch/src/executor/join/local_lookup_join.rs +++ b/src/batch/src/executor/join/local_lookup_join.rs @@ -134,7 +134,7 @@ impl InnerSideExecutorBuilder { ..Default::default() }), }), - epoch: Some(self.epoch.clone()), + epoch: Some(self.epoch), tracing_context: TracingContext::from_current_span().to_protobuf(), }; @@ -237,7 
+237,7 @@ impl LookupExecutorBuilder for InnerSideExecutorBuilder &plan_node, &task_id, self.context.clone(), - self.epoch.clone(), + self.epoch, self.shutdown_rx.clone(), ); diff --git a/src/batch/src/executor/mod.rs b/src/batch/src/executor/mod.rs index 3a64901c64a04..80dc57b4f3620 100644 --- a/src/batch/src/executor/mod.rs +++ b/src/batch/src/executor/mod.rs @@ -174,7 +174,7 @@ impl<'a, C: Clone> ExecutorBuilder<'a, C> { plan_node, self.task_id, self.context.clone(), - self.epoch.clone(), + self.epoch, self.shutdown_rx.clone(), ) } @@ -188,7 +188,7 @@ impl<'a, C: Clone> ExecutorBuilder<'a, C> { } pub fn epoch(&self) -> BatchQueryEpoch { - self.epoch.clone() + self.epoch } } diff --git a/src/batch/src/executor/order_by.rs b/src/batch/src/executor/order_by.rs index 3f8c8e106c78f..ad7cc13992346 100644 --- a/src/batch/src/executor/order_by.rs +++ b/src/batch/src/executor/order_by.rs @@ -17,7 +17,6 @@ use std::sync::Arc; use bytes::Bytes; use futures_async_stream::try_stream; use itertools::Itertools; -use prost::Message; use risingwave_common::array::DataChunk; use risingwave_common::catalog::Schema; use risingwave_common::memory::MemoryContext; @@ -28,6 +27,7 @@ use risingwave_common::util::sort_util::ColumnOrder; use risingwave_common_estimate_size::EstimateSize; use risingwave_pb::batch_plan::plan_node::NodeBody; use risingwave_pb::data::DataChunk as PbDataChunk; +use risingwave_pb::Message; use super::{ BoxedDataChunkStream, BoxedExecutor, BoxedExecutorBuilder, Executor, ExecutorBuilder, diff --git a/src/batch/src/executor/row_seq_scan.rs b/src/batch/src/executor/row_seq_scan.rs index b8287147c6750..b897dbd813787 100644 --- a/src/batch/src/executor/row_seq_scan.rs +++ b/src/batch/src/executor/row_seq_scan.rs @@ -237,7 +237,7 @@ impl BoxedExecutorBuilder for RowSeqScanExecutorBuilder { let ordered = seq_scan_node.ordered; - let epoch = source.epoch.clone(); + let epoch = source.epoch; let limit = seq_scan_node.limit; let as_of = seq_scan_node .as_of @@ -341,8 +341,7 @@ impl RowSeqScanExecutor { for point_get in point_gets { let table = table.clone(); if let Some(row) = - Self::execute_point_get(table, point_get, query_epoch.clone(), histogram.clone()) - .await? + Self::execute_point_get(table, point_get, query_epoch, histogram.clone()).await? 
{ if let Some(chunk) = data_chunk_builder.append_one_row(row) { returned += chunk.cardinality() as u64; @@ -373,7 +372,7 @@ impl RowSeqScanExecutor { table.clone(), range, ordered, - query_epoch.clone(), + query_epoch, chunk_size, limit, histogram.clone(), diff --git a/src/batch/src/spill/spill_op.rs b/src/batch/src/spill/spill_op.rs index 237ee3baf0099..b3e842a269ec7 100644 --- a/src/batch/src/spill/spill_op.rs +++ b/src/batch/src/spill/spill_op.rs @@ -22,9 +22,9 @@ use futures_util::AsyncReadExt; use opendal::layers::RetryLayer; use opendal::services::{Fs, Memory}; use opendal::Operator; -use prost::Message; use risingwave_common::array::DataChunk; use risingwave_pb::data::DataChunk as PbDataChunk; +use risingwave_pb::Message; use thiserror_ext::AsReport; use tokio::sync::Mutex; use twox_hash::XxHash64; diff --git a/src/batch/src/task/broadcast_channel.rs b/src/batch/src/task/broadcast_channel.rs index d66eda7d7d620..9781e38e7d7f6 100644 --- a/src/batch/src/task/broadcast_channel.rs +++ b/src/batch/src/task/broadcast_channel.rs @@ -86,7 +86,7 @@ pub fn new_broadcast_channel( output_channel_size: usize, ) -> (ChanSenderImpl, Vec) { let broadcast_info = match shuffle.distribution { - Some(exchange_info::Distribution::BroadcastInfo(ref v)) => v.clone(), + Some(exchange_info::Distribution::BroadcastInfo(ref v)) => *v, _ => BroadcastInfo::default(), }; diff --git a/src/batch/src/task/task_execution.rs b/src/batch/src/task/task_execution.rs index 4536dad1c031f..7186ced55febd 100644 --- a/src/batch/src/task/task_execution.rs +++ b/src/batch/src/task/task_execution.rs @@ -393,7 +393,7 @@ impl BatchTaskExecution { self.plan.root.as_ref().unwrap(), &self.task_id, self.context.clone(), - self.epoch.clone(), + self.epoch, self.shutdown_rx.clone(), ) .build(), diff --git a/src/bench/Cargo.toml b/src/bench/Cargo.toml index d451ef46ef838..43451ebaeb9d1 100644 --- a/src/bench/Cargo.toml +++ b/src/bench/Cargo.toml @@ -50,7 +50,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "time", "signal", ] } -tokio-stream = "0.1" +tokio-stream = { workspace = true } toml = "0.8" tracing = "0.1" tracing-subscriber = "0.3.17" diff --git a/src/cmd_all/src/standalone.rs b/src/cmd_all/src/standalone.rs index ceb890f4cb3af..27c8c40203397 100644 --- a/src/cmd_all/src/standalone.rs +++ b/src/cmd_all/src/standalone.rs @@ -467,6 +467,7 @@ mod test { heap_profiling_dir: None, dangerous_max_idle_secs: None, connector_rpc_endpoint: None, + license_key: None, temp_secret_file_dir: "./meta/secrets/", }, ), diff --git a/src/common/Cargo.toml b/src/common/Cargo.toml index a117dce645ae6..2cc1d81f1a38d 100644 --- a/src/common/Cargo.toml +++ b/src/common/Cargo.toml @@ -55,7 +55,7 @@ futures = { version = "0.3", default-features = false, features = ["alloc"] } governor = { version = "0.6", default-features = false, features = ["std"] } hashbrown = "0.14" hex = "0.4.3" -http = "0.2" +http = "1" humantime = "2.1" hytra = { workspace = true } itertools = { workspace = true } diff --git a/src/common/common_service/Cargo.toml b/src/common/common_service/Cargo.toml index cb43702f3f9e6..87206ab7cbc1d 100644 --- a/src/common/common_service/Cargo.toml +++ b/src/common/common_service/Cargo.toml @@ -18,7 +18,7 @@ normal = ["workspace-hack"] async-trait = "0.1" axum = { workspace = true } futures = { version = "0.3", default-features = false, features = ["alloc"] } -hyper = "0.14" # required by tonic +http = "1" prometheus = { version = "0.13" } risingwave_common = { workspace = true } risingwave_pb = { workspace = true } diff 
--git a/src/common/common_service/src/tracing.rs b/src/common/common_service/src/tracing.rs index 3ee4a64231c29..de6f43bbf33f3 100644 --- a/src/common/common_service/src/tracing.rs +++ b/src/common/common_service/src/tracing.rs @@ -15,8 +15,8 @@ use std::task::{Context, Poll}; use futures::Future; -use hyper::Body; use risingwave_common::util::tracing::TracingContext; +use tonic::body::BoxBody; use tower::{Layer, Service}; use tracing::Instrument; @@ -49,9 +49,9 @@ pub struct TracingExtract { inner: S, } -impl Service> for TracingExtract +impl Service> for TracingExtract where - S: Service> + Clone + Send + 'static, + S: Service> + Clone + Send + 'static, S::Future: Send + 'static, { type Error = S::Error; @@ -63,7 +63,7 @@ where self.inner.poll_ready(cx) } - fn call(&mut self, req: hyper::Request) -> Self::Future { + fn call(&mut self, req: http::Request) -> Self::Future { // This is necessary because tonic internally uses `tower::buffer::Buffer`. // See https://github.com/tower-rs/tower/issues/547#issuecomment-767629149 // for details on why this is necessary diff --git a/src/common/metrics/Cargo.toml b/src/common/metrics/Cargo.toml index 4f3e8b20936b2..0c32b557cebb2 100644 --- a/src/common/metrics/Cargo.toml +++ b/src/common/metrics/Cargo.toml @@ -15,12 +15,16 @@ ignored = ["workspace-hack"] normal = ["workspace-hack"] [dependencies] +auto_impl = "1" bytes = "1" clap = { workspace = true } easy-ext = "1" futures = { version = "0.3", default-features = false, features = ["alloc"] } -http = "0.2" -hyper = { version = "0.14", features = ["client"] } # used by tonic +http = "1" +http-02 = { package = "http", version = "0.2" } +hyper = { version = "1" } +hyper-014 = { package = "hyper", version = "0.14" } +hyper-util = { version = "0.1", features = ["client-legacy"] } hytra = { workspace = true } itertools = { workspace = true } parking_lot = { workspace = true } @@ -32,13 +36,13 @@ serde = { version = "1", features = ["derive"] } thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio" } tonic = { workspace = true } +tower-layer = "0.3.2" +tower-service = "0.3.2" tracing = "0.1" tracing-subscriber = "0.3.17" [target.'cfg(not(madsim))'.dependencies] -http-body = "0.4.5" -tower-layer = "0.3.2" -tower-service = "0.3.2" +http-body = "1" [target.'cfg(target_os = "linux")'.dependencies] procfs = { version = "0.16", default-features = false } libc = "0.2" diff --git a/src/common/metrics/src/monitor/connection.rs b/src/common/metrics/src/monitor/connection.rs index e5774a3f16d7d..aa7c8c8d4baa3 100644 --- a/src/common/metrics/src/monitor/connection.rs +++ b/src/common/metrics/src/monitor/connection.rs @@ -24,10 +24,9 @@ use std::time::Duration; use futures::FutureExt; use http::Uri; -use hyper::client::connect::dns::{GaiAddrs, GaiFuture, GaiResolver, Name}; -use hyper::client::connect::Connection; -use hyper::client::HttpConnector; -use hyper::service::Service; +use hyper_util::client::legacy::connect::dns::{GaiAddrs, GaiFuture, GaiResolver, Name}; +use hyper_util::client::legacy::connect::{Connected, Connection, HttpConnector}; +use hyper_util::rt::TokioIo; use itertools::Itertools; use pin_project_lite::pin_project; use prometheus::{ @@ -37,11 +36,13 @@ use prometheus::{ use thiserror_ext::AsReport; use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tonic::transport::{Channel, Endpoint}; +use tower_service::Service; use tracing::{debug, info, warn}; use crate::monitor::GLOBAL_METRICS_REGISTRY; use crate::{register_guarded_int_counter_vec_with_registry, 
LabelGuardedIntCounterVec}; +#[auto_impl::auto_impl(&mut)] pub trait MonitorAsyncReadWrite { fn on_read(&mut self, _size: usize) {} fn on_eof(&mut self) {} @@ -74,6 +75,14 @@ impl MonitoredConnection { let this = this.project(); (this.inner, this.monitor) } + + /// Delegate async read/write traits between tokio and hyper. + fn hyper_tokio_delegate( + self: Pin<&mut Self>, + ) -> TokioIo>, &mut M>> { + let (inner, monitor) = MonitoredConnection::project_into(self); + TokioIo::new(MonitoredConnection::new(TokioIo::new(inner), monitor)) + } } impl AsyncRead for MonitoredConnection { @@ -112,6 +121,16 @@ impl AsyncRead for MonitoredConnection hyper::rt::Read for MonitoredConnection { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: hyper::rt::ReadBufCursor<'_>, + ) -> Poll> { + hyper::rt::Read::poll_read(std::pin::pin!(self.hyper_tokio_delegate()), cx, buf) + } +} + impl AsyncWrite for MonitoredConnection { fn poll_write( self: Pin<&mut Self>, @@ -186,8 +205,41 @@ impl AsyncWrite for MonitoredConnection } } +impl hyper::rt::Write for MonitoredConnection { + fn poll_write( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + hyper::rt::Write::poll_write(std::pin::pin!(self.hyper_tokio_delegate()), cx, buf) + } + + fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + hyper::rt::Write::poll_flush(std::pin::pin!(self.hyper_tokio_delegate()), cx) + } + + fn poll_shutdown( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + hyper::rt::Write::poll_shutdown(std::pin::pin!(self.hyper_tokio_delegate()), cx) + } + + fn is_write_vectored(&self) -> bool { + self.inner.is_write_vectored() + } + + fn poll_write_vectored( + self: Pin<&mut Self>, + cx: &mut Context<'_>, + bufs: &[std::io::IoSlice<'_>], + ) -> Poll> { + hyper::rt::Write::poll_write_vectored(std::pin::pin!(self.hyper_tokio_delegate()), cx, bufs) + } +} + impl Connection for MonitoredConnection { - fn connected(&self) -> hyper::client::connect::Connected { + fn connected(&self) -> Connected { self.inner.connected() } } @@ -275,6 +327,58 @@ where } } +// Compatibility implementation for hyper 0.14 ecosystem. +// Should be the same as those with imports from `http::Uri` and `hyper_util::client::legacy`. +// TODO(http-bump): remove this after there is no more dependency on hyper 0.14. 
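
Note on the hunk above: the `hyper_tokio_delegate` helper works because `hyper_util::rt::TokioIo` adapts in both directions between tokio's `AsyncRead`/`AsyncWrite` and hyper 1.x's `hyper::rt::Read`/`Write`; wrapping twice flips the direction back while keeping the monitoring wrapper in the middle. A minimal sketch of the same adapter in isolation, assuming only `tokio` and `hyper-util` as dependencies (`connect_for_hyper` is an illustrative name, not part of this patch):

```rust
// Illustrative sketch: `TokioIo<T>` implements `hyper::rt::Read`/`Write` when
// `T` implements tokio's `AsyncRead`/`AsyncWrite`, and vice versa, so a tokio
// stream can be handed to hyper 1.x as-is.
use hyper_util::rt::TokioIo;
use tokio::net::TcpStream;

async fn connect_for_hyper(addr: &str) -> std::io::Result<TokioIo<TcpStream>> {
    // `TcpStream` is a tokio `AsyncRead + AsyncWrite`; wrapping it in `TokioIo`
    // yields an I/O type usable wherever hyper 1.x expects its own traits
    // (e.g. `hyper::client::conn::http1::handshake`).
    let stream = TcpStream::connect(addr).await?;
    Ok(TokioIo::new(stream))
}
```

The `compat` module that follows keeps an equivalent `Service`/`Connection` implementation against the hyper 0.14 types (pulled in under the renamed `hyper-014`/`http-02` dependencies) until the last hyper 0.14 user is gone.
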
+mod compat { + use http_02::Uri; + use hyper_014::client::connect::{Connected, Connection}; + + use super::*; + + impl, M: MonitorNewConnection + Clone + 'static> Service + for MonitoredConnection + where + C::Future: 'static, + { + type Error = C::Error; + type Response = MonitoredConnection; + + type Future = impl Future> + 'static; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + let ret = self.inner.poll_ready(cx); + if let Poll::Ready(Err(_)) = &ret { + self.monitor.on_err("".to_string()); + } + ret + } + + fn call(&mut self, uri: Uri) -> Self::Future { + let endpoint = format!("{:?}", uri.host()); + let monitor = self.monitor.clone(); + self.inner + .call(uri) + .map(move |result: Result<_, _>| match result { + Ok(resp) => Ok(MonitoredConnection::new( + resp, + monitor.new_connection_monitor(endpoint), + )), + Err(e) => { + monitor.on_err(endpoint); + Err(e) + } + }) + } + } + + impl Connection for MonitoredConnection { + fn connected(&self) -> Connected { + self.inner.connected() + } + } +} + #[derive(Clone)] pub struct ConnectionMetrics { connection_count: IntGaugeVec, @@ -534,18 +638,16 @@ impl tonic::transport::server::Router { signal: impl Future, ) -> impl Future where - L: tower_layer::Layer, - L::Service: Service< - http::request::Request, - Response = http::response::Response, - > + Clone + L: tower_layer::Layer, + L::Service: Service, Response = http::Response> + + Clone + Send + 'static, - <>::Service as Service< - http::request::Request, + <>::Service as Service< + http::Request, >>::Future: Send + 'static, - <>::Service as Service< - http::request::Request, + <>::Service as Service< + http::Request, >>::Error: Into> + Send, ResBody: http_body::Body + Send + 'static, ResBody::Error: Into>, diff --git a/src/common/src/array/arrow/arrow_impl.rs b/src/common/src/array/arrow/arrow_impl.rs index f4ca022ffd7fa..7d69b50afed49 100644 --- a/src/common/src/array/arrow/arrow_impl.rs +++ b/src/common/src/array/arrow/arrow_impl.rs @@ -42,6 +42,8 @@ use std::fmt::Write; +use arrow_array::cast::AsArray; +use arrow_array_iceberg::array; use arrow_buffer::OffsetBuffer; use chrono::{DateTime, NaiveDateTime, NaiveTime}; use itertools::Itertools; @@ -113,6 +115,7 @@ pub trait ToArrow { ArrayImpl::Serial(array) => self.serial_to_arrow(array), ArrayImpl::List(array) => self.list_to_arrow(data_type, array), ArrayImpl::Struct(array) => self.struct_to_arrow(data_type, array), + ArrayImpl::Map(array) => self.map_to_arrow(data_type, array), }?; if arrow_array.data_type() != data_type { arrow_cast::cast(&arrow_array, data_type).map_err(ArrayError::to_arrow) @@ -267,6 +270,33 @@ pub trait ToArrow { ))) } + #[inline] + fn map_to_arrow( + &self, + data_type: &arrow_schema::DataType, + array: &MapArray, + ) -> Result { + let arrow_schema::DataType::Map(field, ordered) = data_type else { + return Err(ArrayError::to_arrow("Invalid map type")); + }; + if *ordered { + return Err(ArrayError::to_arrow("Sorted map is not supported")); + } + let values = self + .struct_to_arrow(field.data_type(), array.as_struct())? + .as_struct() + .clone(); + let offsets = OffsetBuffer::new(array.offsets().iter().map(|&o| o as i32).collect()); + let nulls = (!array.null_bitmap().all()).then(|| array.null_bitmap().into()); + Ok(Arc::new(arrow_array::MapArray::new( + field.clone(), + offsets, + values, + nulls, + *ordered, + ))) + } + /// Convert RisingWave data type to Arrow data type. 
/// /// This function returns a `Field` instead of `DataType` because some may be converted to @@ -297,6 +327,7 @@ pub trait ToArrow { DataType::Jsonb => return Ok(self.jsonb_type_to_arrow(name)), DataType::Struct(fields) => self.struct_type_to_arrow(fields)?, DataType::List(datatype) => self.list_type_to_arrow(datatype)?, + DataType::Map(datatype) => self.map_type_to_arrow(datatype)?, }; Ok(arrow_schema::Field::new(name, data_type, true)) } @@ -413,6 +444,20 @@ pub trait ToArrow { .try_collect::<_, _, ArrayError>()?, )) } + + #[inline] + fn map_type_to_arrow(&self, map_type: &MapType) -> Result { + let sorted = false; + let list_type = map_type.clone().into_list(); + Ok(arrow_schema::DataType::Map( + Arc::new(arrow_schema::Field::new( + "entries", + self.list_type_to_arrow(&list_type)?, + true, + )), + sorted, + )) + } } /// Defines how to convert Arrow arrays to RisingWave arrays. diff --git a/src/common/src/array/list_array.rs b/src/common/src/array/list_array.rs index 7fc1fdecee6fe..c30229852c0aa 100644 --- a/src/common/src/array/list_array.rs +++ b/src/common/src/array/list_array.rs @@ -56,6 +56,7 @@ impl ArrayBuilder for ListArrayBuilder { #[cfg(test)] fn new(capacity: usize) -> Self { + // TODO: deprecate this Self::with_type( capacity, // Default datatype @@ -249,6 +250,12 @@ impl ListArray { array.values.is_empty(), "Must have no buffer in a list array" ); + debug_assert!( + (array.array_type == PbArrayType::List as i32) + || (array.array_type == PbArrayType::Map as i32), + "invalid array type for list: {}", + array.array_type + ); let bitmap: Bitmap = array.get_null_bitmap()?.into(); let array_data = array.get_list_array_data()?.to_owned(); let flatten_len = match array_data.offsets.last() { @@ -406,15 +413,15 @@ impl ListValue { } pub fn memcmp_deserialize( - datatype: &DataType, + item_datatype: &DataType, deserializer: &mut memcomparable::Deserializer, ) -> memcomparable::Result { let bytes = serde_bytes::ByteBuf::deserialize(deserializer)?; let mut inner_deserializer = memcomparable::Deserializer::new(bytes.as_slice()); - let mut builder = datatype.create_array_builder(0); + let mut builder = item_datatype.create_array_builder(0); while inner_deserializer.has_remaining() { builder.append(memcmp_encoding::deserialize_datum_in_composite( - datatype, + item_datatype, &mut inner_deserializer, )?) } @@ -500,6 +507,7 @@ impl From for ArrayImpl { } } +/// A slice of an array #[derive(Copy, Clone)] pub struct ListRef<'a> { array: &'a ArrayImpl, @@ -589,6 +597,24 @@ impl<'a> ListRef<'a> { _ => None, } } + + /// # Panics + /// Panics if the list is not a map's internal representation (See [`super::MapArray`]). + pub(super) fn as_map_kv(self) -> (ListRef<'a>, ListRef<'a>) { + let (k, v) = self.array.as_struct().fields().collect_tuple().unwrap(); + ( + ListRef { + array: k, + start: self.start, + end: self.end, + }, + ListRef { + array: v, + start: self.start, + end: self.end, + }, + ) + } } impl PartialEq for ListRef<'_> { @@ -650,10 +676,12 @@ impl ToText for ListRef<'_> { && (s.is_empty() || s.to_ascii_lowercase() == "null" || s.contains([ - '"', '\\', '{', '}', ',', + '"', '\\', ',', + // whilespace: // PostgreSQL `array_isspace` includes '\x0B' but rust // [`char::is_ascii_whitespace`] does not. 
- ' ', '\t', '\n', '\r', '\x0B', '\x0C', + ' ', '\t', '\n', '\r', '\x0B', '\x0C', // list-specific: + '{', '}', ])); if need_quote { f(&"\"")?; diff --git a/src/common/src/array/map_array.rs b/src/common/src/array/map_array.rs new file mode 100644 index 0000000000000..6e9c819a14638 --- /dev/null +++ b/src/common/src/array/map_array.rs @@ -0,0 +1,445 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cmp::Ordering; +use std::fmt::{self, Debug, Display}; + +use bytes::{Buf, BufMut}; +use itertools::Itertools; +use risingwave_common_estimate_size::EstimateSize; +use risingwave_pb::data::{PbArray, PbArrayType}; +use serde::Serializer; + +use super::{ + Array, ArrayBuilder, ArrayImpl, ArrayResult, DatumRef, DefaultOrdered, ListArray, + ListArrayBuilder, ListRef, ListValue, MapType, ScalarRef, ScalarRefImpl, StructArray, +}; +use crate::bitmap::Bitmap; +use crate::types::{DataType, Scalar, ToText}; +use crate::util::memcmp_encoding; + +#[derive(Debug, Clone, EstimateSize)] +pub struct MapArrayBuilder { + inner: ListArrayBuilder, +} + +impl ArrayBuilder for MapArrayBuilder { + type ArrayType = MapArray; + + #[cfg(not(test))] + fn new(_capacity: usize) -> Self { + panic!("please use `MapArrayBuilder::with_type` instead"); + } + + #[cfg(test)] + fn new(capacity: usize) -> Self { + Self::with_type( + capacity, + DataType::Map(MapType::from_kv(DataType::Varchar, DataType::Varchar)), + ) + } + + fn with_type(capacity: usize, ty: DataType) -> Self { + let inner = ListArrayBuilder::with_type(capacity, ty.into_map().into_list()); + Self { inner } + } + + fn append_n(&mut self, n: usize, value: Option>) { + self.inner.append_n(n, value.map(|v| v.into_inner())); + } + + fn append_array(&mut self, other: &MapArray) { + self.inner.append_array(&other.inner); + } + + fn pop(&mut self) -> Option<()> { + self.inner.pop() + } + + fn len(&self) -> usize { + self.inner.len() + } + + fn finish(self) -> MapArray { + let inner = self.inner.finish(); + MapArray { inner } + } +} + +/// `MapArray` is physically just a `List>` array, but with some additional restrictions. +/// +/// Type: +/// - `key`'s datatype can only be string & integral types. (See [`MapType::check_key_type_valid`].) +/// - `value` can be any type. +/// +/// Value (for each map value in the array): +/// - `key`s are non-null and unique. +/// +/// - `key`s and `value`s must be of the same length. +/// For a `MapArray`, it's sliced by the `ListArray`'s offsets, so it essentially means the +/// `key` and `value` children arrays have the same length. +/// +/// - The lists are NOT sorted by `key`. +/// +/// - `Eq` / `Hash` / `Ord` for map: +/// +/// It's controversial due to the physicial representation is just an unordered list. +/// In many systems (e.g., `DuckDB` and `ClickHouse`), `{"k1":"v1","k2":"v2"} != {"k2":"v2","k1":"v1"}`. +/// But the reverse definition might be more intuitive, especially when ingesting Avro/Protobuf data. 
+/// +/// To avoid controversy, we wanted to ban all usages and make the implementation `unreachable!()`, +/// but it's hard since these implementations can be used in different places: +/// * Explicit in User-facing functions (e.g., comparison operators). These could be avoided completely. +/// * Implicit in Keys (group by / order by / primary key). These could also be banned, but it's harder. +/// * Some internal usages. One example is `_row_id`. See . +/// It might be solvable, but we are not sure whether it's depended somewhere else. +/// +/// Considering these, it might be better to still choose a _well-defined_ behavior instead +/// of using `unreachable`. We should try to have a consistent definition for these operations to minimize possible surprises. +/// And we could still try our best to ban it to prevent misuse. +/// +/// Currently we choose the second behavior. i.e., first sort the map by key, then compare/hash. +/// Note that `Eq` is intuitive, but `Ord` still looks strange. We assume no users really care about +/// which map is larger, but just provide a implementation to prevent undefined behavior. +/// +/// See more discussion in . +/// +/// +/// Note that decisions above are not definitive. Just be conservative at the beginning. +#[derive(Debug, Clone, Eq)] +pub struct MapArray { + pub(super) inner: ListArray, +} + +impl EstimateSize for MapArray { + fn estimated_heap_size(&self) -> usize { + self.inner.estimated_heap_size() + } +} + +impl Array for MapArray { + type Builder = MapArrayBuilder; + type OwnedItem = MapValue; + type RefItem<'a> = MapRef<'a>; + + unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> { + let list = self.inner.raw_value_at_unchecked(idx); + MapRef::new_unchecked(list) + } + + fn len(&self) -> usize { + self.inner.len() + } + + fn to_protobuf(&self) -> PbArray { + let mut array = self.inner.to_protobuf(); + array.array_type = PbArrayType::Map as i32; + array + } + + fn null_bitmap(&self) -> &Bitmap { + self.inner.null_bitmap() + } + + fn into_null_bitmap(self) -> Bitmap { + self.inner.into_null_bitmap() + } + + fn set_bitmap(&mut self, bitmap: Bitmap) { + self.inner.set_bitmap(bitmap) + } + + fn data_type(&self) -> DataType { + let list_value_type = self.inner.values().data_type(); + DataType::Map(MapType::from_list_entries(list_value_type)) + } +} + +impl MapArray { + pub fn from_protobuf(array: &PbArray) -> ArrayResult { + let inner = ListArray::from_protobuf(array)?.into_list(); + Ok(Self { inner }.into()) + } + + /// Return the inner struct array of the list array. + pub fn as_struct(&self) -> &StructArray { + self.inner.values().as_struct() + } + + /// Returns the offsets of this map. + pub fn offsets(&self) -> &[u32] { + self.inner.offsets() + } +} + +pub use scalar::{MapRef, MapValue}; + +/// We can enforce the invariants (see [`MapArray`]) in too many places +/// (both `MapValue`, `MapRef` and `MapArray`). +/// +/// So we define the types and constructors in a separated `mod` +/// to prevent direct construction. +/// We only check the invariants in the constructors. +/// After they are constructed, we assume the invariants holds. +mod scalar { + use super::*; + + /// Refer to [`MapArray`] for the invariants of a map value. + #[derive(Clone, Eq, EstimateSize)] + pub struct MapValue(ListValue); + + /// A map is just a slice of the underlying struct array. + /// + /// Refer to [`MapArray`] for the invariants of a map value. + /// + /// XXX: perhaps we can make it `MapRef<'a, 'b>(ListRef<'a>, ListRef<'b>);`. 
+ /// Then we can build a map ref from 2 list refs without copying the data. + /// Currently it's impossible. + /// + #[derive(Copy, Clone, Eq)] + pub struct MapRef<'a>(ListRef<'a>); + + impl MapValue { + pub fn inner(&self) -> &ListValue { + &self.0 + } + + pub fn into_inner(self) -> ListValue { + self.0 + } + + /// # Panics + /// Panics if [map invariants](`super::MapArray`) are violated. + pub fn from_list_entries(list: ListValue) -> Self { + // validates list type is valid + _ = MapType::from_list_entries(list.data_type()); + // TODO: validate the values is valid + MapValue(list) + } + + /// # Panics + /// Panics if [map invariants](`super::MapArray`) are violated. + pub fn try_from_kv(key: ListValue, value: ListValue) -> Result { + if key.len() != value.len() { + return Err("map keys and values have different length".to_string()); + } + let unique_keys = key.iter().unique().collect_vec(); + if unique_keys.len() != key.len() { + return Err("map keys must be unique".to_string()); + } + if unique_keys.contains(&None) { + return Err("map keys must not be NULL".to_string()); + } + + let len = key.len(); + let key_type = key.data_type(); + let value_type = value.data_type(); + let struct_array = StructArray::new( + MapType::struct_type_for_map(key_type, value_type), + vec![key.into_array().into_ref(), value.into_array().into_ref()], + Bitmap::ones(len), + ); + Ok(MapValue(ListValue::new(struct_array.into()))) + } + } + + impl<'a> MapRef<'a> { + /// # Safety + /// The caller must ensure the invariants of a map value. + pub unsafe fn new_unchecked(list: ListRef<'a>) -> Self { + MapRef(list) + } + + pub fn inner(&self) -> &ListRef<'a> { + &self.0 + } + + pub fn into_inner(self) -> ListRef<'a> { + self.0 + } + + pub fn into_kv(self) -> (ListRef<'a>, ListRef<'a>) { + self.0.as_map_kv() + } + } + + impl Scalar for MapValue { + type ScalarRefType<'a> = MapRef<'a>; + + fn as_scalar_ref(&self) -> MapRef<'_> { + // MapValue is assumed to be valid, so we just construct directly without check invariants. + MapRef(self.0.as_scalar_ref()) + } + } + + impl<'a> ScalarRef<'a> for MapRef<'a> { + type ScalarType = MapValue; + + fn to_owned_scalar(&self) -> MapValue { + // MapRef is assumed to be valid, so we just construct directly without check invariants. + MapValue(self.0.to_owned_scalar()) + } + + fn hash_scalar(&self, state: &mut H) { + for (k, v) in self.iter_sorted() { + super::super::hash_datum(Some(k), state); + super::super::hash_datum(v, state); + } + } + } +} + +/// Refer to [`MapArray`] for the semantics of the comparison. 
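
To make the invariants above concrete, here is a small usage sketch of the constructors in the `scalar` module. Paths assume the `risingwave_common::array` re-exports added later in this patch, and `ListValue::from_iter` is the same helper the existing tests use; the sketch is illustrative, not part of the patch:

```rust
use risingwave_common::array::{ListValue, MapValue};

fn build_map_value() {
    // Keys are non-null, unique, and as many as the values, so this succeeds;
    // the result is physically a `List<Struct<key, value>>`.
    let map = MapValue::try_from_kv(
        ListValue::from_iter([1i64, 2]),
        ListValue::from_iter([10i64, 20]),
    )
    .expect("valid map");

    // Violating an invariant is reported as an `Err` instead of producing a
    // malformed value (here: duplicate keys).
    let dup = MapValue::try_from_kv(
        ListValue::from_iter([1i64, 1]),
        ListValue::from_iter([10i64, 20]),
    );
    assert!(dup.is_err());

    let _ = map;
}
```

The `cmp` module below then defines equality and ordering on top of these validated values.
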
+mod cmp { + use super::*; + use crate::array::DefaultOrd; + impl PartialEq for MapArray { + fn eq(&self, other: &Self) -> bool { + self.iter().eq(other.iter()) + } + } + + impl PartialEq for MapValue { + fn eq(&self, other: &Self) -> bool { + self.as_scalar_ref().eq(&other.as_scalar_ref()) + } + } + + impl PartialOrd for MapValue { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + + impl Ord for MapValue { + fn cmp(&self, other: &Self) -> Ordering { + self.as_scalar_ref().cmp(&other.as_scalar_ref()) + } + } + + impl PartialEq for MapRef<'_> { + fn eq(&self, other: &Self) -> bool { + self.iter_sorted().eq(other.iter_sorted()) + } + } + + impl Ord for MapRef<'_> { + fn cmp(&self, other: &Self) -> Ordering { + self.iter_sorted() + .cmp_by(other.iter_sorted(), |(k1, v1), (k2, v2)| { + k1.default_cmp(&k2).then_with(|| v1.default_cmp(&v2)) + }) + } + } + + impl PartialOrd for MapRef<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } +} + +impl Debug for MapValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_scalar_ref().fmt(f) + } +} + +impl Display for MapValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_scalar_ref().write(f) + } +} + +impl<'a> MapRef<'a> { + /// Iterates over the elements of the map. + pub fn iter( + self, + ) -> impl DoubleEndedIterator + ExactSizeIterator, DatumRef<'a>)> + 'a + { + self.inner().iter().map(|list_elem| { + let list_elem = list_elem.expect("the list element in map should not be null"); + let struct_ = list_elem.into_struct(); + let (k, v) = struct_ + .iter_fields_ref() + .next_tuple() + .expect("the struct in map should have exactly 2 fields"); + (k.expect("map key should not be null"), v) + }) + } + + pub fn iter_sorted( + self, + ) -> impl DoubleEndedIterator + ExactSizeIterator, DatumRef<'a>)> + 'a + { + self.iter().sorted_by_key(|(k, _v)| DefaultOrdered(*k)) + } + + /// Note: Map should not be used as key. But we don't want to panic. + /// See [`MapArray`] for the semantics. See also the `Ord` implementation. + /// TODO: ban it in fe + pub fn memcmp_serialize( + self, + serializer: &mut memcomparable::Serializer, + ) -> memcomparable::Result<()> { + let mut inner_serializer = memcomparable::Serializer::new(vec![]); + for (k, v) in self.iter_sorted() { + memcmp_encoding::serialize_datum_in_composite(Some(k), &mut inner_serializer)?; + memcmp_encoding::serialize_datum_in_composite(v, &mut inner_serializer)?; + } + serializer.serialize_bytes(&inner_serializer.into_inner()) + } +} + +impl MapValue { + /// Note: Map should not be used as key. But we don't want to panic. + /// See [`MapArray`] for the semantics. See also the `Ord` implementation. + /// TODO: ban it in fe + pub fn memcmp_deserialize( + datatype: &MapType, + deserializer: &mut memcomparable::Deserializer, + ) -> memcomparable::Result { + let list = ListValue::memcmp_deserialize(&datatype.clone().into_struct(), deserializer)?; + Ok(Self::from_list_entries(list)) + } +} + +impl Debug for MapRef<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.inner().iter()).finish() + } +} + +impl ToText for MapRef<'_> { + fn write(&self, f: &mut W) -> std::fmt::Result { + // Note: This is arbitrarily decided... 
+ write!( + f, + "{{{}}}", + self.iter().format_with(",", |(key, value), f| { + let key = key.to_text(); + let value = value.to_text(); + // TODO: consider quote like list and struct + f(&format_args!("\"{}\":{}", key, value)) + }) + ) + } + + fn write_with_type(&self, ty: &DataType, f: &mut W) -> std::fmt::Result { + match ty { + DataType::Map { .. } => self.write(f), + _ => unreachable!(), + } + } +} diff --git a/src/common/src/array/mod.rs b/src/common/src/array/mod.rs index 89b3b06266786..ae6f7d0fa144b 100644 --- a/src/common/src/array/mod.rs +++ b/src/common/src/array/mod.rs @@ -26,6 +26,7 @@ pub mod interval_array; mod iterator; mod jsonb_array; pub mod list_array; +mod map_array; mod num256_array; mod primitive_array; mod proto_reader; @@ -53,6 +54,7 @@ pub use interval_array::{IntervalArray, IntervalArrayBuilder}; pub use iterator::ArrayIterator; pub use jsonb_array::{JsonbArray, JsonbArrayBuilder}; pub use list_array::{ListArray, ListArrayBuilder, ListRef, ListValue}; +pub use map_array::{MapArray, MapArrayBuilder, MapRef, MapValue}; use paste::paste; pub use primitive_array::{PrimitiveArray, PrimitiveArrayBuilder, PrimitiveArrayItemType}; use risingwave_common_estimate_size::EstimateSize; @@ -104,6 +106,7 @@ pub trait ArrayBuilder: Send + Sync + Sized + 'static { type ArrayType: Array; /// Create a new builder with `capacity`. + /// TODO: remove this function from the trait. Let it be methods of each concrete builders. fn new(capacity: usize) -> Self; /// # Panics @@ -135,6 +138,8 @@ pub trait ArrayBuilder: Send + Sync + Sized + 'static { /// Pop an element from the builder. /// + /// It's used in `rollback` in source parser. + /// /// # Returns /// /// Returns `None` if there is no elements in the builder. @@ -331,6 +336,10 @@ macro_rules! array_impl_enum { for_all_array_variants! { array_impl_enum } +// We cannot put the From implementations in impl_convert, +// because then we can't prove for all `T: PrimitiveArrayItemType`, +// it's implemented. + impl From> for ArrayImpl { fn from(arr: PrimitiveArray) -> Self { T::erase_array_type(arr) @@ -379,6 +388,12 @@ impl From for ArrayImpl { } } +impl From for ArrayImpl { + fn from(arr: MapArray) -> Self { + Self::Map(arr) + } +} + /// `impl_convert` implements several conversions for `Array` and `ArrayBuilder`. /// * `ArrayImpl -> &Array` with `impl.as_int16()`. /// * `ArrayImpl -> Array` with `impl.into_int16()`. @@ -390,6 +405,9 @@ macro_rules! impl_convert { $( paste! { impl ArrayImpl { + /// # Panics + /// + /// Panics if type mismatches. pub fn [](&self) -> &$array { match self { Self::$variant_name(ref array) => array, @@ -397,6 +415,9 @@ macro_rules! impl_convert { } } + /// # Panics + /// + /// Panics if type mismatches. pub fn [](self) -> $array { match self { Self::$variant_name(array) => array, @@ -405,6 +426,7 @@ macro_rules! impl_convert { } } + // FIXME: panic in From here is not proper. 
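
A short sketch of the semantics defined above: comparison and hashing normalize entry order via `iter_sorted`, so two maps differing only in physical entry order are equal, while the (admittedly tentative) text format renders entries in physical order with quoted keys. Illustrative only, under the same path assumptions as the previous sketch:

```rust
use risingwave_common::array::{ListValue, MapValue};
use risingwave_common::types::{Scalar, ToText};

fn map_semantics() {
    let m1 = MapValue::try_from_kv(
        ListValue::from_iter([1i64, 2]),
        ListValue::from_iter([10i64, 20]),
    )
    .unwrap();
    let m2 = MapValue::try_from_kv(
        ListValue::from_iter([2i64, 1]),
        ListValue::from_iter([20i64, 10]),
    )
    .unwrap();

    // The underlying entry lists differ, but `iter_sorted` puts both into key
    // order before comparing, so the values are considered equal (and hash alike).
    assert!(m1 == m2);

    // Text output follows the `ToText` impl above, e.g. `{"1":10,"2":20}`.
    let _text = m1.as_scalar_ref().to_text();
}
```
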
impl <'a> From<&'a ArrayImpl> for &'a $array { fn from(array: &'a ArrayImpl) -> Self { match array { diff --git a/src/common/src/array/proto_reader.rs b/src/common/src/array/proto_reader.rs index 3238368ed1041..f5f61ab9ce893 100644 --- a/src/common/src/array/proto_reader.rs +++ b/src/common/src/array/proto_reader.rs @@ -43,6 +43,7 @@ impl ArrayImpl { PbArrayType::List => ListArray::from_protobuf(array)?, PbArrayType::Bytea => read_string_array::(array, cardinality)?, PbArrayType::Int256 => Int256Array::from_protobuf(array, cardinality)?, + PbArrayType::Map => MapArray::from_protobuf(array)?, }; Ok(array) } diff --git a/src/common/src/array/struct_array.rs b/src/common/src/array/struct_array.rs index 22aae00c84f4c..9c3bd23653815 100644 --- a/src/common/src/array/struct_array.rs +++ b/src/common/src/array/struct_array.rs @@ -498,15 +498,25 @@ impl ToText for StructRef<'_> { } /// Double quote a string if it contains any special characters. -fn quote_if_need(input: &str, writer: &mut impl Write) -> std::fmt::Result { +pub fn quote_if_need(input: &str, writer: &mut impl Write) -> std::fmt::Result { + // Note: for struct here, 'null' as a string is not quoted, but for list it's quoted: + // ```sql + // select row('a','a b','null'), array['a','a b','null']; + // ---- + // (a,"a b",null) {a,"a b","null"} + // ``` if !input.is_empty() // non-empty - && !input.contains([ - '"', '\\', '(', ')', ',', - // PostgreSQL `array_isspace` includes '\x0B' but rust - // [`char::is_ascii_whitespace`] does not. - ' ', '\t', '\n', '\r', '\x0B', '\x0C', - ]) - { + && !input.contains( + [ + '"', '\\', ',', + // whilespace: + // PostgreSQL `array_isspace` includes '\x0B' but rust + // [`char::is_ascii_whitespace`] does not. + ' ', '\t', '\n', '\r', '\x0B', '\x0C', + // struct-specific: + '(',')' +] + ) { return writer.write_str(input); } diff --git a/src/common/src/config.rs b/src/common/src/config.rs index 083a8b77b4f33..eccb4df538bfb 100644 --- a/src/common/src/config.rs +++ b/src/common/src/config.rs @@ -1842,11 +1842,11 @@ pub mod default { } pub fn memory_controller_threshold_graceful() -> f64 { - 0.8 + 0.81 } pub fn memory_controller_threshold_stable() -> f64 { - 0.7 + 0.72 } pub fn memory_controller_eviction_factor_aggressive() -> f64 { diff --git a/src/common/src/hash/key.rs b/src/common/src/hash/key.rs index 61a41e9a7365d..96928e69f4a83 100644 --- a/src/common/src/hash/key.rs +++ b/src/common/src/hash/key.rs @@ -33,7 +33,7 @@ use risingwave_common_estimate_size::EstimateSize; use smallbitset::Set64; use static_assertions::const_assert_eq; -use crate::array::{ListValue, StructValue}; +use crate::array::{ListValue, MapValue, StructValue}; use crate::types::{ DataType, Date, Decimal, Int256, Int256Ref, JsonbVal, Scalar, ScalarRef, ScalarRefImpl, Serial, Time, Timestamp, Timestamptz, F32, F64, @@ -627,6 +627,7 @@ impl_value_encoding_hash_key_serde!(JsonbVal); // use the memcmp encoding for safety. 
impl_memcmp_encoding_hash_key_serde!(StructValue); impl_memcmp_encoding_hash_key_serde!(ListValue); +impl_memcmp_encoding_hash_key_serde!(MapValue); #[cfg(test)] mod tests { diff --git a/src/common/src/test_utils/rand_array.rs b/src/common/src/test_utils/rand_array.rs index 0e2e1d92e28c3..a7c13e3178f26 100644 --- a/src/common/src/test_utils/rand_array.rs +++ b/src/common/src/test_utils/rand_array.rs @@ -24,10 +24,10 @@ use rand::prelude::Distribution; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; -use crate::array::{Array, ArrayBuilder, ArrayRef, ListValue, StructValue}; +use crate::array::{Array, ArrayBuilder, ArrayRef, ListValue, MapValue, StructValue}; use crate::types::{ - Date, Decimal, Int256, Interval, JsonbVal, NativeType, Scalar, Serial, Time, Timestamp, - Timestamptz, + DataType, Date, Decimal, Int256, Interval, JsonbVal, MapType, NativeType, Scalar, Serial, Time, + Timestamp, Timestamptz, }; pub trait RandValue { @@ -151,6 +151,15 @@ impl RandValue for ListValue { } } +impl RandValue for MapValue { + fn rand_value(_rand: &mut R) -> Self { + // dummy value + MapValue::from_list_entries(ListValue::empty(&DataType::Struct( + MapType::struct_type_for_map(DataType::Varchar, DataType::Varchar), + ))) + } +} + pub fn rand_array(rand: &mut R, size: usize, null_ratio: f64) -> A where A: Array, diff --git a/src/common/src/test_utils/rand_chunk.rs b/src/common/src/test_utils/rand_chunk.rs index 3e537fd9b6a49..9c604b6205cc3 100644 --- a/src/common/src/test_utils/rand_chunk.rs +++ b/src/common/src/test_utils/rand_chunk.rs @@ -43,10 +43,11 @@ pub fn gen_chunk(data_types: &[DataType], size: usize, seed: u64, null_ratio: f6 } DataType::Interval => seed_rand_array_ref::(size, seed, null_ratio), DataType::Int256 => seed_rand_array_ref::(size, seed, null_ratio), - DataType::Struct(_) | DataType::Bytea | DataType::Jsonb => { - todo!() - } - DataType::List(_) => { + DataType::Struct(_) + | DataType::Bytea + | DataType::Jsonb + | DataType::List(_) + | DataType::Map(_) => { todo!() } }); diff --git a/src/common/src/types/macros.rs b/src/common/src/types/macros.rs index 520e4ab8f45ee..1dd29156dd651 100644 --- a/src/common/src/types/macros.rs +++ b/src/common/src/types/macros.rs @@ -58,6 +58,7 @@ macro_rules! for_all_variants { { Serial, Serial, serial, $crate::types::Serial, $crate::types::Serial, $crate::array::SerialArray, $crate::array::SerialArrayBuilder }, { Struct, Struct, struct, $crate::types::StructValue, $crate::types::StructRef<'scalar>, $crate::array::StructArray, $crate::array::StructArrayBuilder }, { List, List, list, $crate::types::ListValue, $crate::types::ListRef<'scalar>, $crate::array::ListArray, $crate::array::ListArrayBuilder }, + { Map, Map, map, $crate::types::MapValue, $crate::types::MapRef<'scalar>, $crate::array::MapArray, $crate::array::MapArrayBuilder }, { Bytea, Bytea, bytea, Box<[u8]>, &'scalar [u8], $crate::array::BytesArray, $crate::array::BytesArrayBuilder } } }; diff --git a/src/common/src/types/map_type.rs b/src/common/src/types/map_type.rs new file mode 100644 index 0000000000000..4d9ec3dc5f143 --- /dev/null +++ b/src/common/src/types/map_type.rs @@ -0,0 +1,142 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Formatter; + +use anyhow::Context; + +use super::*; + +/// Refer to [`super::super::array::MapArray`] for the invariants of a map value. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MapType(Box<(DataType, DataType)>); + +impl From for DataType { + fn from(value: MapType) -> Self { + DataType::Map(value) + } +} + +impl MapType { + /// # Panics + /// Panics if the key type is not valid for a map. + pub fn from_kv(key: DataType, value: DataType) -> Self { + Self::check_key_type_valid(&key).unwrap(); + Self(Box::new((key, value))) + } + + pub fn try_from_kv(key: DataType, value: DataType) -> Result { + Self::check_key_type_valid(&key)?; + Ok(Self(Box::new((key, value)))) + } + + /// # Panics + /// Panics if the key type is not valid for a map, or the + /// entries type is not a valid struct type. + pub fn from_list_entries(list_entries_type: DataType) -> Self { + let struct_type = list_entries_type.as_struct(); + let (k, v) = struct_type + .iter() + .collect_tuple() + .expect("the underlying struct for map must have exactly two fields"); + // the field names are not strictly enforced + // Currently this panics for SELECT * FROM t + // if cfg!(debug_assertions) { + // itertools::assert_equal(struct_type.names(), ["key", "value"]); + // } + Self::from_kv(k.1.clone(), v.1.clone()) + } + + /// # Panics + /// Panics if the key type is not valid for a map. + pub fn struct_type_for_map(key_type: DataType, value_type: DataType) -> StructType { + MapType::check_key_type_valid(&key_type).unwrap(); + StructType::new(vec![("key", key_type), ("value", value_type)]) + } + + pub fn key(&self) -> &DataType { + &self.0 .0 + } + + pub fn value(&self) -> &DataType { + &self.0 .1 + } + + pub fn into_struct(self) -> DataType { + let (key, value) = *self.0; + DataType::Struct(Self::struct_type_for_map(key, value)) + } + + pub fn into_list(self) -> DataType { + DataType::List(Box::new(self.into_struct())) + } + + /// String and integral types are allowed. + /// + /// This is similar to [Protobuf](https://protobuf.dev/programming-guides/proto3/#maps)'s + /// decision. + /// + /// Note that this isn't definitive. + /// Just be conservative at the beginning, but not too restrictive (like only allowing strings). 
+ pub fn check_key_type_valid(data_type: &DataType) -> anyhow::Result<()> { + let ok = match data_type { + DataType::Int16 | DataType::Int32 | DataType::Int64 => true, + DataType::Varchar => true, + DataType::Boolean + | DataType::Float32 + | DataType::Float64 + | DataType::Decimal + | DataType::Date + | DataType::Time + | DataType::Timestamp + | DataType::Timestamptz + | DataType::Interval + | DataType::Struct(_) + | DataType::List(_) + | DataType::Bytea + | DataType::Jsonb + | DataType::Serial + | DataType::Int256 + | DataType::Map(_) => false, + }; + if !ok { + Err(anyhow::anyhow!("invalid map key type: {data_type}")) + } else { + Ok(()) + } + } +} + +impl FromStr for MapType { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + if !(s.starts_with("map(") && s.ends_with(')')) { + return Err(anyhow::anyhow!("expect map(...,...)")); + }; + if let Some((key, value)) = s[4..s.len() - 1].split(',').collect_tuple() { + let key = key.parse().context("failed to parse map key type")?; + let value = value.parse().context("failed to parse map value type")?; + MapType::try_from_kv(key, value) + } else { + Err(anyhow::anyhow!("expect map(...,...)")) + } + } +} + +impl std::fmt::Display for MapType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "map({},{})", self.key(), self.value()) + } +} diff --git a/src/common/src/types/mod.rs b/src/common/src/types/mod.rs index e53a4597301b0..e76bfaba384f8 100644 --- a/src/common/src/types/mod.rs +++ b/src/common/src/types/mod.rs @@ -37,7 +37,8 @@ use thiserror_ext::AsReport; use crate::array::{ ArrayBuilderImpl, ArrayError, ArrayResult, PrimitiveArrayItemType, NULL_VAL_FOR_HASH, }; -pub use crate::array::{ListRef, ListValue, StructRef, StructValue}; +// Complex type's value is based on the array +pub use crate::array::{ListRef, ListValue, MapRef, MapValue, StructRef, StructValue}; use crate::cast::{str_to_bool, str_to_bytea}; use crate::error::BoxedError; use crate::{ @@ -53,6 +54,7 @@ mod from_sql; mod interval; mod jsonb; mod macros; +mod map_type; mod native_type; mod num256; mod ops; @@ -78,6 +80,7 @@ pub use self::datetime::{Date, Time, Timestamp}; pub use self::decimal::{Decimal, PowError as DecimalPowError}; pub use self::interval::{test_utils, DateTimeField, Interval, IntervalDisplay}; pub use self::jsonb::{JsonbRef, JsonbVal}; +pub use self::map_type::MapType; pub use self::native_type::*; pub use self::num256::{Int256, Int256Ref}; pub use self::ops::{CheckedAdd, IsNegative}; @@ -99,8 +102,13 @@ pub type F32 = ordered_float::OrderedFloat; pub type F64 = ordered_float::OrderedFloat; /// The set of datatypes that are supported in RisingWave. -// `EnumDiscriminants` will generate a `DataTypeName` enum with the same variants, -// but without data fields. +/// +/// # Trait implementations +/// +/// - `EnumDiscriminants` generates [`DataTypeName`] enum with the same variants, +/// but without data fields. +/// - `FromStr` is only used internally for tests. +/// The generated implementation isn't efficient, and doesn't handle whitespaces, etc. 
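
The new `MapType` above is the logical-type counterpart of `MapArray`. A small usage sketch, assuming the `MapType` re-export from `risingwave_common::types` added above and that `DataType`'s test-only `FromStr` accepts the usual `varchar`/`bigint` spellings:

```rust
use risingwave_common::types::{DataType, MapType};

fn map_type_usage() {
    // String and integral key types are accepted.
    let ty = MapType::from_kv(DataType::Varchar, DataType::Int64);

    // A map is physically a list of `(key, value)` structs.
    assert!(matches!(ty.clone().into_struct(), DataType::Struct(_)));
    assert!(matches!(ty.clone().into_list(), DataType::List(_)));

    // Other key types are rejected up front.
    assert!(MapType::try_from_kv(DataType::Float64, DataType::Int64).is_err());

    // `FromStr`/`Display` use the `map(<key>,<value>)` spelling.
    let parsed: MapType = "map(varchar,bigint)".parse().unwrap();
    assert_eq!(parsed.key(), &DataType::Varchar);
    assert_eq!(parsed.value(), &DataType::Int64);
    let _ = ty;
}
```
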
#[derive( Debug, Display, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, EnumDiscriminants, FromStr, )] @@ -166,8 +174,12 @@ pub enum DataType { #[display("rw_int256")] #[from_str(regex = "(?i)^rw_int256$")] Int256, + #[display("{0}")] + #[from_str(regex = "(?i)^(?P<0>.+)$")] + Map(MapType), } +// For DataType::List impl std::str::FromStr for Box { type Err = BoxedError; @@ -200,8 +212,8 @@ impl TryFrom for DataType { DataTypeName::Time => Ok(DataType::Time), DataTypeName::Interval => Ok(DataType::Interval), DataTypeName::Jsonb => Ok(DataType::Jsonb), - DataTypeName::Struct | DataTypeName::List => { - Err("Functions returning struct or list can not be inferred. Please use `FunctionCall::new_unchecked`.") + DataTypeName::Struct | DataTypeName::List | DataTypeName::Map => { + Err("Functions returning composite types can not be inferred. Please use `FunctionCall::new_unchecked`.") } } } @@ -236,6 +248,12 @@ impl From<&PbDataType> for DataType { // The first (and only) item is the list element type. Box::new((&proto.field_type[0]).into()), ), + PbTypeName::Map => { + // Map is physically the same as a list. + // So the first (and only) item is the list element type. + let list_entries_type: DataType = (&proto.field_type[0]).into(); + DataType::Map(MapType::from_list_entries(list_entries_type)) + } PbTypeName::Int256 => DataType::Int256, } } @@ -263,6 +281,7 @@ impl From for PbTypeName { DataTypeName::Struct => PbTypeName::Struct, DataTypeName::List => PbTypeName::List, DataTypeName::Int256 => PbTypeName::Int256, + DataTypeName::Map => PbTypeName::Map, } } } @@ -324,6 +343,10 @@ impl DataType { DataType::List(datatype) => { pb.field_type = vec![datatype.to_protobuf()]; } + DataType::Map(datatype) => { + // Same as List> + pb.field_type = vec![datatype.clone().into_struct().to_protobuf()]; + } DataType::Boolean | DataType::Int16 | DataType::Int32 @@ -366,6 +389,10 @@ impl DataType { matches!(self, DataType::Struct(_)) } + pub fn is_map(&self) -> bool { + matches!(self, DataType::Map(_)) + } + pub fn is_int(&self) -> bool { matches!(self, DataType::Int16 | DataType::Int32 | DataType::Int64) } @@ -383,14 +410,32 @@ impl DataType { Self::Struct(StructType::from_parts(field_names, fields)) } + pub fn new_unnamed_struct(fields: Vec) -> Self { + Self::Struct(StructType::unnamed(fields)) + } + pub fn as_struct(&self) -> &StructType { match self { DataType::Struct(t) => t, - _ => panic!("expect struct type"), + t => panic!("expect struct type, got {t}"), } } - /// Returns the inner type of a list type. + pub fn as_map(&self) -> &MapType { + match self { + DataType::Map(t) => t, + t => panic!("expect map type, got {t}"), + } + } + + pub fn into_map(self) -> MapType { + match self { + DataType::Map(t) => t, + t => panic!("expect map type, got {t}"), + } + } + + /// Returns the inner element's type of a list type. /// /// # Panics /// @@ -398,11 +443,13 @@ impl DataType { pub fn as_list(&self) -> &DataType { match self { DataType::List(t) => t, - _ => panic!("expect list type"), + t => panic!("expect list type, got {t}"), } } - /// Return a new type that removes the outer list. + /// Return a new type that removes the outer list, and get the innermost element type. + /// + /// Use [`DataType::as_list`] if you only want the element type of a list. 
/// /// ``` /// use risingwave_common::types::DataType::*; @@ -447,6 +494,10 @@ impl From for PbDataType { mod private { use super::*; + // Note: put pub trait inside a private mod just makes the name private, + // The trait methods will still be publicly available... + // a.k.a. ["Voldemort type"](https://rust-lang.github.io/rfcs/2145-type-privacy.html#lint-3-voldemort-types-its-reachable-but-i-cant-name-it) + /// Common trait bounds of scalar and scalar reference types. /// /// NOTE(rc): `Hash` is not in the trait bound list, it's implemented as [`ScalarRef::hash_scalar`]. @@ -610,7 +661,7 @@ macro_rules! impl_self_as_scalar_ref { )* }; } -impl_self_as_scalar_ref! { &str, &[u8], Int256Ref<'_>, JsonbRef<'_>, ListRef<'_>, StructRef<'_>, ScalarRefImpl<'_> } +impl_self_as_scalar_ref! { &str, &[u8], Int256Ref<'_>, JsonbRef<'_>, ListRef<'_>, StructRef<'_>, ScalarRefImpl<'_>, MapRef<'_> } /// `for_all_native_types` includes all native variants of our scalar types. /// @@ -831,7 +882,7 @@ impl ScalarImpl { .ok_or_else(|| "invalid value of Jsonb".to_string())?, ), DataType::Int256 => Self::Int256(Int256::from_binary(bytes)?), - DataType::Struct(_) | DataType::List(_) => { + DataType::Struct(_) | DataType::List(_) | DataType::Map(_) => { return Err(format!("unsupported data type: {}", data_type).into()); } }; @@ -864,6 +915,9 @@ impl ScalarImpl { DataType::Struct(_) => StructValue::from_str(s, data_type)?.into(), DataType::Jsonb => JsonbVal::from_str(s)?.into(), DataType::Bytea => str_to_bytea(s)?.into(), + DataType::Map(_) => { + todo!() + } }) } } @@ -930,7 +984,7 @@ impl ScalarRefImpl<'_> { self.to_text_with_type(data_type) } - /// Serialize the scalar. + /// Serialize the scalar into the `memcomparable` format. pub fn serialize( &self, ser: &mut memcomparable::Serializer, @@ -961,6 +1015,7 @@ impl ScalarRefImpl<'_> { Self::Jsonb(v) => v.memcmp_serialize(ser)?, Self::Struct(v) => v.memcmp_serialize(ser)?, Self::List(v) => v.memcmp_serialize(ser)?, + Self::Map(v) => v.memcmp_serialize(ser)?, }; Ok(()) } @@ -1015,6 +1070,7 @@ impl ScalarImpl { Ty::Jsonb => Self::Jsonb(JsonbVal::memcmp_deserialize(de)?), Ty::Struct(t) => StructValue::memcmp_deserialize(t.types(), de)?.to_scalar_value(), Ty::List(t) => ListValue::memcmp_deserialize(t, de)?.to_scalar_value(), + Ty::Map(t) => MapValue::memcmp_deserialize(t, de)?.to_scalar_value(), }) } @@ -1194,6 +1250,10 @@ mod tests { ScalarImpl::List(ListValue::from_iter([233i64, 2333])), DataType::List(Box::new(DataType::Int64)), ), + DataTypeName::Map => { + // map is not hashable + continue; + } }; test(Some(scalar), data_type.clone()); diff --git a/src/common/src/types/postgres_type.rs b/src/common/src/types/postgres_type.rs index ae147e9c9660e..d85f08ed59cc3 100644 --- a/src/common/src/types/postgres_type.rs +++ b/src/common/src/types/postgres_type.rs @@ -54,6 +54,12 @@ pub struct UnsupportedOid(i32); /// Get type information compatible with Postgres type, such as oid, type length. impl DataType { + /// For a fixed-size type, typlen is the number of bytes in the internal representation of the type. + /// But for a variable-length type, typlen is negative. + /// -1 indicates a “varlena” type (one that has a length word), + /// -2 indicates a null-terminated C string. + /// + /// pub fn type_len(&self) -> i16 { macro_rules! 
impl_type_len { ($( { $enum:ident | $oid:literal | $oid_array:literal | $name:ident | $input:ident | $len:literal } )*) => { @@ -63,7 +69,7 @@ impl DataType { )* DataType::Serial => 8, DataType::Int256 => -1, - DataType::List(_) | DataType::Struct(_) => -1, + DataType::List(_) | DataType::Struct(_) | DataType::Map(_) => -1, } } } @@ -96,6 +102,7 @@ impl DataType { for_all_base_types! { impl_from_oid } } + /// Refer to [`Self::from_oid`] pub fn to_oid(&self) -> i32 { macro_rules! impl_to_oid { ($( { $enum:ident | $oid:literal | $oid_array:literal | $name:ident | $input:ident | $len:literal } )*) => { @@ -111,10 +118,14 @@ impl DataType { DataType::Serial => 1016, DataType::Struct(_) => -1, DataType::List { .. } => unreachable!("Never reach here!"), + DataType::Map(_) => 1304, } DataType::Serial => 20, + // XXX: what does the oid mean here? Why we don't have from_oid for them? DataType::Int256 => 1301, + DataType::Map(_) => 1303, // TODO: Support to give a new oid for custom struct type. #9434 + // 1043 is varchar DataType::Struct(_) => 1043, } } @@ -133,6 +144,7 @@ impl DataType { DataType::List(_) => "list", DataType::Serial => "serial", DataType::Int256 => "rw_int256", + DataType::Map(_) => "map", } } } diff --git a/src/common/src/types/struct_type.rs b/src/common/src/types/struct_type.rs index a18f452af7a74..edc4b73311533 100644 --- a/src/common/src/types/struct_type.rs +++ b/src/common/src/types/struct_type.rs @@ -37,11 +37,11 @@ impl Debug for StructType { #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] struct StructTypeInner { - // Details about a struct type. There are 2 cases for a struct: - // 1. `field_names.len() == field_types.len()`: it represents a struct with named fields, - // e.g. `STRUCT`. - // 2. `field_names.len() == 0`: it represents a struct with unnamed fields, - // e.g. `ROW(1, 2)`. + /// Details about a struct type. There are 2 cases for a struct: + /// 1. `field_names.len() == field_types.len()`: it represents a struct with named fields, + /// e.g. `STRUCT`. + /// 2. `field_names.len() == 0`: it represents a struct with unnamed fields, + /// e.g. `ROW(1, 2)`. 
field_names: Box<[String]>, field_types: Box<[DataType]>, } @@ -71,6 +71,8 @@ impl StructType { } pub(super) fn from_parts(field_names: Vec, field_types: Vec) -> Self { + // TODO: enable this assertion + // debug_assert!(field_names.len() == field_types.len()); Self(Arc::new(StructTypeInner { field_types: field_types.into(), field_names: field_names.into(), diff --git a/src/common/src/types/to_binary.rs b/src/common/src/types/to_binary.rs index 56eea301f3f61..da7f75f0a2a3f 100644 --- a/src/common/src/types/to_binary.rs +++ b/src/common/src/types/to_binary.rs @@ -102,6 +102,7 @@ impl ToBinary for ScalarRefImpl<'_> { issue = 7949, "the pgwire extended-mode encoding for {ty} is unsupported" ), + ScalarRefImpl::Map(_) => todo!(), } } } diff --git a/src/common/src/types/to_sql.rs b/src/common/src/types/to_sql.rs index 3ece8a574c450..57aab11daf4d7 100644 --- a/src/common/src/types/to_sql.rs +++ b/src/common/src/types/to_sql.rs @@ -46,6 +46,7 @@ impl ToSql for ScalarImpl { ScalarImpl::Int256(_) | ScalarImpl::Struct(_) | ScalarImpl::List(_) => { bail_not_implemented!("the postgres encoding for {ty} is unsupported") } + ScalarImpl::Map(_) => todo!(), } } diff --git a/src/common/src/util/memcmp_encoding.rs b/src/common/src/util/memcmp_encoding.rs index 5a5ad598093af..c9de13531b7fe 100644 --- a/src/common/src/util/memcmp_encoding.rs +++ b/src/common/src/util/memcmp_encoding.rs @@ -151,9 +151,9 @@ fn calculate_encoded_size_inner( deserializer.deserialize_decimal()?; 0 // the len is not used since decimal is not a fixed length type } - // these two types is var-length and should only be determine at runtime. + // these types are var-length and should only be determine at runtime. // TODO: need some test for this case (e.g. e2e test) - DataType::List { .. } => deserializer.skip_bytes()?, + DataType::List { .. } | DataType::Map(_) => deserializer.skip_bytes()?, DataType::Struct(t) => t .types() .map(|field| { diff --git a/src/common/src/util/value_encoding/mod.rs b/src/common/src/util/value_encoding/mod.rs index 322c542557ed7..3b4167331cb7e 100644 --- a/src/common/src/util/value_encoding/mod.rs +++ b/src/common/src/util/value_encoding/mod.rs @@ -13,7 +13,8 @@ // limitations under the License. //! Value encoding is an encoding format which converts the data into a binary form (not -//! memcomparable). +//! memcomparable, i.e., Key encoding). 
+ use bytes::{Buf, BufMut}; use chrono::{Datelike, Timelike}; use either::{for_both, Either}; @@ -226,6 +227,7 @@ fn serialize_scalar(value: ScalarRefImpl<'_>, buf: &mut impl BufMut) { ScalarRefImpl::Jsonb(v) => serialize_str(&v.value_serialize(), buf), ScalarRefImpl::Struct(s) => serialize_struct(s, buf), ScalarRefImpl::List(v) => serialize_list(v, buf), + ScalarRefImpl::Map(m) => serialize_list(m.into_inner(), buf), } } @@ -251,6 +253,7 @@ fn estimate_serialize_scalar_size(value: ScalarRefImpl<'_>) -> usize { ScalarRefImpl::Jsonb(v) => v.capacity(), ScalarRefImpl::Struct(s) => estimate_serialize_struct_size(s), ScalarRefImpl::List(v) => estimate_serialize_list_size(v), + ScalarRefImpl::Map(v) => estimate_serialize_list_size(v.into_inner()), } } @@ -354,6 +357,11 @@ fn deserialize_value(ty: &DataType, data: &mut impl Buf) -> Result { DataType::Struct(struct_def) => deserialize_struct(struct_def, data)?, DataType::Bytea => ScalarImpl::Bytea(deserialize_bytea(data).into()), DataType::List(item_type) => deserialize_list(item_type, data)?, + DataType::Map(map_type) => { + // FIXME: clone type everytime here is inefficient + let list = deserialize_list(&map_type.clone().into_struct(), data)?.into_list(); + ScalarImpl::Map(MapValue::from_list_entries(list)) + } }) } diff --git a/src/common/src/vnode_mapping/vnode_placement.rs b/src/common/src/vnode_mapping/vnode_placement.rs index aea08a1b74352..328287708a8a2 100644 --- a/src/common/src/vnode_mapping/vnode_placement.rs +++ b/src/common/src/vnode_mapping/vnode_placement.rs @@ -231,7 +231,7 @@ mod tests { let worker_1 = WorkerNode { id: 1, parallelism: 1, - property: Some(serving_property.clone()), + property: Some(serving_property), ..Default::default() }; @@ -246,7 +246,7 @@ mod tests { let worker_2 = WorkerNode { id: 2, parallelism: 50, - property: Some(serving_property.clone()), + property: Some(serving_property), ..Default::default() }; diff --git a/src/compute/Cargo.toml b/src/compute/Cargo.toml index a3f74792982f2..ed1758029092b 100644 --- a/src/compute/Cargo.toml +++ b/src/compute/Cargo.toml @@ -23,7 +23,8 @@ either = "1" foyer = { workspace = true } futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = { workspace = true } -hyper = "0.14" # required by tonic +http = "1" +hyper = "1" itertools = { workspace = true } maplit = "1.0.2" pprof = { version = "0.13", features = ["flamegraph"] } @@ -54,7 +55,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "signal", "fs", ] } -tokio-stream = "0.1" +tokio-stream = { workspace = true } tonic = { workspace = true } tower = { version = "0.4", features = ["util", "load-shed"] } tracing = "0.1" diff --git a/src/compute/src/rpc/service/monitor_service.rs b/src/compute/src/rpc/service/monitor_service.rs index a9a41d753ac96..0acc30e0c2430 100644 --- a/src/compute/src/rpc/service/monitor_service.rs +++ b/src/compute/src/rpc/service/monitor_service.rs @@ -389,8 +389,7 @@ pub mod grpc_middleware { use either::Either; use futures::Future; - use hyper::Body; - use tonic::transport::NamedService; + use tonic::body::BoxBody; use tower::{Layer, Service}; /// Manages the await-trees of `gRPC` requests that are currently served by the compute node. 
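
The middleware changes here and in `common_service/src/tracing.rs` follow the same pattern: with tonic 0.12 the request body seen by `tower` middlewares is `tonic::body::BoxBody` over `http` 1.x, so the layers no longer name `hyper::Body` at all. A minimal sketch of that shape (names are illustrative, not part of this patch):

```rust
// A no-op tower layer over the tonic 0.12 request type, logging the gRPC
// method path before delegating to the inner service.
use std::task::{Context, Poll};

use tonic::body::BoxBody;
use tower::{Layer, Service};

#[derive(Clone)]
struct LogPathLayer;

impl<S> Layer<S> for LogPathLayer {
    type Service = LogPath<S>;

    fn layer(&self, inner: S) -> Self::Service {
        LogPath { inner }
    }
}

#[derive(Clone)]
struct LogPath<S> {
    inner: S,
}

impl<S> Service<http::Request<BoxBody>> for LogPath<S>
where
    S: Service<http::Request<BoxBody>>,
{
    type Error = S::Error;
    type Future = S::Future;
    type Response = S::Response;

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.inner.poll_ready(cx)
    }

    fn call(&mut self, req: http::Request<BoxBody>) -> Self::Future {
        // The gRPC method is carried in the URI path, e.g. `/pkg.Service/Method`.
        tracing::debug!(path = %req.uri().path(), "incoming gRPC request");
        self.inner.call(req)
    }
}
```

For the same reason, `NamedService` is now referenced at its `tonic::server::NamedService` path in the hunk below, matching tonic 0.12's module layout.
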
@@ -438,10 +437,9 @@ pub mod grpc_middleware { next_id: Arc, } - impl Service> for AwaitTreeMiddleware + impl Service> for AwaitTreeMiddleware where - S: Service> + Clone + Send + 'static, - S::Future: Send + 'static, + S: Service> + Clone, { type Error = S::Error; type Response = S::Response; @@ -452,7 +450,7 @@ pub mod grpc_middleware { self.inner.poll_ready(cx) } - fn call(&mut self, req: hyper::Request) -> Self::Future { + fn call(&mut self, req: http::Request) -> Self::Future { let Some(registry) = self.registry.clone() else { return Either::Left(self.inner.call(req)); }; @@ -479,7 +477,8 @@ pub mod grpc_middleware { } } - impl NamedService for AwaitTreeMiddleware { + #[cfg(not(madsim))] + impl tonic::server::NamedService for AwaitTreeMiddleware { const NAME: &'static str = S::NAME; } } diff --git a/src/config/example.toml b/src/config/example.toml index 866a56017982e..b4da5515e9ff5 100644 --- a/src/config/example.toml +++ b/src/config/example.toml @@ -113,8 +113,8 @@ stream_exchange_concurrent_dispatchers = 0 stream_dml_channel_initial_permits = 32768 stream_hash_agg_max_dirty_groups_heap_size = 67108864 stream_memory_controller_threshold_aggressive = 0.9 -stream_memory_controller_threshold_graceful = 0.8 -stream_memory_controller_threshold_stable = 0.7 +stream_memory_controller_threshold_graceful = 0.81 +stream_memory_controller_threshold_stable = 0.72 stream_memory_controller_eviction_factor_aggressive = 2.0 stream_memory_controller_eviction_factor_graceful = 1.5 stream_memory_controller_eviction_factor_stable = 1.0 diff --git a/src/connector/Cargo.toml b/src/connector/Cargo.toml index e504457a75a2c..38e82ccdf76ea 100644 --- a/src/connector/Cargo.toml +++ b/src/connector/Cargo.toml @@ -61,10 +61,10 @@ futures = { version = "0.3", default-features = false, features = ["alloc"] } futures-async-stream = { workspace = true } gcp-bigquery-client = "0.18.0" glob = "0.3" -google-cloud-bigquery = { version = "0.9.0", features = ["auth"] } -google-cloud-gax = "0.17.0" -google-cloud-googleapis = { version = "0.13", features = ["pubsub", "bigquery"] } -google-cloud-pubsub = "0.25" +google-cloud-bigquery = { version = "0.12.0", features = ["auth"] } +google-cloud-gax = "0.19.0" +google-cloud-googleapis = { version = "0.15", features = ["pubsub", "bigquery"] } +google-cloud-pubsub = "0.28" http = "0.2" iceberg = { workspace = true } iceberg-catalog-rest = { workspace = true } @@ -100,8 +100,8 @@ pg_bigdecimal = { git = "https://github.com/risingwavelabs/rust-pg_bigdecimal", postgres-openssl = "0.5.0" prometheus = { version = "0.13", features = ["process"] } prost = { workspace = true, features = ["no-recursion-limit"] } -prost-reflect = "0.13" -prost-types = "0.12" +prost-reflect = "0.14" +prost-types = "0.13" protobuf-native = "0.2.2" pulsar = { version = "6.3", default-features = false, features = [ "tokio-runtime", @@ -154,7 +154,7 @@ thiserror-ext = { workspace = true } tiberius = { git = "https://github.com/risingwavelabs/tiberius.git", rev = "f834f2deeb9e2fb08afaf73865f330cf31a3876a", default-features = false, features = [ "chrono", "sql-browser-tokio", - "vendored-openssl", + "rustls", "rust_decimal", "bigdecimal", "tds73", @@ -172,7 +172,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ ] } tokio-postgres = { version = "0.7", features = ["with-uuid-1"] } tokio-retry = "0.3" -tokio-stream = "0.1" +tokio-stream = { workspace = true } tokio-util = { workspace = true, features = ["codec", "io"] } tonic = { workspace = true } tracing = "0.1" diff --git 
a/src/connector/codec/src/decoder/avro/mod.rs b/src/connector/codec/src/decoder/avro/mod.rs index 93d16a32508db..738535ec9410c 100644 --- a/src/connector/codec/src/decoder/avro/mod.rs +++ b/src/connector/codec/src/decoder/avro/mod.rs @@ -25,8 +25,8 @@ use risingwave_common::array::{ListValue, StructValue}; use risingwave_common::bail; use risingwave_common::log::LogSuppresser; use risingwave_common::types::{ - DataType, Date, DatumCow, Interval, JsonbVal, ScalarImpl, Time, Timestamp, Timestamptz, - ToOwnedDatum, + DataType, Date, DatumCow, Interval, JsonbVal, MapValue, ScalarImpl, Time, Timestamp, + Timestamptz, ToOwnedDatum, }; use risingwave_common::util::iter_util::ZipEqFast; @@ -318,6 +318,34 @@ impl<'a> AvroParseOptions<'a> { (DataType::Varchar, Value::Uuid(uuid)) => { uuid.as_hyphenated().to_string().into_boxed_str().into() } + (DataType::Map(map_type), Value::Map(map)) => { + let schema = self.extract_inner_schema(None); + let mut builder = map_type + .clone() + .into_struct() + .create_array_builder(map.len()); + // Since the map is HashMap, we can ensure + // key is non-null and unique, keys and values have the same length. + + // NOTE: HashMap's iter order is non-deterministic, but MapValue's + // order matters. We sort by key here to have deterministic order + // in tests. We might consider removing this, or make all MapValue sorted + // in the future. + for (k, v) in map.iter().sorted_by_key(|(k, _v)| *k) { + let value_datum = Self { + schema, + relax_numeric: self.relax_numeric, + } + .convert_to_datum(v, map_type.value())? + .to_owned_datum(); + builder.append( + StructValue::new(vec![Some(k.as_str().into()), value_datum]) + .to_owned_datum(), + ); + } + let list = ListValue::new(builder.finish()); + MapValue::from_list_entries(list).into() + } (_expected, _got) => Err(create_error())?, }; diff --git a/src/connector/codec/src/decoder/avro/schema.rs b/src/connector/codec/src/decoder/avro/schema.rs index 324b7fd426a56..7e86a1cc11dd1 100644 --- a/src/connector/codec/src/decoder/avro/schema.rs +++ b/src/connector/codec/src/decoder/avro/schema.rs @@ -20,7 +20,7 @@ use apache_avro::AvroResult; use itertools::Itertools; use risingwave_common::error::NotImplemented; use risingwave_common::log::LogSuppresser; -use risingwave_common::types::{DataType, Decimal}; +use risingwave_common::types::{DataType, Decimal, MapType}; use risingwave_common::{bail, bail_not_implemented}; use risingwave_pb::plan_common::{AdditionalColumn, ColumnDesc, ColumnDescVersion}; @@ -57,8 +57,7 @@ impl ResolvedAvroSchema { #[derive(Debug, Copy, Clone)] pub enum MapHandling { Jsonb, - // TODO: - // Map + Map, } impl MapHandling { @@ -69,6 +68,7 @@ impl MapHandling { ) -> anyhow::Result> { let mode = match options.get(Self::OPTION_KEY).map(std::ops::Deref::deref) { Some("jsonb") => Self::Jsonb, + Some("map") => Self::Map, Some(v) => bail!("unrecognized {} value {}", Self::OPTION_KEY, v), None => return Ok(None), }; @@ -266,12 +266,10 @@ fn avro_type_mapping( ); } } - None => { - // We require it to be specified, because we don't want to have a bad default behavior. - // But perhaps changing the default behavior won't be a breaking change, - // because it affects only on creation time, what the result ColumnDesc will be, and the ColumnDesc will be persisted. - // This is unlike timestamp.handing.mode, which affects parser's behavior on the runtime. - bail!("`map.handling.mode` not specified in ENCODE AVRO (...). 
Currently supported modes: `jsonb`") + Some(MapHandling::Map) | None => { + let value = avro_type_mapping(value_schema.as_ref(), map_handling) + .context("failed to convert Avro map type")?; + DataType::Map(MapType::from_kv(DataType::Varchar, value)) } } } diff --git a/src/connector/codec/tests/integration_tests/avro.rs b/src/connector/codec/tests/integration_tests/avro.rs index 11421c151d7a5..11275f45e9783 100644 --- a/src/connector/codec/tests/integration_tests/avro.rs +++ b/src/connector/codec/tests/integration_tests/avro.rs @@ -885,3 +885,117 @@ fn test_union() { ])"#]], ); } + +#[test] +fn test_map() { + let schema = r#" +{ + "type": "record", + "namespace": "com.redpanda.examples.avro", + "name": "ClickEvent", + "fields": [ + { + "name": "map_str", + "type": { + "type": "map", + "values": "string" + }, + "default": {} + }, + { + "name": "map_map_int", + "type": { + "type": "map", + "values": { + "type": "map", + "values": "int" + } + } + } + ] +} + "#; + + let data = &[ + // {"map_str": {"a":"1","b":"2"}, "map_map_int": {"m1": {"a":1,"b":2}, "m2": {"c":3,"d":4}}} + "0402610278026202790004046d310402610202620400046d32040263060264080000", + // {"map_map_int": {}} + "0000", + ]; + + check( + schema, + data, + Config { + map_handling: None, + data_encoding: TestDataEncoding::HexBinary, + }, + expect![[r#" + [ + map_str(#1): Map(Varchar,Varchar), + map_map_int(#2): Map(Varchar,Map(Varchar,Int32)), + ]"#]], + expect![[r#" + Owned([ + StructValue( + Utf8("a"), + Utf8("x"), + ), + StructValue( + Utf8("b"), + Utf8("y"), + ), + ]) + Owned([ + StructValue( + Utf8("m1"), + [ + StructValue( + Utf8("a"), + Int32(1), + ), + StructValue( + Utf8("b"), + Int32(2), + ), + ], + ), + StructValue( + Utf8("m2"), + [ + StructValue( + Utf8("c"), + Int32(3), + ), + StructValue( + Utf8("d"), + Int32(4), + ), + ], + ), + ]) + ---- + Owned([]) + Owned([])"#]], + ); + + check( + schema, + data, + Config { + map_handling: Some(MapHandling::Jsonb), + data_encoding: TestDataEncoding::HexBinary, + }, + expect![[r#" + [ + map_str(#1): Jsonb, + map_map_int(#2): Jsonb, + ]"#]], + expect![[r#" + Owned(Jsonb(JsonbRef({"a": "x", "b": "y"}))) + Owned(Jsonb(JsonbRef({"m1": {"a": Number(1), "b": Number(2)}, "m2": {"c": Number(3), "d": Number(4)}}))) + ---- + Owned(Jsonb(JsonbRef({}))) + Owned(Jsonb(JsonbRef({})))"#]], + ); +} diff --git a/src/connector/codec/tests/integration_tests/utils.rs b/src/connector/codec/tests/integration_tests/utils.rs index cecb0796c455a..dd375656c51e3 100644 --- a/src/connector/codec/tests/integration_tests/utils.rs +++ b/src/connector/codec/tests/integration_tests/utils.rs @@ -44,6 +44,14 @@ impl<'a> std::fmt::Debug for DataTypeTestDisplay<'a> { .debug_tuple("List") .field(&DataTypeTestDisplay(t)) .finish(), + DataType::Map(m) => { + write!( + f, + "Map({:?},{:?})", + &DataTypeTestDisplay(m.key()), + &DataTypeTestDisplay(m.value()) + ) + } _ => { // do not use alternative display for simple types write!(f, "{:?}", self.0) @@ -76,6 +84,10 @@ impl<'a> std::fmt::Debug for ScalarRefImplTestDisplay<'a> { .debug_list() .entries(l.iter().map(DatumRefTestDisplay)) .finish(), + ScalarRefImpl::Map(m) => f + .debug_list() + .entries(m.inner().iter().map(DatumRefTestDisplay)) + .finish(), _ => { // do not use alternative display for simple types write!(f, "{:?}", self.0) diff --git a/src/connector/src/connector_common/common.rs b/src/connector/src/connector_common/common.rs index 0933f38dc14e0..1c911c5a3992f 100644 --- a/src/connector/src/connector_common/common.rs +++ 
b/src/connector/src/connector_common/common.rs @@ -46,6 +46,10 @@ pub const PRIVATE_LINK_TARGETS_KEY: &str = "privatelink.targets"; const AWS_MSK_IAM_AUTH: &str = "AWS_MSK_IAM"; +/// The environment variable to disable using default credential from environment. +/// It's recommended to set this variable to `false` in cloud hosting environment. +const DISABLE_DEFAULT_CREDENTIAL: &str = "DISABLE_DEFAULT_CREDENTIAL"; + #[derive(Debug, Clone, Deserialize)] pub struct AwsPrivateLinkItem { pub az_id: Option, @@ -57,6 +61,7 @@ use aws_config::sts::AssumeRoleProvider; use aws_credential_types::provider::SharedCredentialsProvider; use aws_types::region::Region; use aws_types::SdkConfig; +use risingwave_common::util::env_var::env_var_is_true; /// A flatten config map for aws auth. #[derive(Deserialize, Debug, Clone, WithOptions)] @@ -104,7 +109,7 @@ impl AwsAuthProps { } } - fn build_credential_provider(&self) -> ConnectorResult { + async fn build_credential_provider(&self) -> ConnectorResult { if self.access_key.is_some() && self.secret_key.is_some() { Ok(SharedCredentialsProvider::new( aws_credential_types::Credentials::from_keys( @@ -113,6 +118,10 @@ impl AwsAuthProps { self.session_token.clone(), ), )) + } else if !env_var_is_true(DISABLE_DEFAULT_CREDENTIAL) { + Ok(SharedCredentialsProvider::new( + aws_config::default_provider::credentials::default_provider().await, + )) } else { bail!("Both \"access_key\" and \"secret_key\" are required.") } @@ -140,7 +149,7 @@ impl AwsAuthProps { pub async fn build_config(&self) -> ConnectorResult { let region = self.build_region().await?; let credentials_provider = self - .with_role_provider(self.build_credential_provider()?) + .with_role_provider(self.build_credential_provider().await?) .await?; let mut config_loader = aws_config::from_env() .region(region) diff --git a/src/connector/src/parser/mysql.rs b/src/connector/src/parser/mysql.rs index a28dddc9aa65a..fe9b77c643de7 100644 --- a/src/connector/src/parser/mysql.rs +++ b/src/connector/src/parser/mysql.rs @@ -127,8 +127,10 @@ pub fn mysql_row_to_owned_row(mysql_row: &mut MysqlRow, schema: &Schema) -> Owne | DataType::Struct(_) | DataType::List(_) | DataType::Int256 - | DataType::Serial => { + | DataType::Serial + | DataType::Map(_) => { // Interval, Struct, List, Int256 are not supported + // XXX: is this branch reachable? if let Ok(suppressed_count) = LOG_SUPPERSSER.check() { tracing::warn!(column = rw_field.name, ?rw_field.data_type, suppressed_count, "unsupported data type, set to null"); } diff --git a/src/connector/src/parser/postgres.rs b/src/connector/src/parser/postgres.rs index da17ea256ba3c..f55fe28f878f9 100644 --- a/src/connector/src/parser/postgres.rs +++ b/src/connector/src/parser/postgres.rs @@ -116,7 +116,8 @@ fn postgres_cell_to_scalar_impl( } } }, - DataType::Struct(_) | DataType::Serial => { + DataType::Struct(_) | DataType::Serial | DataType::Map(_) => { + // Is this branch reachable? 
// Struct and Serial are not supported tracing::warn!(name, ?data_type, "unsupported data type, set to null"); None diff --git a/src/connector/src/sink/big_query.rs b/src/connector/src/sink/big_query.rs index 7b3c3a7d02386..22146e86d0d1d 100644 --- a/src/connector/src/sink/big_query.rs +++ b/src/connector/src/sink/big_query.rs @@ -261,6 +261,7 @@ impl BigQuerySink { DataType::Int256 => Err(SinkError::BigQuery(anyhow::anyhow!( "Bigquery cannot support Int256" ))), + DataType::Map(_) => todo!(), } } @@ -310,6 +311,7 @@ impl BigQuerySink { "Bigquery cannot support Int256" ))) } + DataType::Map(_) => todo!(), }; Ok(tfs) } @@ -819,6 +821,7 @@ fn build_protobuf_field( "Don't support Float32 and Int256" ))) } + DataType::Map(_) => todo!(), } Ok((field, None)) } diff --git a/src/connector/src/sink/clickhouse.rs b/src/connector/src/sink/clickhouse.rs index d715e93b8d6c4..4337f2b9d76b7 100644 --- a/src/connector/src/sink/clickhouse.rs +++ b/src/connector/src/sink/clickhouse.rs @@ -473,6 +473,9 @@ impl ClickHouseSink { risingwave_common::types::DataType::Int256 => Err(SinkError::ClickHouse( "clickhouse can not support Int256".to_string(), )), + risingwave_common::types::DataType::Map(_) => Err(SinkError::ClickHouse( + "clickhouse can not support Map".to_string(), + )), }; if !is_match? { return Err(SinkError::ClickHouse(format!( @@ -1020,6 +1023,11 @@ impl ClickHouseFieldWithNull { "clickhouse can not support Bytea".to_string(), )) } + ScalarRefImpl::Map(_) => { + return Err(SinkError::ClickHouse( + "clickhouse can not support Map".to_string(), + )) + } }; let data = if clickhouse_schema_feature.can_null { vec![ClickHouseFieldWithNull::WithSome(data)] diff --git a/src/connector/src/sink/doris.rs b/src/connector/src/sink/doris.rs index 7745f2e9e98b1..0571c9a2bd6bc 100644 --- a/src/connector/src/sink/doris.rs +++ b/src/connector/src/sink/doris.rs @@ -188,6 +188,9 @@ impl DorisSink { risingwave_common::types::DataType::Int256 => { Err(SinkError::Doris("doris can not support Int256".to_string())) } + risingwave_common::types::DataType::Map(_) => { + Err(SinkError::Doris("doris can not support Map".to_string())) + } } } } diff --git a/src/connector/src/sink/dynamodb.rs b/src/connector/src/sink/dynamodb.rs index 808d2a8c98e95..6d73bf2d478c8 100644 --- a/src/connector/src/sink/dynamodb.rs +++ b/src/connector/src/sink/dynamodb.rs @@ -398,6 +398,7 @@ fn map_data_type( } AttributeValue::M(map) } + DataType::Map(_) => todo!(), }; Ok(attr) } diff --git a/src/connector/src/sink/encoder/avro.rs b/src/connector/src/sink/encoder/avro.rs index 8122126727298..4a2060f0a8c6c 100644 --- a/src/connector/src/sink/encoder/avro.rs +++ b/src/connector/src/sink/encoder/avro.rs @@ -454,6 +454,10 @@ fn encode_field( DataType::Int256 => { return no_match_err(); } + DataType::Map(_) => { + // TODO: + return no_match_err(); + } }; D::handle_union(value, opt_idx) diff --git a/src/connector/src/sink/encoder/json.rs b/src/connector/src/sink/encoder/json.rs index 3652f38bacbb2..6dc8809f42933 100644 --- a/src/connector/src/sink/encoder/json.rs +++ b/src/connector/src/sink/encoder/json.rs @@ -401,6 +401,7 @@ pub(crate) fn schema_type_mapping(rw_type: &DataType) -> &'static str { DataType::Jsonb => "string", DataType::Serial => "string", DataType::Int256 => "string", + DataType::Map(_) => "map", } } diff --git a/src/connector/src/sink/encoder/proto.rs b/src/connector/src/sink/encoder/proto.rs index a0e4d41dc58de..8046606b5690c 100644 --- a/src/connector/src/sink/encoder/proto.rs +++ b/src/connector/src/sink/encoder/proto.rs @@ -420,6 
+420,10 @@ fn encode_field( DataType::Int256 => { return no_match_err(); } + DataType::Map(_) => { + // TODO: + return no_match_err(); + } }; Ok(value) diff --git a/src/connector/src/sink/formatter/debezium_json.rs b/src/connector/src/sink/formatter/debezium_json.rs index a9bf0404f473e..9fff8a9b8ba6b 100644 --- a/src/connector/src/sink/formatter/debezium_json.rs +++ b/src/connector/src/sink/formatter/debezium_json.rs @@ -314,6 +314,7 @@ pub(crate) fn field_to_json(field: &Field) -> Value { // we do the same here risingwave_common::types::DataType::Struct(_) => ("string", ""), risingwave_common::types::DataType::List { .. } => ("string", ""), + risingwave_common::types::DataType::Map(_) => ("string", ""), }; if name.is_empty() { diff --git a/src/connector/src/sink/formatter/mod.rs b/src/connector/src/sink/formatter/mod.rs index b2e93cba763ea..6da8a1e0d2008 100644 --- a/src/connector/src/sink/formatter/mod.rs +++ b/src/connector/src/sink/formatter/mod.rs @@ -85,6 +85,10 @@ pub enum SinkFormatterImpl { UpsertTextJson(UpsertFormatter), UpsertAvro(UpsertFormatter), UpsertTextAvro(UpsertFormatter), + // `UpsertFormatter` is intentionally left out + // to avoid using `ProtoEncoder` as key: + // + UpsertTextProto(UpsertFormatter), UpsertTemplate(UpsertFormatter), UpsertTextTemplate(UpsertFormatter), // debezium @@ -356,6 +360,7 @@ impl SinkFormatterImpl { (F::Upsert, E::Json, None) => Impl::UpsertJson(build(p).await?), (F::Upsert, E::Avro, Some(E::Text)) => Impl::UpsertTextAvro(build(p).await?), (F::Upsert, E::Avro, None) => Impl::UpsertAvro(build(p).await?), + (F::Upsert, E::Protobuf, Some(E::Text)) => Impl::UpsertTextProto(build(p).await?), (F::Upsert, E::Template, Some(E::Text)) => { Impl::UpsertTextTemplate(build(p).await?) } @@ -399,6 +404,7 @@ macro_rules! dispatch_sink_formatter_impl { SinkFormatterImpl::UpsertTextJson($name) => $body, SinkFormatterImpl::UpsertAvro($name) => $body, SinkFormatterImpl::UpsertTextAvro($name) => $body, + SinkFormatterImpl::UpsertTextProto($name) => $body, SinkFormatterImpl::DebeziumJson($name) => $body, SinkFormatterImpl::AppendOnlyTextTemplate($name) => $body, SinkFormatterImpl::AppendOnlyTemplate($name) => $body, @@ -423,6 +429,7 @@ macro_rules! 
dispatch_sink_formatter_str_key_impl { SinkFormatterImpl::UpsertTextJson($name) => $body, SinkFormatterImpl::UpsertAvro(_) => unreachable!(), SinkFormatterImpl::UpsertTextAvro($name) => $body, + SinkFormatterImpl::UpsertTextProto($name) => $body, SinkFormatterImpl::DebeziumJson($name) => $body, SinkFormatterImpl::AppendOnlyTextTemplate($name) => $body, SinkFormatterImpl::AppendOnlyTemplate($name) => $body, diff --git a/src/connector/src/sink/kinesis.rs b/src/connector/src/sink/kinesis.rs index 3b5d49da46037..b10f25d962124 100644 --- a/src/connector/src/sink/kinesis.rs +++ b/src/connector/src/sink/kinesis.rs @@ -15,18 +15,17 @@ use std::collections::BTreeMap; use anyhow::{anyhow, Context}; -use aws_sdk_kinesis::operation::put_records::builders::PutRecordsFluentBuilder; +use aws_sdk_kinesis::operation::put_records::PutRecordsOutput; use aws_sdk_kinesis::primitives::Blob; -use aws_sdk_kinesis::types::PutRecordsRequestEntry; +use aws_sdk_kinesis::types::{PutRecordsRequestEntry, PutRecordsResultEntry}; use aws_sdk_kinesis::Client as KinesisClient; use futures::{FutureExt, TryFuture}; +use itertools::Itertools; use risingwave_common::array::StreamChunk; use risingwave_common::catalog::Schema; use risingwave_common::session_config::sink_decouple::SinkDecouple; use serde_derive::Deserialize; use serde_with::serde_as; -use tokio_retry::strategy::{jitter, ExponentialBackoff}; -use tokio_retry::Retry; use with_options::WithOptions; use super::catalog::SinkFormatDesc; @@ -155,9 +154,9 @@ pub struct KinesisSinkWriter { } struct KinesisSinkPayloadWriter { - // builder should always be `Some`. Making it an option so that we can call - // builder methods that take the builder ownership as input and return with a new builder. - builder: Option, + client: KinesisClient, + entries: Vec<(PutRecordsRequestEntry, usize)>, + stream_name: String, } impl KinesisSinkWriter { @@ -191,39 +190,173 @@ impl KinesisSinkWriter { } fn new_payload_writer(&self) -> KinesisSinkPayloadWriter { - let builder = self - .client - .put_records() - .stream_name(&self.config.common.stream_name); KinesisSinkPayloadWriter { - builder: Some(builder), + client: self.client.clone(), + entries: vec![], + stream_name: self.config.common.stream_name.clone(), } } } mod opaque_type { + use std::cmp::min; + use std::time::Duration; + + use thiserror_ext::AsReport; + use tokio::time::sleep; + use tokio_retry::strategy::{jitter, ExponentialBackoff}; + use tracing::warn; + use super::*; pub type KinesisSinkPayloadWriterDeliveryFuture = impl TryFuture + Unpin + Send + 'static; impl KinesisSinkPayloadWriter { pub(super) fn finish(self) -> KinesisSinkPayloadWriterDeliveryFuture { + // For reference to the behavior of `put_records` + // https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/kinesis/client/put_records.html + async move { - let builder = self.builder.expect("should not be None"); - let context_fmt = format!( - "failed to put record to {}", - builder - .get_stream_name() - .as_ref() - .expect("should have set stream name") - ); - Retry::spawn( - ExponentialBackoff::from_millis(100).map(jitter).take(3), - || builder.clone().send(), - ) - .await - .with_context(|| context_fmt.clone()) - .map_err(SinkError::Kinesis)?; + // From the doc of `put_records`: + // Each PutRecords request can support up to 500 records. Each record in the request can be as large as 1 MiB, + // up to a limit of 5 MiB for the entire request, including partition keys. 
Each shard can support writes up to + // 1,000 records per second, up to a maximum data write total of 1 MiB per second. + + const MAX_RECORD_COUNT: usize = 500; + const MAX_SINGLE_RECORD_PAYLOAD_SIZE: usize = 1 << 20; + const MAX_TOTAL_RECORD_PAYLOAD_SIZE: usize = 5 * (1 << 20); + // Allow at most 3 times of retry when not making any progress to avoid endless retry + const MAX_NO_PROGRESS_RETRY_COUNT: usize = 3; + + let mut remaining_no_progress_retry_count = MAX_NO_PROGRESS_RETRY_COUNT; + let total_count = self.entries.len(); + let mut start_idx = 0; + + let mut throttle_delay = None; + + while start_idx < total_count { + // 1. Prepare the records to be sent + + // The maximum possible number of records that can be sent in this iteration. + // Can be smaller than this number when the total payload size exceeds `MAX_TOTAL_RECORD_PAYLOAD_SIZE` + let max_record_count = min(MAX_RECORD_COUNT, total_count - start_idx); + let mut records = Vec::with_capacity(max_record_count); + let mut total_payload_size = 0; + for i in start_idx..(start_idx + max_record_count) { + let (record, size) = &self.entries[i]; + if *size >= MAX_SINGLE_RECORD_PAYLOAD_SIZE { + warn!( + size, + partition = record.partition_key, + "encounter a large single record" + ); + } + if total_payload_size + *size < MAX_TOTAL_RECORD_PAYLOAD_SIZE { + total_payload_size += *size; + records.push(record.clone()); + } else { + break; + } + } + if records.is_empty() { + // at least include one record even if its size exceed `MAX_TOTAL_RECORD_PAYLOAD_SIZE` + records.push(self.entries[start_idx].0.clone()); + } + + // 2. send the records and handle the result + let record_count = records.len(); + match self + .client + .put_records() + .stream_name(&self.stream_name) + .set_records(Some(records)) + .send() + .await + { + Ok(output) => { + if record_count != output.records.len() { + return Err(SinkError::Kinesis(anyhow!("request record count {} not match the response record count {}", record_count, output.records.len()))); + } + // From the doc of `put_records`: + // A single record failure does not stop the processing of subsequent records. As a result, + // PutRecords doesn’t guarantee the ordering of records. If you need to read records in the same + // order they are written to the stream, use PutRecord instead of PutRecords, and write to the same shard. + + // Therefore, to ensure at least once and eventual consistency, we figure out the first failed entry, and retry + // all the following entries even if the following entries may have been successfully processed. + if let Some((first_failed_idx, result_entry)) = Self::first_failed_entry(output) { + // first_failed_idx is also the number of successful entries + let partially_sent_count = first_failed_idx; + if partially_sent_count > 0 { + warn!( + partially_sent_count, + record_count, + "records are partially sent. code: [{}], message: [{}]", + result_entry.error_code.unwrap_or_default(), + result_entry.error_message.unwrap_or_default() + ); + start_idx += partially_sent_count; + // reset retry count when having progress + remaining_no_progress_retry_count = MAX_NO_PROGRESS_RETRY_COUNT; + } else if let Some(err_code) = &result_entry.error_code && err_code == "ProvisionedThroughputExceededException" { + // From the doc of `put_records`: + // The ErrorCode parameter reflects the type of error and can be one of the following values: + // ProvisionedThroughputExceededException or InternalFailure. 
ErrorMessage provides more detailed + // information about the ProvisionedThroughputExceededException exception including the account ID, + // stream name, and shard ID of the record that was throttled. + let throttle_delay = throttle_delay.get_or_insert_with(|| ExponentialBackoff::from_millis(100).factor(2).max_delay(Duration::from_secs(2)).map(jitter)).next().expect("should not be none"); + warn!(err_string = ?result_entry.error_message, ?throttle_delay, "throttle"); + sleep(throttle_delay).await; + } else { + // no progress due to some internal error + assert_eq!(first_failed_idx, 0); + remaining_no_progress_retry_count -= 1; + if remaining_no_progress_retry_count == 0 { + return Err(SinkError::Kinesis(anyhow!( + "failed to send records. sent {} out of {}, last err: code: [{}], message: [{}]", + start_idx, + total_count, + result_entry.error_code.unwrap_or_default(), + result_entry.error_message.unwrap_or_default() + ))); + } else { + warn!( + remaining_no_progress_retry_count, + sent = start_idx, + total_count, + "failed to send records. code: [{}], message: [{}]", + result_entry.error_code.unwrap_or_default(), + result_entry.error_message.unwrap_or_default() + ) + } + } + } else { + start_idx += record_count; + // reset retry count when having progress + remaining_no_progress_retry_count = MAX_NO_PROGRESS_RETRY_COUNT; + // reset throttle delay when records can be fully sent. + throttle_delay = None; + } + } + Err(e) => { + remaining_no_progress_retry_count -= 1; + if remaining_no_progress_retry_count == 0 { + return Err(SinkError::Kinesis(anyhow!(e).context(format!( + "failed to send records. sent {} out of {}", + start_idx, total_count, + )))); + } else { + warn!( + remaining_no_progress_retry_count, + sent = start_idx, + total_count, + "failed to send records. err: [{:?}]", + e.as_report(), + ) + } + } + } + } Ok(()) } .boxed() @@ -233,16 +366,32 @@ mod opaque_type { pub use opaque_type::KinesisSinkPayloadWriterDeliveryFuture; impl KinesisSinkPayloadWriter { + fn first_failed_entry(output: PutRecordsOutput) -> Option<(usize, PutRecordsResultEntry)> { + // From the doc of `put_records`: + // A successfully processed record includes ShardId and SequenceNumber values. The ShardId parameter + // identifies the shard in the stream where the record is stored. The SequenceNumber parameter is an + // identifier assigned to the put record, unique to all records in the stream. + // + // An unsuccessfully processed record includes ErrorCode and ErrorMessage values. ErrorCode reflects + // the type of error and can be one of the following values: ProvisionedThroughputExceededException or + // InternalFailure. ErrorMessage provides more detailed information about the ProvisionedThroughputExceededException + // exception including the account ID, stream name, and shard ID of the record that was throttled. 
+ output + .records + .into_iter() + .find_position(|entry| entry.shard_id.is_none()) + } + fn put_record(&mut self, key: String, payload: Vec) { - self.builder = Some( - self.builder.take().expect("should not be None").records( - PutRecordsRequestEntry::builder() - .partition_key(key) - .data(Blob::new(payload)) - .build() - .expect("should not fail because we have set `data` and `partition_key`"), - ), - ); + let size = key.len() + payload.len(); + self.entries.push(( + PutRecordsRequestEntry::builder() + .partition_key(key) + .data(Blob::new(payload)) + .build() + .expect("should not fail because we have set `data` and `partition_key`"), + size, + )) } } diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs index b847c6348234f..606965a8424d7 100644 --- a/src/connector/src/sink/remote.rs +++ b/src/connector/src/sink/remote.rs @@ -216,7 +216,7 @@ async fn validate_remote_sink(param: &SinkParam, sink_name: &str) -> ConnectorRe ))) } }, - DataType::Serial | DataType::Int256 => Err(SinkError::Remote(anyhow!( + DataType::Serial | DataType::Int256 | DataType::Map(_) => Err(SinkError::Remote(anyhow!( "remote sink supports Int16, Int32, Int64, Float32, Float64, Boolean, Decimal, Time, Date, Interval, Jsonb, Timestamp, Timestamptz, Bytea, List and Varchar, (Es sink support Struct) got {:?}: {:?}", col.name, col.data_type, diff --git a/src/connector/src/sink/sqlserver.rs b/src/connector/src/sink/sqlserver.rs index 0295d3d3de9ad..d40e2a2647b24 100644 --- a/src/connector/src/sink/sqlserver.rs +++ b/src/connector/src/sink/sqlserver.rs @@ -587,6 +587,7 @@ fn bind_params( ScalarRefImpl::List(_) => return Err(data_type_not_supported("List")), ScalarRefImpl::Int256(_) => return Err(data_type_not_supported("Int256")), ScalarRefImpl::Serial(_) => return Err(data_type_not_supported("Serial")), + ScalarRefImpl::Map(_) => return Err(data_type_not_supported("Map")), }, None => match schema[col_idx].data_type { DataType::Boolean => { @@ -634,6 +635,7 @@ fn bind_params( DataType::Jsonb => return Err(data_type_not_supported("Jsonb")), DataType::Serial => return Err(data_type_not_supported("Serial")), DataType::Int256 => return Err(data_type_not_supported("Int256")), + DataType::Map(_) => return Err(data_type_not_supported("Map")), }, }; } @@ -667,6 +669,7 @@ fn check_data_type_compatibility(data_type: &DataType) -> Result<()> { DataType::Jsonb => Err(data_type_not_supported("Jsonb")), DataType::Serial => Err(data_type_not_supported("Serial")), DataType::Int256 => Err(data_type_not_supported("Int256")), + DataType::Map(_) => Err(data_type_not_supported("Map")), } } diff --git a/src/connector/src/sink/starrocks.rs b/src/connector/src/sink/starrocks.rs index e5881ee9f747e..84d3f95131758 100644 --- a/src/connector/src/sink/starrocks.rs +++ b/src/connector/src/sink/starrocks.rs @@ -246,6 +246,9 @@ impl StarrocksSink { risingwave_common::types::DataType::Int256 => Err(SinkError::Starrocks( "INT256 is not supported for Starrocks sink.".to_string(), )), + risingwave_common::types::DataType::Map(_) => Err(SinkError::Starrocks( + "MAP is not supported for Starrocks sink.".to_string(), + )), } } } diff --git a/src/connector/src/source/cdc/source/reader.rs b/src/connector/src/source/cdc/source/reader.rs index 135daa5e04804..b29ef1312bbd9 100644 --- a/src/connector/src/source/cdc/source/reader.rs +++ b/src/connector/src/source/cdc/source/reader.rs @@ -161,7 +161,7 @@ impl SplitReader for CdcSplitReader { } }; if !inited { - bail!("failed to start cdc connector"); + bail!("failed to start cdc 
connector.\nHINT: increase `cdc_source_wait_streaming_start_timeout` session variable to a large value and retry."); } } tracing::info!(?source_id, "cdc connector started"); diff --git a/src/connector/src/source/kinesis/source/reader.rs b/src/connector/src/source/kinesis/source/reader.rs index 4bac1fb1e49d2..123dc3a763ba8 100644 --- a/src/connector/src/source/kinesis/source/reader.rs +++ b/src/connector/src/source/kinesis/source/reader.rs @@ -91,7 +91,10 @@ impl SplitReader for KinesisSplitReader { if !matches!(start_position, KinesisOffset::Timestamp(_)) && properties.timestamp_offset.is_some() { - bail!("scan.startup.mode needs to be set to 'timestamp' if you want to start with a specific timestamp"); + // cannot bail! here because all new split readers will fail to start if user set 'scan.startup.mode' to 'timestamp' + tracing::warn!("scan.startup.mode needs to be set to 'timestamp' if you want to start with a specific timestamp, starting shard {} from the beginning", + split.id() + ); } let stream_name = properties.common.stream_name.clone(); @@ -328,40 +331,6 @@ mod tests { use crate::connector_common::KinesisCommon; use crate::source::SourceContext; - #[tokio::test] - async fn test_reject_redundant_seq_props() { - let properties = KinesisProperties { - common: KinesisCommon { - assume_role_arn: None, - credentials_access_key: None, - credentials_secret_access_key: None, - stream_name: "kinesis_debug".to_string(), - stream_region: "cn-northwest-1".to_string(), - endpoint: None, - session_token: None, - assume_role_external_id: None, - }, - - scan_startup_mode: None, - timestamp_offset: Some(123456789098765432), - - unknown_fields: Default::default(), - }; - let client = KinesisSplitReader::new( - properties, - vec![KinesisSplit { - shard_id: "shardId-000000000001".to_string().into(), - start_position: KinesisOffset::Earliest, - end_position: KinesisOffset::None, - }], - Default::default(), - SourceContext::dummy().into(), - None, - ) - .await; - assert!(client.is_err()); - } - #[tokio::test] #[ignore] async fn test_single_thread_kinesis_reader() -> Result<()> { diff --git a/src/ctl/src/cmd_impl/hummock/list_version.rs b/src/ctl/src/cmd_impl/hummock/list_version.rs index 0f88d881ddb69..460dd88621eb9 100644 --- a/src/ctl/src/cmd_impl/hummock/list_version.rs +++ b/src/ctl/src/cmd_impl/hummock/list_version.rs @@ -51,7 +51,8 @@ pub async fn list_version( } else { println!( "Version {} max_committed_epoch {}", - version.id, version.max_committed_epoch + version.id, + version.visible_table_committed_epoch() ); for (cg, levels) in &version.levels { diff --git a/src/ctl/src/cmd_impl/hummock/pause_resume.rs b/src/ctl/src/cmd_impl/hummock/pause_resume.rs index 85791267bfd8a..9fdb9bc0cab3d 100644 --- a/src/ctl/src/cmd_impl/hummock/pause_resume.rs +++ b/src/ctl/src/cmd_impl/hummock/pause_resume.rs @@ -23,7 +23,8 @@ pub async fn disable_commit_epoch(context: &CtlContext) -> anyhow::Result<()> { println!( "Disabled.\ Current version: id {}, max_committed_epoch {}", - version.id, version.max_committed_epoch + version.id, + version.visible_table_committed_epoch() ); Ok(()) } diff --git a/src/ctl/src/cmd_impl/meta/migration.rs b/src/ctl/src/cmd_impl/meta/migration.rs index fb786a505d16c..a6c488922022d 100644 --- a/src/ctl/src/cmd_impl/meta/migration.rs +++ b/src/ctl/src/cmd_impl/meta/migration.rs @@ -741,7 +741,7 @@ pub async fn migrate(from: EtcdBackend, target: String, force_clean: bool) -> an .map(|vd| hummock_version_delta::ActiveModel { id: Set(vd.id.to_u64() as _), prev_id: 
Set(vd.prev_id.to_u64() as _), - max_committed_epoch: Set(vd.max_committed_epoch as _), + max_committed_epoch: Set(vd.visible_table_committed_epoch() as _), safe_epoch: Set(vd.visible_table_safe_epoch() as _), trivial_move: Set(vd.trivial_move), full_version_delta: Set((&vd.to_protobuf()).into()), diff --git a/src/ctl/src/cmd_impl/table/scan.rs b/src/ctl/src/cmd_impl/table/scan.rs index 0689e315f74cb..e5bba170bf97a 100644 --- a/src/ctl/src/cmd_impl/table/scan.rs +++ b/src/ctl/src/cmd_impl/table/scan.rs @@ -124,7 +124,10 @@ async fn do_scan(table: TableCatalog, hummock: MonitoredStateStore for SigDataType { @@ -410,6 +412,7 @@ impl std::fmt::Display for SigDataType { Self::Any => write!(f, "any"), Self::AnyArray => write!(f, "anyarray"), Self::AnyStruct => write!(f, "anystruct"), + Self::AnyMap => write!(f, "anymap"), } } } @@ -422,6 +425,7 @@ impl SigDataType { Self::Any => true, Self::AnyArray => dt.is_array(), Self::AnyStruct => dt.is_struct(), + Self::AnyMap => dt.is_map(), } } diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml index a9c955f91dcca..e493037c200b7 100644 --- a/src/expr/impl/Cargo.toml +++ b/src/expr/impl/Cargo.toml @@ -44,7 +44,7 @@ educe = "0.6" fancy-regex = "0.13" futures-async-stream = { workspace = true } futures-util = "0.3" -ginepro = "0.7" +ginepro = "0.7" # TODO(http-bump): bump to 0.8 once arrow-udf switches to tonic 0.12 hex = "0.4" icelake = { workspace = true } itertools = { workspace = true } @@ -71,7 +71,7 @@ sql-json-path = { version = "0.1", features = ["jsonbb"] } thiserror = "1" thiserror-ext = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio", features = ["time"] } -tonic = { version = "0.10", optional = true } +tonic = { version = "0.10", optional = true } # TODO(http-bump): bump once arrow-udf switches to tonic 0.12 tracing = "0.1" zstd = { version = "0.13", default-features = false, optional = true } diff --git a/src/expr/impl/src/scalar/array.rs b/src/expr/impl/src/scalar/array.rs index aaefd17bba07d..08de9714ce058 100644 --- a/src/expr/impl/src/scalar/array.rs +++ b/src/expr/impl/src/scalar/array.rs @@ -14,20 +14,103 @@ use risingwave_common::array::{ListValue, StructValue}; use risingwave_common::row::Row; -use risingwave_common::types::ToOwnedDatum; +use risingwave_common::types::{ + DataType, ListRef, MapRef, MapType, MapValue, ScalarRefImpl, ToOwnedDatum, +}; use risingwave_expr::expr::Context; -use risingwave_expr::function; +use risingwave_expr::{function, ExprError}; -#[function("array(...) -> anyarray", type_infer = "panic")] +use super::array_positions::array_position; + +#[function("array(...) -> anyarray", type_infer = "unreachable")] fn array(row: impl Row, ctx: &Context) -> ListValue { ListValue::from_datum_iter(ctx.return_type.as_list(), row.iter()) } -#[function("row(...) -> struct", type_infer = "panic")] +#[function("row(...) 
-> struct", type_infer = "unreachable")] fn row_(row: impl Row) -> StructValue { StructValue::new(row.iter().map(|d| d.to_owned_datum()).collect()) } +fn map_type_infer(args: &[DataType]) -> Result { + let map = MapType::try_from_kv(args[0].as_list().clone(), args[1].as_list().clone())?; + Ok(map.into()) +} + +/// # Example +/// +/// ```slt +/// query T +/// select map_from_entries(null::int[], array[1,2,3]); +/// ---- +/// NULL +/// +/// query T +/// select map_from_entries(array['a','b','c'], array[1,2,3]); +/// ---- +/// {"a":1,"b":2,"c":3} +/// ``` +#[function( + "map_from_entries(anyarray, anyarray) -> anymap", + type_infer = "map_type_infer" +)] +fn map(key: ListRef<'_>, value: ListRef<'_>) -> Result { + MapValue::try_from_kv(key.to_owned(), value.to_owned()).map_err(ExprError::Custom) +} + +/// # Example +/// +/// ```slt +/// query T +/// select map_access(map_from_entries(array[1,2,3], array[100,200,300]), 3); +/// ---- +/// 300 +/// +/// query T +/// select map_access(map_from_entries(array[1,2,3], array[100,200,300]), '3'); +/// ---- +/// 300 +/// +/// query error +/// select map_access(map_from_entries(array[1,2,3], array[100,200,300]), 1.0); +/// ---- +/// db error: ERROR: Failed to run the query +/// +/// Caused by these errors (recent errors listed first): +/// 1: Failed to bind expression: map_access(map_from_entries(ARRAY[1, 2, 3], ARRAY[100, 200, 300]), 1.0) +/// 2: Bind error: Cannot access numeric in map(integer,integer) +/// +/// +/// query T +/// select map_access(map_from_entries(array['a','b','c'], array[1,2,3]), 'a'); +/// ---- +/// 1 +/// +/// query T +/// select map_access(map_from_entries(array['a','b','c'], array[1,2,3]), 'd'); +/// ---- +/// NULL +/// +/// query T +/// select map_access(map_from_entries(array['a','b','c'], array[1,2,3]), null); +/// ---- +/// NULL +/// ``` +#[function("map_access(anymap, any) -> any")] +fn map_access<'a>( + map: MapRef<'a>, + key: ScalarRefImpl<'_>, +) -> Result>, ExprError> { + // FIXME: DatumRef in return value is not support by the macro yet. + + let (keys, values) = map.into_kv(); + let idx = array_position(keys, Some(key))?; + match idx { + Some(idx) => Ok(values.get((idx - 1) as usize).unwrap()), + None => Ok(None), + } +} + #[cfg(test)] mod tests { use risingwave_common::array::DataChunk; diff --git a/src/expr/impl/src/scalar/array_positions.rs b/src/expr/impl/src/scalar/array_positions.rs index cbae53c001439..22c5f67d40e0e 100644 --- a/src/expr/impl/src/scalar/array_positions.rs +++ b/src/expr/impl/src/scalar/array_positions.rs @@ -66,7 +66,10 @@ use risingwave_expr::{function, ExprError, Result}; /// 2 /// ``` #[function("array_position(anyarray, any) -> int4")] -fn array_position(array: ListRef<'_>, element: Option>) -> Result> { +pub(super) fn array_position( + array: ListRef<'_>, + element: Option>, +) -> Result> { array_position_common(array, element, 0) } diff --git a/src/expr/impl/src/scalar/case.rs b/src/expr/impl/src/scalar/case.rs index f7fb9d89ef41b..1c92e76ce4e30 100644 --- a/src/expr/impl/src/scalar/case.rs +++ b/src/expr/impl/src/scalar/case.rs @@ -208,7 +208,7 @@ impl Expression for ConstantLookupExpression { } } -#[build_function("constant_lookup(...) -> any", type_infer = "panic")] +#[build_function("constant_lookup(...) -> any", type_infer = "unreachable")] fn build_constant_lookup_expr( return_type: DataType, children: Vec, @@ -249,7 +249,7 @@ fn build_constant_lookup_expr( ))) } -#[build_function("case(...) -> any", type_infer = "panic")] +#[build_function("case(...) 
-> any", type_infer = "unreachable")] fn build_case_expr( return_type: DataType, children: Vec<BoxedExpression>, diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs index 0c93c0ed15dd9..e0dd1a8bb3fc8 100644 --- a/src/expr/impl/src/scalar/cast.rs +++ b/src/expr/impl/src/scalar/cast.rs @@ -21,7 +21,7 @@ use itertools::Itertools; use risingwave_common::array::{ArrayImpl, DataChunk, ListRef, ListValue, StructRef, StructValue}; use risingwave_common::cast; use risingwave_common::row::OwnedRow; -use risingwave_common::types::{Int256, JsonbRef, ToText, F64}; +use risingwave_common::types::{Int256, JsonbRef, MapRef, MapValue, ToText, F64}; use risingwave_common::util::iter_util::ZipEqFast; use risingwave_expr::expr::{build_func, Context, ExpressionBoxExt, InputRefExpression}; use risingwave_expr::{function, ExprError, Result}; @@ -189,13 +189,13 @@ pub fn str_to_bytea(elem: &str) -> Result<Box<[u8]>> { cast::str_to_bytea(elem).map_err(|err| ExprError::Parse(err.into())) } -#[function("cast(varchar) -> anyarray", type_infer = "panic")] +#[function("cast(varchar) -> anyarray", type_infer = "unreachable")] fn str_to_list(input: &str, ctx: &Context) -> Result<ListValue> { ListValue::from_str(input, &ctx.return_type).map_err(|err| ExprError::Parse(err.into())) } /// Cast array with `source_elem_type` into array with `target_elem_type` by casting each element. -#[function("cast(anyarray) -> anyarray", type_infer = "panic")] +#[function("cast(anyarray) -> anyarray", type_infer = "unreachable")] fn list_cast(input: ListRef<'_>, ctx: &Context) -> Result<ListValue> { let cast = build_func( PbType::Cast, @@ -213,7 +213,7 @@ fn list_cast(input: ListRef<'_>, ctx: &Context) -> Result<ListValue> { } /// Cast struct of `source_elem_type` to `target_elem_type` by casting each element. -#[function("cast(struct) -> struct", type_infer = "panic")] +#[function("cast(struct) -> struct", type_infer = "unreachable")] fn struct_cast(input: StructRef<'_>, ctx: &Context) -> Result<StructValue> { let fields = (input.iter_fields_ref()) .zip_eq_fast(ctx.arg_types[0].as_struct().types()) @@ -241,6 +241,17 @@ fn struct_cast(input: StructRef<'_>, ctx: &Context) -> Result<StructValue> { Ok(StructValue::new(fields)) } +/// Cast map with `source_elem_type` into map with `target_elem_type` by casting each entry (key and value). +#[function("cast(anymap) -> anymap", type_infer = "unreachable")] +fn map_cast(map: MapRef<'_>, ctx: &Context) -> Result<MapValue> { + let new_ctx = Context { + arg_types: vec![ctx.arg_types[0].clone().as_map().clone().into_list()], + return_type: ctx.return_type.as_map().clone().into_list(), + variadic: ctx.variadic, + }; + list_cast(map.into_inner(), &new_ctx).map(MapValue::from_list_entries) +} + #[cfg(test)] mod tests { use chrono::NaiveDateTime; diff --git a/src/expr/impl/src/scalar/coalesce.rs b/src/expr/impl/src/scalar/coalesce.rs index af3d753867559..6176a54a23d16 100644 --- a/src/expr/impl/src/scalar/coalesce.rs +++ b/src/expr/impl/src/scalar/coalesce.rs @@ -74,7 +74,7 @@ impl Expression for CoalesceExpression { } } -#[build_function("coalesce(...) -> any", type_infer = "panic")] +#[build_function("coalesce(...) 
-> any", type_infer = "unreachable")] fn build(return_type: DataType, children: Vec) -> Result { Ok(Box::new(CoalesceExpression { return_type, diff --git a/src/expr/impl/src/scalar/external/iceberg.rs b/src/expr/impl/src/scalar/external/iceberg.rs index 902545d01c25d..5fbc9b003305a 100644 --- a/src/expr/impl/src/scalar/external/iceberg.rs +++ b/src/expr/impl/src/scalar/external/iceberg.rs @@ -75,7 +75,7 @@ impl risingwave_expr::expr::Expression for IcebergTransform { } } -#[build_function("iceberg_transform(varchar, any) -> any", type_infer = "panic")] +#[build_function("iceberg_transform(varchar, any) -> any", type_infer = "unreachable")] fn build(return_type: DataType, mut children: Vec) -> Result { let transform_type = { let datum = children[0].eval_const()?.unwrap(); diff --git a/src/expr/impl/src/scalar/field.rs b/src/expr/impl/src/scalar/field.rs index 1d26fe9c85dbb..681b4ab6caacf 100644 --- a/src/expr/impl/src/scalar/field.rs +++ b/src/expr/impl/src/scalar/field.rs @@ -54,7 +54,7 @@ impl Expression for FieldExpression { } } -#[build_function("field(struct, int4) -> any", type_infer = "panic")] +#[build_function("field(struct, int4) -> any", type_infer = "unreachable")] fn build(return_type: DataType, children: Vec) -> Result { // Field `func_call_node` have 2 child nodes, the first is Field `FuncCall` or // `InputRef`, the second is i32 `Literal`. diff --git a/src/expr/impl/src/scalar/jsonb_record.rs b/src/expr/impl/src/scalar/jsonb_record.rs index b1d399d35a5f9..b85feb9190d2a 100644 --- a/src/expr/impl/src/scalar/jsonb_record.rs +++ b/src/expr/impl/src/scalar/jsonb_record.rs @@ -115,7 +115,7 @@ fn jsonb_populate_recordset<'a>( /// ---- /// 1 [1,2,3] {1,2,3} NULL (123,"a b c") /// ``` -#[function("jsonb_to_record(jsonb) -> struct", type_infer = "panic")] +#[function("jsonb_to_record(jsonb) -> struct", type_infer = "unreachable")] fn jsonb_to_record(jsonb: JsonbRef<'_>, ctx: &Context) -> Result { let output_type = ctx.return_type.as_struct(); jsonb.to_struct(output_type).map_err(parse_err) @@ -135,7 +135,10 @@ fn jsonb_to_record(jsonb: JsonbRef<'_>, ctx: &Context) -> Result { /// 1 foo /// 2 NULL /// ``` -#[function("jsonb_to_recordset(jsonb) -> setof struct", type_infer = "panic")] +#[function( + "jsonb_to_recordset(jsonb) -> setof struct", + type_infer = "unreachable" +)] fn jsonb_to_recordset<'a>( jsonb: JsonbRef<'a>, ctx: &'a Context, diff --git a/src/expr/impl/src/scalar/to_jsonb.rs b/src/expr/impl/src/scalar/to_jsonb.rs index bb381954cc76b..c11d4474dc43b 100644 --- a/src/expr/impl/src/scalar/to_jsonb.rs +++ b/src/expr/impl/src/scalar/to_jsonb.rs @@ -16,8 +16,8 @@ use std::fmt::Debug; use jsonbb::Builder; use risingwave_common::types::{ - DataType, Date, Decimal, Int256Ref, Interval, JsonbRef, JsonbVal, ListRef, ScalarRefImpl, - Serial, StructRef, Time, Timestamp, Timestamptz, ToText, F32, F64, + DataType, Date, Decimal, Int256Ref, Interval, JsonbRef, JsonbVal, ListRef, MapRef, + ScalarRefImpl, Serial, StructRef, Time, Timestamp, Timestamptz, ToText, F32, F64, }; use risingwave_common::util::iter_util::ZipEqDebug; use risingwave_expr::expr::Context; @@ -72,6 +72,7 @@ impl ToJsonb for ScalarRefImpl<'_> { Timestamptz(v) => v.add_to(ty, builder), Struct(v) => v.add_to(ty, builder), List(v) => v.add_to(ty, builder), + Map(v) => v.add_to(ty, builder), } } } @@ -227,6 +228,20 @@ impl ToJsonb for ListRef<'_> { } } +impl ToJsonb for MapRef<'_> { + fn add_to(self, data_type: &DataType, builder: &mut Builder) -> Result<()> { + let value_type = data_type.as_map().value(); + 
builder.begin_object(); + for (k, v) in self.iter() { + // XXX: is to_text here reasonable? + builder.add_string(&k.to_text()); + v.add_to(value_type, builder)?; + } + builder.end_object(); + Ok(()) + } +} + impl ToJsonb for StructRef<'_> { fn add_to(self, data_type: &DataType, builder: &mut Builder) -> Result<()> { builder.begin_object(); diff --git a/src/expr/impl/src/udf/wasm.rs b/src/expr/impl/src/udf/wasm.rs index bd84cfa004326..5a0dd0420b4d9 100644 --- a/src/expr/impl/src/udf/wasm.rs +++ b/src/expr/impl/src/udf/wasm.rs @@ -279,5 +279,6 @@ fn datatype_name(ty: &DataType) -> String { .map(|(name, ty)| format!("{}:{}", name, datatype_name(ty))) .join(",") ), + DataType::Map(_m) => todo!("map in wasm udf"), } } diff --git a/src/expr/macro/src/gen.rs b/src/expr/macro/src/gen.rs index ba51f4ba6bf54..ce5c8a884abdf 100644 --- a/src/expr/macro/src/gen.rs +++ b/src/expr/macro/src/gen.rs @@ -83,11 +83,13 @@ impl FunctionAttr { attrs } - /// Generate the type infer function. + /// Generate the type infer function: `fn(&[DataType]) -> Result` fn generate_type_infer_fn(&self) -> Result { if let Some(func) = &self.type_infer { - if func == "panic" { - return Ok(quote! { |_| panic!("type inference function is not implemented") }); + if func == "unreachable" { + return Ok( + quote! { |_| unreachable!("type inference for this function should be specially handled in frontend, and should not call sig.type_infer") }, + ); } // use the user defined type inference function return Ok(func.parse().unwrap()); @@ -115,6 +117,11 @@ impl FunctionAttr { // infer as the type of "struct" argument return Ok(quote! { |args| Ok(args[#i].clone()) }); } + } else if self.ret == "anymap" { + if let Some(i) = self.args.iter().position(|t| t == "anymap") { + // infer as the type of "anymap" argument + return Ok(quote! { |args| Ok(args[#i].clone()) }); + } } else { // the return type is fixed let ty = data_type(&self.ret); @@ -122,13 +129,17 @@ impl FunctionAttr { } Err(Error::new( Span::call_site(), - "type inference function is required", + "type inference function cannot be automatically derived. You should provide: `type_infer = \"|args| Ok(...)\"`", )) } - /// Generate a descriptor of the scalar or table function. + /// Generate a descriptor (`FuncSign`) of the scalar or table function. /// /// The types of arguments and return value should not contain wildcard. + /// + /// # Arguments + /// `build_fn`: whether the user provided a function is a build function. + /// (from the `#[build_function]` macro) pub fn generate_function_descriptor( &self, user_fn: &UserFunctionAttr, @@ -156,6 +167,7 @@ impl FunctionAttr { } else if self.rewritten { quote! { |_, _| Err(ExprError::UnsupportedFunction(#name.into())) } } else { + // This is the core logic for `#[function]` self.generate_build_scalar_function(user_fn, true)? }; let type_infer_fn = self.generate_type_infer_fn()?; @@ -1302,6 +1314,7 @@ fn sig_data_type(ty: &str) -> TokenStream2 { match ty { "any" => quote! { SigDataType::Any }, "anyarray" => quote! { SigDataType::AnyArray }, + "anymap" => quote! { SigDataType::AnyMap }, "struct" => quote! { SigDataType::AnyStruct }, _ if ty.starts_with("struct") && ty.contains("any") => quote! { SigDataType::AnyStruct }, _ => { @@ -1320,6 +1333,12 @@ fn data_type(ty: &str) -> TokenStream2 { return quote! 
{ DataType::Struct(#ty.parse().expect("invalid struct type")) }; } let variant = format_ident!("{}", types::data_type(ty)); + // TODO: enable the check + // assert!( + // !matches!(ty, "any" | "anyarray" | "anymap" | "struct"), + // "{ty}, {variant}" + // ); + quote! { DataType::#variant } } diff --git a/src/expr/macro/src/lib.rs b/src/expr/macro/src/lib.rs index 3a905165c2ee2..8fd03e344db89 100644 --- a/src/expr/macro/src/lib.rs +++ b/src/expr/macro/src/lib.rs @@ -30,7 +30,7 @@ mod utils; /// Defining the RisingWave SQL function from a Rust function. /// -/// [Online version of this doc.](https://risingwavelabs.github.io/risingwave/risingwave_expr_macro/attr.function.html) +/// [Online version of this doc.](https://risingwavelabs.github.io/risingwave/rustdoc/risingwave_expr_macro/attr.function.html) /// /// # Table of Contents /// @@ -70,8 +70,8 @@ mod utils; /// name ( [arg_types],* [...] ) [ -> [setof] return_type ] /// ``` /// -/// Where `name` is the function name in `snake_case`, which must match the function name defined -/// in `prost`. +/// Where `name` is the function name in `snake_case`, which must match the function name (in `UPPER_CASE`) defined +/// in `proto/expr.proto`. /// /// `arg_types` is a comma-separated list of argument types. The allowed data types are listed in /// in the `name` column of the appendix's [type matrix]. Wildcards or `auto` can also be used, as @@ -98,7 +98,7 @@ mod utils; /// } /// ``` /// -/// ## Type Expansion +/// ## Type Expansion with `*` /// /// Types can be automatically expanded to multiple types using wildcards. Here are some examples: /// @@ -115,13 +115,17 @@ mod utils; /// #[function("cast(varchar) -> int64")] /// ``` /// -/// Please note the difference between `*` and `any`. `*` will generate a function for each type, +/// Please note the difference between `*` and `any`: `*` will generate a function for each type, /// whereas `any` will only generate one function with a dynamic data type `Scalar`. +/// This is similar to `impl T` and `dyn T` in Rust. The performance of using `*` would be much better than `any`. +/// But we do not always prefer `*` due to better performance. In some cases, using `any` is more convenient. +/// For example, in array functions, the element type of `ListValue` is `Scalar(Ref)Impl`. +/// It is unnecessary to convert it from/into various `T`. /// -/// ## Automatic Type Inference +/// ## Automatic Type Inference with `auto` /// /// Correspondingly, the return type can be denoted as `auto` to be automatically inferred based on -/// the input types. It will be inferred as the smallest type that can accommodate all input types. +/// the input types. It will be inferred as the _smallest type_ that can accommodate all input types. /// /// For example, `#[function("add(*int, *int) -> auto")]` will be expanded to: /// @@ -142,10 +146,10 @@ mod utils; /// #[function("neg(int64) -> int64")] /// ``` /// -/// ## Custom Type Inference Function +/// ## Custom Type Inference Function with `type_infer` /// /// A few functions might have a return type that dynamically changes based on the input argument -/// types, such as `unnest`. +/// types, such as `unnest`. This is mainly for composite types like `anyarray`, `struct`, and `anymap`. /// /// In such cases, the `type_infer` option can be used to specify a function to infer the return /// type based on the input argument types. Its function signature is @@ -163,7 +167,7 @@ mod utils; /// )] /// ``` /// -/// This type inference function will be invoked at the frontend. 
+/// This type inference function will be invoked at the frontend (`infer_type_with_sigmap`). /// /// # Rust Function Signature /// @@ -182,8 +186,9 @@ mod utils; /// /// ## Nullable Arguments /// -/// The functions above will only be called when all arguments are not null. If null arguments need -/// to be considered, the `Option` type can be used: +/// The functions above will only be called when all arguments are not null. +/// It will return null if any argument is null. +/// If null arguments need to be considered, the `Option` type can be used: /// /// ```ignore /// #[function("trim_array(anyarray, int32) -> anyarray")] @@ -192,11 +197,11 @@ mod utils; /// /// This function will be called when `n` is null, but not when `array` is null. /// -/// ## Return Value +/// ## Return `NULL`s and Errors /// /// Similarly, the return value type can be one of the following: /// -/// - `T`: Indicates that a non-null value is always returned, and errors will not occur. +/// - `T`: Indicates that a non-null value is always returned (for non-null inputs), and errors will not occur. /// - `Option`: Indicates that a null value may be returned, but errors will not occur. /// - `Result`: Indicates that an error may occur, but a null value will not be returned. /// - `Result>`: Indicates that a null value may be returned, and an error may also occur. @@ -419,6 +424,16 @@ pub fn function(attr: TokenStream, item: TokenStream) -> TokenStream { } } +/// Different from `#[function]`, which implements the `Expression` trait for a rust scalar function, +/// `#[build_function]` is used when you already implemented `Expression` manually. +/// +/// The expected input is a "build" function: +/// ```ignore +/// fn(data_type: DataType, children: Vec) -> Result +/// ``` +/// +/// It generates the function descriptor using the "build" function and +/// registers the description to the `FUNC_SIG_MAP`. #[proc_macro_attribute] pub fn build_function(attr: TokenStream, item: TokenStream) -> TokenStream { fn inner(attr: TokenStream, item: TokenStream) -> Result { diff --git a/src/expr/macro/src/types.rs b/src/expr/macro/src/types.rs index f2219a1c34bd6..4f07162d038a0 100644 --- a/src/expr/macro/src/types.rs +++ b/src/expr/macro/src/types.rs @@ -35,6 +35,7 @@ const TYPE_MATRIX: &str = " jsonb Jsonb JsonbArray JsonbVal JsonbRef<'_> _ anyarray List ListArray ListValue ListRef<'_> _ struct Struct StructArray StructValue StructRef<'_> _ + anymap Map MapArray MapValue MapRef<'_> _ any ??? ArrayImpl ScalarImpl ScalarRefImpl<'_> _ "; @@ -81,7 +82,7 @@ fn lookup_matrix(mut ty: &str, idx: usize) -> &str { None } }); - s.unwrap_or_else(|| panic!("unknown type: {}", ty)) + s.unwrap_or_else(|| panic!("failed to lookup type matrix: unknown type: {}", ty)) } /// Expands a type wildcard string into a list of concrete types. 
diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 89d29e076a38f..3a95eab660b09 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -93,7 +93,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "signal", "fs", ] } -tokio-stream = "0.1" +tokio-stream = { workspace = true } tonic = { workspace = true } tracing = "0.1" uuid = "1" diff --git a/src/frontend/planner_test/src/lib.rs b/src/frontend/planner_test/src/lib.rs index abb291cc37d7e..675b99ad0e145 100644 --- a/src/frontend/planner_test/src/lib.rs +++ b/src/frontend/planner_test/src/lib.rs @@ -319,7 +319,7 @@ impl TestCase { let object_to_create = if is_table { "TABLE" } else { "SOURCE" }; format!( r#"CREATE {} {} - WITH (connector = 'kafka', kafka.topic = 'abc', kafka.servers = 'localhost:1001') + WITH (connector = 'kafka', kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT {} ENCODE {} (message = '.test.TestRecord', schema.location = 'file://"#, object_to_create, connector_name, connector_format, connector_encode ) diff --git a/src/frontend/planner_test/tests/testdata/input/agg.yaml b/src/frontend/planner_test/tests/testdata/input/agg.yaml index 25f62054f1e66..f00c9f2b4065a 100644 --- a/src/frontend/planner_test/tests/testdata/input/agg.yaml +++ b/src/frontend/planner_test/tests/testdata/input/agg.yaml @@ -1053,6 +1053,13 @@ sql: | CREATE TABLE t (v1 int, v2 int); SELECT sum(v1) as s1, approx_percentile(0.2, 0.01) WITHIN GROUP (order by v1 desc) from t; + expected_outputs: + - logical_plan + - stream_plan +- name: test simple approx_percentile with different approx_percentile interleaved with stateless + stateful simple aggs + sql: | + CREATE TABLE t (v1 int, v2 int); + SELECT sum(v1) as s1, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v1) as x, count(*), max(v2) as m2, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v2) as y from t; expected_outputs: - logical_plan - stream_plan \ No newline at end of file diff --git a/src/frontend/planner_test/tests/testdata/output/agg.yaml b/src/frontend/planner_test/tests/testdata/output/agg.yaml index b6c692fd47364..f6d1af67b331e 100644 --- a/src/frontend/planner_test/tests/testdata/output/agg.yaml +++ b/src/frontend/planner_test/tests/testdata/output/agg.yaml @@ -2040,3 +2040,33 @@ └─StreamShare { id: 2 } └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t._row_id] } └─StreamTableScan { table: t, columns: [t.v1, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } +- name: test simple approx_percentile with different approx_percentile interleaved with stateless + stateful simple aggs + sql: | + CREATE TABLE t (v1 int, v2 int); + SELECT sum(v1) as s1, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v1) as x, count(*), max(v2) as m2, approx_percentile(0.5, 0.01) WITHIN GROUP (order by v2) as y from t; + logical_plan: |- + LogicalProject { exprs: [sum(t.v1), approx_percentile($expr1), count, max(t.v2), approx_percentile($expr2)] } + └─LogicalAgg { aggs: [sum(t.v1), approx_percentile($expr1), count, max(t.v2), approx_percentile($expr2)] } + └─LogicalProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2] } + └─LogicalScan { table: t, columns: [t.v1, t.v2, t._row_id] } + stream_plan: |- + StreamMaterialize { columns: [s1, x, count, m2, y], stream_key: [], pk_columns: [], pk_conflict: NoCheck } + └─StreamRowMerge { output: [sum(sum(t.v1)):Int64, approx_percentile:Float64, sum0(count):Int64, max(max(t.v2)):Int32, 
approx_percentile:Float64] } + ├─StreamRowMerge { output: [approx_percentile:Float64, approx_percentile:Float64] } + │ ├─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ │ └─StreamExchange { dist: Single } + │ │ └─StreamLocalApproxPercentile { percentile_col: $expr1, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ │ └─StreamShare { id: 2 } + │ │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } + │ │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + │ └─StreamGlobalApproxPercentile { quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamExchange { dist: Single } + │ └─StreamLocalApproxPercentile { percentile_col: $expr2, quantile: 0.5:Float64, relative_error: 0.01:Float64 } + │ └─StreamShare { id: 2 } + │ └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr1, t.v2, t.v2::Float64 as $expr2, t._row_id] } + │ └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } + └─StreamSimpleAgg { aggs: [sum(sum(t.v1)), sum0(count), max(max(t.v2)), count] } + └─StreamExchange { dist: Single } + └─StreamHashAgg { group_key: [$expr5], aggs: [sum(t.v1), count, max(t.v2)] } + └─StreamProject { exprs: [t.v1, t.v1::Float64 as $expr3, t.v2, t.v2::Float64 as $expr4, t._row_id, Vnode(t._row_id) as $expr5] } + └─StreamTableScan { table: t, columns: [t.v1, t.v2, t._row_id], stream_scan_type: ArrangementBackfill, stream_key: [t._row_id], pk: [_row_id], dist: UpstreamHashShard(t._row_id) } diff --git a/src/frontend/planner_test/tests/testdata/output/insert.yaml b/src/frontend/planner_test/tests/testdata/output/insert.yaml index 8f51f07d458e7..26dcfca24eaee 100644 --- a/src/frontend/planner_test/tests/testdata/output/insert.yaml +++ b/src/frontend/planner_test/tests/testdata/output/insert.yaml @@ -80,7 +80,11 @@ sql: | create table t (v1 real, v2 int); insert into t values (22.33, true); - binder_error: 'Bind error: cannot cast type "boolean" to "integer" in Assign context' + binder_error: | + failed to cast the 2nd column + + Caused by: + cannot cast type "boolean" to "integer" in Assign context - name: simple insert sql: | create table t (v1 int, v2 int); @@ -175,7 +179,11 @@ sql: | create table t (v1 timestamp, v2 real); insert into t select time '01:02:03', 4.5 from t; - binder_error: 'Bind error: cannot cast type "time without time zone" to "timestamp without time zone" in Assign context' + binder_error: | + failed to cast the 1st column + + Caused by: + cannot cast type "time without time zone" to "timestamp without time zone" in Assign context - name: insert into select mismatch columns length sql: | create table t (v1 int, v2 real); diff --git a/src/frontend/src/binder/expr/function/aggregate.rs b/src/frontend/src/binder/expr/function/aggregate.rs index 77538b799bad2..1e7b76bf7629e 100644 --- a/src/frontend/src/binder/expr/function/aggregate.rs +++ b/src/frontend/src/binder/expr/function/aggregate.rs @@ -53,7 +53,7 @@ impl Binder { ) -> Result { self.ensure_aggregate_allowed()?; - let distinct = f.distinct; + let distinct = f.arg_list.distinct; let filter_expr = f.filter.clone(); let (direct_args, args, order_by) = if matches!(kind, agg_kinds::ordered_set!()) { @@ -105,14 +105,14 @@ impl Binder { assert!(matches!(kind, 
agg_kinds::ordered_set!())); - if !f.order_by.is_empty() { + if !f.arg_list.order_by.is_empty() { return Err(ErrorCode::InvalidInputSyntax(format!( "ORDER BY is not allowed for ordered-set aggregation `{}`", kind )) .into()); } - if f.distinct { + if f.arg_list.distinct { return Err(ErrorCode::InvalidInputSyntax(format!( "DISTINCT is not allowed for ordered-set aggregation `{}`", kind @@ -128,6 +128,7 @@ impl Binder { })?; let mut direct_args: Vec<_> = f + .arg_list .args .into_iter() .map(|arg| self.bind_function_arg(arg)) @@ -207,19 +208,21 @@ impl Binder { } let args: Vec<_> = f + .arg_list .args .iter() .map(|arg| self.bind_function_arg(arg.clone())) .flatten_ok() .try_collect()?; let order_by = OrderBy::new( - f.order_by + f.arg_list + .order_by .into_iter() .map(|e| self.bind_order_by_expr(e)) .try_collect()?, ); - if f.distinct { + if f.arg_list.distinct { if matches!( kind, AggKind::Builtin(PbAggKind::ApproxCountDistinct) diff --git a/src/frontend/src/binder/expr/function/builtin_scalar.rs b/src/frontend/src/binder/expr/function/builtin_scalar.rs index b38a36586d1e7..824f08cf36b73 100644 --- a/src/frontend/src/binder/expr/function/builtin_scalar.rs +++ b/src/frontend/src/binder/expr/function/builtin_scalar.rs @@ -129,6 +129,18 @@ impl Binder { ) } + // `CURRENT_DATABASE` is the name of the database you are currently connected to. + // `CURRENT_CATALOG` is a synonym for `CURRENT_DATABASE`. + fn current_database() -> Handle { + guard_by_len( + 0, + raw(|binder, _inputs| Ok(ExprImpl::literal_varchar(binder.db_name.clone()))), + ) + } + + // XXX: can we unify this with FUNC_SIG_MAP? + // For raw_call here, it seems unnecessary to declare it again here. + // For some functions, we have validation logic here. Is it still useful now? static HANDLES: LazyLock> = LazyLock::new(|| { [ ( @@ -387,6 +399,9 @@ impl Binder { ("jsonb_path_query_array", raw_call(ExprType::JsonbPathQueryArray)), ("jsonb_path_query_first", raw_call(ExprType::JsonbPathQueryFirst)), ("jsonb_set", raw_call(ExprType::JsonbSet)), + // map + ("map_from_entries", raw_call(ExprType::MapFromEntries)), + ("map_access",raw_call(ExprType::MapAccess)), // Functions that return a constant value ("pi", pi()), // greatest and least @@ -404,9 +419,8 @@ impl Binder { Ok(ExprImpl::literal_varchar(v)) })), ), - ("current_database", guard_by_len(0, raw(|binder, _inputs| { - Ok(ExprImpl::literal_varchar(binder.db_name.clone())) - }))), + ("current_catalog", current_database()), + ("current_database", current_database()), ("current_schema", guard_by_len(0, raw(|binder, _inputs| { return Ok(binder .first_valid_schema() @@ -692,6 +706,7 @@ impl Binder { return Ok(FunctionCall::new(func, inputs)?.into()); } + // Note: for raw_call, we only check name here. The type check is done later. 
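For comparison, another zero-argument constant function could be registered through the same combinators as the `current_catalog` / `current_database` entries above. The name `rw_greeting` and its return value are hypothetical; only the items visible in this hunk (`Handle`, `guard_by_len`, `raw`, `ExprImpl::literal_varchar`) are reused:

```rust
// Hypothetical example, not part of this change: a niladic function returning
// a constant varchar, registered via the existing combinators.
fn rw_greeting() -> Handle {
    guard_by_len(
        0,
        raw(|_binder, _inputs| Ok(ExprImpl::literal_varchar("hello".to_string()))),
    )
}

// ...and its entry alongside the others in the `HANDLES` table:
// ("rw_greeting", rw_greeting()),
```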
match HANDLES.get(function_name) { Some(handle) => handle(self, inputs), None => { diff --git a/src/frontend/src/binder/expr/function/mod.rs b/src/frontend/src/binder/expr/function/mod.rs index 9c2b9e1c644e1..17738f4dbd5be 100644 --- a/src/frontend/src/binder/expr/function/mod.rs +++ b/src/frontend/src/binder/expr/function/mod.rs @@ -43,6 +43,7 @@ const SYS_FUNCTION_WITHOUT_ARGS: &[&str] = &[ "user", "current_user", "current_role", + "current_catalog", "current_schema", "current_timestamp", ]; @@ -108,6 +109,7 @@ impl Binder { } let mut inputs: Vec<_> = f + .arg_list .args .iter() .map(|arg| self.bind_function_arg(arg.clone())) @@ -135,7 +137,11 @@ impl Binder { } UserDefinedFunction::new(func.clone(), scalar_inputs).into() } else { - self.bind_builtin_scalar_function(&function_name, scalar_inputs, f.variadic)? + self.bind_builtin_scalar_function( + &function_name, + scalar_inputs, + f.arg_list.variadic, + )? }; return self.bind_aggregate_function(f, AggKind::WrapScalar(scalar.to_expr_proto())); } @@ -180,7 +186,9 @@ impl Binder { // The actual inline logic for sql udf // Note that we will always create new udf context for each sql udf - let Ok(context) = UdfContext::create_udf_context(&f.args, &Arc::clone(func)) else { + let Ok(context) = + UdfContext::create_udf_context(&f.arg_list.args, &Arc::clone(func)) + else { return Err(ErrorCode::InvalidInputSyntax( "failed to create the `udf_context`, please recheck your function definition and syntax".to_string() ) @@ -265,7 +273,7 @@ impl Binder { return self.bind_aggregate_function(f, AggKind::Builtin(kind)); } - if f.distinct || !f.order_by.is_empty() || f.filter.is_some() { + if f.arg_list.distinct || !f.arg_list.order_by.is_empty() || f.filter.is_some() { return Err(ErrorCode::InvalidInputSyntax(format!( "DISTINCT, ORDER BY or FILTER is only allowed in aggregation functions, but `{}` is not an aggregation function", function_name ) @@ -303,17 +311,18 @@ impl Binder { return Ok(TableFunction::new(function_type, inputs)?.into()); } - self.bind_builtin_scalar_function(function_name.as_str(), inputs, f.variadic) + self.bind_builtin_scalar_function(function_name.as_str(), inputs, f.arg_list.variadic) } fn bind_array_transform(&mut self, f: Function) -> Result { - let [array, lambda] = <[FunctionArg; 2]>::try_from(f.args).map_err(|args| -> RwError { - ErrorCode::BindError(format!( - "`array_transform` expect two inputs `array` and `lambda`, but {} were given", - args.len() - )) - .into() - })?; + let [array, lambda] = + <[FunctionArg; 2]>::try_from(f.arg_list.args).map_err(|args| -> RwError { + ErrorCode::BindError(format!( + "`array_transform` expect two inputs `array` and `lambda`, but {} were given", + args.len() + )) + .into() + })?; let bound_array = self.bind_function_arg(array)?; let [bound_array] = <[ExprImpl; 1]>::try_from(bound_array).map_err(|bound_array| -> RwError { diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index 308f34a31d447..d11134b097e3b 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -14,7 +14,7 @@ use itertools::Itertools; use risingwave_common::catalog::{ColumnDesc, ColumnId, PG_CATALOG_SCHEMA_NAME}; -use risingwave_common::types::DataType; +use risingwave_common::types::{DataType, MapType}; use risingwave_common::util::iter_util::zip_eq_fast; use risingwave_common::{bail_no_function, bail_not_implemented, not_implemented}; use risingwave_pb::plan_common::{AdditionalColumn, ColumnDescVersion}; @@ -999,6 +999,11 @@ pub fn 
bind_data_type(data_type: &AstDataType) -> Result { .collect::>>()?, types.iter().map(|f| f.name.real_value()).collect_vec(), ), + AstDataType::Map(kv) => { + let key = bind_data_type(&kv.0)?; + let value = bind_data_type(&kv.1)?; + DataType::Map(MapType::try_from_kv(key, value)?) + } AstDataType::Custom(qualified_type_name) => { let idents = qualified_type_name .0 diff --git a/src/frontend/src/binder/expr/value.rs b/src/frontend/src/binder/expr/value.rs index e1fc78e884e02..5b69610f13bfe 100644 --- a/src/frontend/src/binder/expr/value.rs +++ b/src/frontend/src/binder/expr/value.rs @@ -212,7 +212,7 @@ impl Binder { .map(|e| self.bind_expr_inner(e)) .collect::>>()?; let data_type = - DataType::new_struct(exprs.iter().map(|e| e.return_type()).collect_vec(), vec![]); + DataType::new_unnamed_struct(exprs.iter().map(|e| e.return_type()).collect_vec()); let expr: ExprImpl = FunctionCall::new_unchecked(ExprType::Row, exprs, data_type).into(); Ok(expr) } diff --git a/src/frontend/src/binder/insert.rs b/src/frontend/src/binder/insert.rs index e0b5ce422e75a..505acec2265b7 100644 --- a/src/frontend/src/binder/insert.rs +++ b/src/frontend/src/binder/insert.rs @@ -14,6 +14,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; +use anyhow::Context; use itertools::Itertools; use risingwave_common::catalog::{ColumnCatalog, Schema, TableVersionId}; use risingwave_common::types::DataType; @@ -27,6 +28,7 @@ use crate::catalog::TableId; use crate::error::{ErrorCode, Result, RwError}; use crate::expr::{ExprImpl, InputRef}; use crate::user::UserId; +use crate::utils::ordinal; #[derive(Debug, Clone)] pub struct BoundInsert { @@ -197,7 +199,7 @@ impl Binder { let bound_query; let cast_exprs; - let bounded_column_nums = match source.as_simple_values() { + let bound_column_nums = match source.as_simple_values() { None => { bound_query = self.bind_query(source)?; let actual_types = bound_query.data_types(); @@ -234,7 +236,7 @@ impl Binder { cols_to_insert_in_table.len() }; - let (err_msg, default_column_indices) = match num_target_cols.cmp(&bounded_column_nums) { + let (err_msg, default_column_indices) = match num_target_cols.cmp(&bound_column_nums) { std::cmp::Ordering::Equal => (None, default_column_indices), std::cmp::Ordering::Greater => { if has_user_specified_columns { @@ -248,7 +250,7 @@ impl Binder { // insert into t values (7) // this kind of usage is fine, null values will be provided // implicitly. - (None, col_indices_to_insert.split_off(bounded_column_nums)) + (None, col_indices_to_insert.split_off(bound_column_nums)) } } std::cmp::Ordering::Less => { @@ -312,10 +314,22 @@ impl Binder { let msg = match expected_types.len().cmp(&exprs.len()) { std::cmp::Ordering::Less => "INSERT has more expressions than target columns", _ => { + let expr_len = exprs.len(); return exprs .into_iter() .zip_eq_fast(expected_types.iter().take(expr_num)) - .map(|(e, t)| e.cast_assign(t.clone()).map_err(Into::into)) + .enumerate() + .map(|(i, (e, t))| { + let res = e.cast_assign(t.clone()); + if expr_len > 1 { + res.with_context(|| { + format!("failed to cast the {} column", ordinal(i + 1)) + }) + .map_err(Into::into) + } else { + res.map_err(Into::into) + } + }) .try_collect(); } }; diff --git a/src/frontend/src/binder/mod.rs b/src/frontend/src/binder/mod.rs index 7cd9032890091..1b745b9a67c5f 100644 --- a/src/frontend/src/binder/mod.rs +++ b/src/frontend/src/binder/mod.rs @@ -260,8 +260,8 @@ impl UdfContext { } } -/// `ParameterTypes` is used to record the types of the parameters during binding. 
It works -/// following the rules: +/// `ParameterTypes` is used to record the types of the parameters during binding prepared stataments. +/// It works by following the rules: /// 1. At the beginning, it contains the user specified parameters type. /// 2. When the binder encounters a parameter, it will record it as unknown(call `record_new_param`) /// if it didn't exist in `ParameterTypes`. @@ -790,30 +790,32 @@ mod tests { }, ], ), - args: [ - Unnamed( - Expr( - Value( - Number( - "0.5", + arg_list: FunctionArgList { + distinct: false, + args: [ + Unnamed( + Expr( + Value( + Number( + "0.5", + ), ), ), ), - ), - Unnamed( - Expr( - Value( - Number( - "0.01", + Unnamed( + Expr( + Value( + Number( + "0.01", + ), ), ), ), - ), - ], - variadic: false, + ], + variadic: false, + order_by: [], + }, over: None, - distinct: false, - order_by: [], filter: None, within_group: Some( OrderByExpr { diff --git a/src/frontend/src/binder/relation/table_function.rs b/src/frontend/src/binder/relation/table_function.rs index 22b9c2a344c2c..cc672703cda35 100644 --- a/src/frontend/src/binder/relation/table_function.rs +++ b/src/frontend/src/binder/relation/table_function.rs @@ -18,7 +18,7 @@ use itertools::Itertools; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{Field, Schema, RW_INTERNAL_TABLE_FUNCTION_NAME}; use risingwave_common::types::DataType; -use risingwave_sqlparser::ast::{Function, FunctionArg, ObjectName, TableAlias}; +use risingwave_sqlparser::ast::{Function, FunctionArg, FunctionArgList, ObjectName, TableAlias}; use super::watermark::is_watermark_func; use super::{Binder, Relation, Result, WindowTableFunctionKind}; @@ -85,11 +85,8 @@ impl Binder { let func = self.bind_function(Function { scalar_as_agg: false, name, - args, - variadic: false, + arg_list: FunctionArgList::args_only(args), over: None, - distinct: false, - order_by: vec![], filter: None, within_group: None, }); diff --git a/src/frontend/src/binder/select.rs b/src/frontend/src/binder/select.rs index e15a9eac73246..e6eead2bc08d8 100644 --- a/src/frontend/src/binder/select.rs +++ b/src/frontend/src/binder/select.rs @@ -714,8 +714,8 @@ fn data_type_to_alias(data_type: &AstDataType) -> Option { AstDataType::Jsonb => "jsonb".to_string(), AstDataType::Array(ty) => return data_type_to_alias(ty), AstDataType::Custom(ty) => format!("{}", ty), - AstDataType::Struct(_) => { - // Note: Postgres doesn't have anonymous structs + AstDataType::Struct(_) | AstDataType::Map(_) => { + // It doesn't bother to derive aliases for these types. 
return None; } }; diff --git a/src/frontend/src/catalog/source_catalog.rs b/src/frontend/src/catalog/source_catalog.rs index 8e64a6db4e2b9..f453a89204501 100644 --- a/src/frontend/src/catalog/source_catalog.rs +++ b/src/frontend/src/catalog/source_catalog.rs @@ -114,12 +114,9 @@ impl From<&PbSource> for SourceCatalog { let owner = prost.owner; let watermark_descs = prost.get_watermark_descs().clone(); - let associated_table_id = prost - .optional_associated_table_id - .clone() - .map(|id| match id { - OptionalAssociatedTableId::AssociatedTableId(id) => id, - }); + let associated_table_id = prost.optional_associated_table_id.map(|id| match id { + OptionalAssociatedTableId::AssociatedTableId(id) => id, + }); let version = prost.version; let connection_id = prost.connection_id; diff --git a/src/frontend/src/catalog/system_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/mod.rs index 5e32dcb7b2aba..432266d1871a9 100644 --- a/src/frontend/src/catalog/system_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/mod.rs @@ -240,7 +240,7 @@ fn get_acl_items( ) -> String { let mut res = String::from("{"); let mut empty_flag = true; - let super_privilege = available_prost_privilege(object.clone(), for_dml_table); + let super_privilege = available_prost_privilege(*object, for_dml_table); for user in users { let privileges = if user.is_super { vec![&super_privilege] diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_indexes.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_indexes.rs index a602e71804777..05741c7b89ef6 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_indexes.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_indexes.rs @@ -29,6 +29,15 @@ use risingwave_frontend_macro::system_catalog; JOIN rw_catalog.rw_tables t ON i.primary_table_id = t.id JOIN rw_catalog.rw_schemas s ON i.schema_id = s.id UNION ALL + SELECT s.name AS schemaname, + t.name AS tablename, + i.name AS indexname, + NULL AS tablespace, + i.definition AS indexdef + FROM rw_catalog.rw_indexes i + JOIN rw_catalog.rw_materialized_views t ON i.primary_table_id = t.id + JOIN rw_catalog.rw_schemas s ON i.schema_id = s.id + UNION ALL SELECT s.name AS schemaname, t.name AS tablename, concat(t.name, '_pkey') AS indexname, diff --git a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs index 0c45c2f37ad5e..07a60e4375727 100644 --- a/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs +++ b/src/frontend/src/catalog/system_catalog/rw_catalog/rw_hummock_version.rs @@ -101,7 +101,7 @@ fn version_to_compaction_group_rows(version: &HummockVersion) -> Vec Result write!( + | DataType::Struct(_) + | DataType::Map(_) => write!( f, "'{}'", v.as_scalar_ref_impl().to_text_with_type(&data_type) @@ -69,7 +70,12 @@ impl std::fmt::Debug for Literal { impl Literal { pub fn new(data: Datum, data_type: DataType) -> Self { - assert!(literal_type_match(&data_type, data.as_ref())); + assert!( + literal_type_match(&data_type, data.as_ref()), + "data_type: {:?}, data: {:?}", + data_type, + data + ); Literal { data, data_type: Some(data_type), diff --git a/src/frontend/src/expr/mod.rs b/src/frontend/src/expr/mod.rs index d42317b00f10b..73becd7bc86c8 100644 --- a/src/frontend/src/expr/mod.rs +++ b/src/frontend/src/expr/mod.rs @@ -68,7 +68,7 @@ pub use subquery::{Subquery, SubqueryKind}; pub use table_function::{TableFunction, TableFunctionType}; pub use type_inference::{ 
align_types, cast_map_array, cast_ok, cast_sigs, infer_some_all, infer_type, infer_type_name, - infer_type_with_sigmap, least_restrictive, CastContext, CastSig, FuncSign, + infer_type_with_sigmap, CastContext, CastSig, FuncSign, }; pub use user_defined_function::UserDefinedFunction; pub use utils::*; diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs index dd2f353a34b0d..59f087672417c 100644 --- a/src/frontend/src/expr/pure.rs +++ b/src/frontend/src/expr/pure.rs @@ -249,7 +249,9 @@ impl ExprVisitor for ImpureAnalyzer { | Type::InetNtoa | Type::InetAton | Type::QuoteLiteral - | Type::QuoteNullable => + | Type::QuoteNullable + | Type::MapFromEntries + | Type::MapAccess => // expression output is deterministic(same result for the same input) { func_call diff --git a/src/frontend/src/expr/type_inference/cast.rs b/src/frontend/src/expr/type_inference/cast.rs index 1f1a96e92b826..51441c3f70c5b 100644 --- a/src/frontend/src/expr/type_inference/cast.rs +++ b/src/frontend/src/expr/type_inference/cast.rs @@ -29,7 +29,10 @@ use crate::expr::{Expr as _, ExprImpl, InputRef, Literal}; /// /// If you also need to cast them to this type, and there are more than 2 exprs, check out /// [`align_types`]. -pub fn least_restrictive(lhs: DataType, rhs: DataType) -> std::result::Result { +/// +/// Note: be careful that literal strings are considered untyped. +/// e.g., `align_types(1, '1')` will be `Int32`, but `least_restrictive(Int32, Varchar)` will return error. +fn least_restrictive(lhs: DataType, rhs: DataType) -> std::result::Result { if lhs == rhs { Ok(lhs) } else if cast_ok(&lhs, &rhs, CastContext::Implicit) { @@ -81,6 +84,7 @@ pub fn align_array_and_element( element_indices: &[usize], inputs: &mut [ExprImpl], ) -> std::result::Result { + tracing::trace!(?inputs, "align_array_and_element begin"); let mut dummy_element = match inputs[array_idx].is_untyped() { // when array is unknown type, make an unknown typed value (e.g. null) true => ExprImpl::from(Literal::new_untyped(None)), @@ -106,7 +110,7 @@ pub fn align_array_and_element( // elements are already casted by `align_types`, we cast the array argument here inputs[array_idx].cast_implicit_mut(array_type.clone())?; - + tracing::trace!(?inputs, "align_array_and_element done"); Ok(array_type) } @@ -114,6 +118,7 @@ pub fn align_array_and_element( pub fn cast_ok(source: &DataType, target: &DataType, allows: CastContext) -> bool { cast_ok_struct(source, target, allows) || cast_ok_array(source, target, allows) + || cast_ok_map(source, target, allows) || cast_ok_base(source, target, allows) } @@ -161,6 +166,17 @@ fn cast_ok_array(source: &DataType, target: &DataType, allows: CastContext) -> b } } +fn cast_ok_map(source: &DataType, target: &DataType, allows: CastContext) -> bool { + match (source, target) { + (DataType::Map(source_elem), DataType::Map(target_elem)) => cast_ok( + &source_elem.clone().into_list(), + &target_elem.clone().into_list(), + allows, + ), + _ => false, + } +} + pub fn cast_map_array() -> Vec<(DataTypeName, DataTypeName, CastContext)> { CAST_MAP .iter() diff --git a/src/frontend/src/expr/type_inference/func.rs b/src/frontend/src/expr/type_inference/func.rs index 5b6a12ba58cce..746460e2b6363 100644 --- a/src/frontend/src/expr/type_inference/func.rs +++ b/src/frontend/src/expr/type_inference/func.rs @@ -31,6 +31,7 @@ use crate::expr::{cast_ok, is_row_function, Expr as _, ExprImpl, ExprType, Funct /// is not supported on backend. /// /// It also mutates the `inputs` by adding necessary casts. 
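The `cast_ok_map` hunk above accepts a map cast exactly when its key/value pair is castable element-wise. A hedged sketch of the expected behaviour, written as if inside the frontend crate and reusing `MapType::try_from_kv` from the `bind_data_type` hunk earlier in this diff; the concrete types are arbitrary examples:

```rust
use risingwave_common::types::{DataType, MapType};

use crate::expr::{cast_ok, CastContext}; // as re-exported in the `expr/mod.rs` hunk above

fn map_cast_examples() {
    let src = DataType::Map(MapType::try_from_kv(DataType::Varchar, DataType::Int16).unwrap());
    let dst = DataType::Map(MapType::try_from_kv(DataType::Varchar, DataType::Int64).unwrap());
    // int16 -> int64 is implicitly castable, so the map cast should be accepted...
    assert!(cast_ok(&src, &dst, CastContext::Implicit));
    // ...while the narrowing direction should not be accepted implicitly.
    assert!(!cast_ok(&dst, &src, CastContext::Implicit));
}
```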
+#[tracing::instrument(level = "trace", skip(sig_map))] pub fn infer_type_with_sigmap( func_name: FuncName, inputs: &mut [ExprImpl], @@ -65,6 +66,7 @@ pub fn infer_type_with_sigmap( }) .collect_vec(); let sig = infer_type_name(sig_map, func_name, &actuals)?; + tracing::trace!(?actuals, ?sig, "infer_type_name"); // add implicit casts to inputs for (expr, t) in inputs.iter_mut().zip_eq_fast(&sig.inputs_type) { @@ -82,6 +84,7 @@ pub fn infer_type_with_sigmap( let input_types = inputs.iter().map(|expr| expr.return_type()).collect_vec(); let return_type = (sig.type_infer)(&input_types)?; + tracing::trace!(?input_types, ?return_type, "finished type inference"); Ok(return_type) } @@ -608,6 +611,21 @@ fn infer_type_for_special( _ => Ok(None), } } + ExprType::MapAccess => { + ensure_arity!("map_access", | inputs | == 2); + let map_type = inputs[0].return_type().into_map(); + // We do not align the map's key type with the input type here, but cast the latter to the former instead. + // e.g., for {1:'a'}[1.0], if we align them, we will get "numeric" as the key type, which violates the map type's restriction. + match inputs[1].cast_implicit_mut(map_type.key().clone()) { + Ok(()) => Ok(Some(map_type.value().clone())), + Err(_) => Err(ErrorCode::BindError(format!( + "Cannot access {} in {}", + inputs[1].return_type(), + inputs[0].return_type(), + )) + .into()), + } + } ExprType::Vnode => { ensure_arity!("vnode", 1 <= | inputs |); Ok(Some(VirtualNode::RW_TYPE)) @@ -730,6 +748,8 @@ pub fn infer_type_name<'a>( }; if candidates.is_empty() { + // TODO: when type mismatches, show what are supported signatures for the + // function with the given name. bail_no_function!("{}", sig()); } diff --git a/src/frontend/src/expr/type_inference/mod.rs b/src/frontend/src/expr/type_inference/mod.rs index 5f191a898614c..2845f05ec0dae 100644 --- a/src/frontend/src/expr/type_inference/mod.rs +++ b/src/frontend/src/expr/type_inference/mod.rs @@ -18,7 +18,6 @@ mod cast; mod func; pub use cast::{ - align_types, cast_map_array, cast_ok, cast_ok_base, cast_sigs, least_restrictive, CastContext, - CastSig, + align_types, cast_map_array, cast_ok, cast_ok_base, cast_sigs, CastContext, CastSig, }; pub use func::{infer_some_all, infer_type, infer_type_name, infer_type_with_sigmap, FuncSign}; diff --git a/src/frontend/src/handler/alter_streaming_rate_limit.rs b/src/frontend/src/handler/alter_streaming_rate_limit.rs index 2dff4338a1de3..e916d8ed8b87a 100644 --- a/src/frontend/src/handler/alter_streaming_rate_limit.rs +++ b/src/frontend/src/handler/alter_streaming_rate_limit.rs @@ -14,7 +14,7 @@ use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::bail; -use risingwave_pb::meta::PbThrottleTarget; +use risingwave_pb::meta::ThrottleTarget as PbThrottleTarget; use risingwave_sqlparser::ast::ObjectName; use super::{HandlerArgs, RwPgResponse}; @@ -72,6 +72,19 @@ pub async fn handle_alter_streaming_rate_limit( }; (StatementType::ALTER_SOURCE, source_id) } + PbThrottleTarget::CdcTable => { + let reader = session.env().catalog_reader().read_guard(); + let (table, schema_name) = + reader.get_any_table_by_name(db_name, schema_path, &real_table_name)?; + if table.table_type != TableType::Table { + return Err(ErrorCode::InvalidInputSyntax(format!( + "\"{table_name}\" is not a table", + )) + .into()); + } + session.check_privilege_for_drop_alter(schema_name, &**table)?; + (StatementType::ALTER_TABLE, table.id.table_id) + } _ => bail!("Unsupported throttle target: {:?}", kind), }; diff --git 
a/src/frontend/src/handler/create_mv.rs b/src/frontend/src/handler/create_mv.rs index 0eabe34696150..4399d80811c19 100644 --- a/src/frontend/src/handler/create_mv.rs +++ b/src/frontend/src/handler/create_mv.rs @@ -38,6 +38,7 @@ use crate::planner::Planner; use crate::scheduler::streaming_manager::CreatingStreamingJobInfo; use crate::session::SessionImpl; use crate::stream_fragmenter::build_graph; +use crate::utils::ordinal; pub(super) fn parse_column_names(columns: &[Ident]) -> Option> { if columns.is_empty() { @@ -278,20 +279,6 @@ It only indicates the physical clustering of the data, which may improve the per )) } -fn ordinal(i: usize) -> String { - let s = i.to_string(); - let suffix = if s.ends_with('1') && !s.ends_with("11") { - "st" - } else if s.ends_with('2') && !s.ends_with("12") { - "nd" - } else if s.ends_with('3') && !s.ends_with("13") { - "rd" - } else { - "th" - }; - s + suffix -} - #[cfg(test)] pub mod tests { use std::collections::HashMap; diff --git a/src/frontend/src/handler/create_sink.rs b/src/frontend/src/handler/create_sink.rs index fdf6a65c4437c..834f92906efa2 100644 --- a/src/frontend/src/handler/create_sink.rs +++ b/src/frontend/src/handler/create_sink.rs @@ -882,7 +882,7 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock hashmap!( Format::Plain => vec![Encode::Json, Encode::Avro, Encode::Protobuf], - Format::Upsert => vec![Encode::Json, Encode::Avro], + Format::Upsert => vec![Encode::Json, Encode::Avro, Encode::Protobuf], Format::Debezium => vec![Encode::Json], ), KinesisSink::SINK_NAME => hashmap!( @@ -942,7 +942,7 @@ pub mod tests { let proto_file = create_proto_file(PROTO_FILE_DATA); let sql = format!( r#"CREATE SOURCE t1 - WITH (connector = 'kafka', kafka.topic = 'abc', kafka.servers = 'localhost:1001') + WITH (connector = 'kafka', kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = '.test.TestRecord', schema.location = 'file://{}')"#, proto_file.path().to_str().unwrap() ); diff --git a/src/frontend/src/handler/mod.rs b/src/frontend/src/handler/mod.rs index 271c41b1678c2..9cacbdcb8ce34 100644 --- a/src/frontend/src/handler/mod.rs +++ b/src/frontend/src/handler/mod.rs @@ -707,6 +707,18 @@ pub async fn handle( ) .await } + Statement::AlterTable { + name, + operation: AlterTableOperation::SetBackfillRateLimit { rate_limit }, + } => { + alter_streaming_rate_limit::handle_alter_streaming_rate_limit( + handler_args, + PbThrottleTarget::CdcTable, + name, + rate_limit, + ) + .await + } Statement::AlterIndex { name, operation: AlterIndexOperation::RenameIndex { index_name }, diff --git a/src/frontend/src/handler/show.rs b/src/frontend/src/handler/show.rs index 5be1681e0a094..6c0bc2c7e61e8 100644 --- a/src/frontend/src/handler/show.rs +++ b/src/frontend/src/handler/show.rs @@ -588,7 +588,7 @@ mod tests { let frontend = LocalFrontend::new(Default::default()).await; let sql = r#"CREATE SOURCE t1 (column1 varchar) - WITH (connector = 'kafka', kafka.topic = 'abc', kafka.servers = 'localhost:1001') + WITH (connector = 'kafka', kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE JSON"#; frontend.run_sql(sql).await.unwrap(); @@ -602,7 +602,7 @@ mod tests { let proto_file = create_proto_file(PROTO_FILE_DATA); let sql = format!( r#"CREATE SOURCE t - WITH (connector = 'kafka', kafka.topic = 'abc', kafka.servers = 'localhost:1001') + WITH (connector = 'kafka', kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = '.test.TestRecord', schema.location = 'file://{}')"#, 
proto_file.path().to_str().unwrap() ); diff --git a/src/frontend/src/observer/observer_manager.rs b/src/frontend/src/observer/observer_manager.rs index 80f6316f9ca90..b49abcb023429 100644 --- a/src/frontend/src/observer/observer_manager.rs +++ b/src/frontend/src/observer/observer_manager.rs @@ -472,8 +472,7 @@ impl FrontendObserverNode { match info { Info::HummockSnapshot(hummock_snapshot) => match resp.operation() { Operation::Update => { - self.hummock_snapshot_manager - .update(hummock_snapshot.clone()); + self.hummock_snapshot_manager.update(*hummock_snapshot); } _ => panic!("receive an unsupported notify {:?}", resp), }, diff --git a/src/frontend/src/optimizer/plan_expr_visitor/strong.rs b/src/frontend/src/optimizer/plan_expr_visitor/strong.rs index 84b8c4f6eb8f3..5e9a3ce05392c 100644 --- a/src/frontend/src/optimizer/plan_expr_visitor/strong.rs +++ b/src/frontend/src/optimizer/plan_expr_visitor/strong.rs @@ -291,6 +291,8 @@ impl Strong { | ExprType::JsonbPopulateRecord | ExprType::JsonbToRecord | ExprType::JsonbSet + | ExprType::MapFromEntries + | ExprType::MapAccess | ExprType::Vnode | ExprType::TestPaidTier | ExprType::Proctime diff --git a/src/frontend/src/optimizer/plan_node/logical_agg.rs b/src/frontend/src/optimizer/plan_node/logical_agg.rs index 987a0ae204869..e63b7d760a68f 100644 --- a/src/frontend/src/optimizer/plan_node/logical_agg.rs +++ b/src/frontend/src/optimizer/plan_node/logical_agg.rs @@ -76,38 +76,17 @@ impl LogicalAgg { let mut core = self.core.clone(); // ====== Handle approx percentile aggs - let SeparatedAggInfo { normal, approx } = self.separate_normal_and_special_agg(); - - let AggInfo { - calls: non_approx_percentile_agg_calls, - col_mapping: non_approx_percentile_col_mapping, - } = normal; - let AggInfo { - calls: approx_percentile_agg_calls, - col_mapping: approx_percentile_col_mapping, - } = approx; - - let needs_row_merge = (!non_approx_percentile_agg_calls.is_empty() - && !approx_percentile_agg_calls.is_empty()) - || approx_percentile_agg_calls.len() >= 2; - core.input = if needs_row_merge { - // If there's row merge, we need to share the input. - StreamShare::new_from_input(stream_input.clone()).into() - } else { - stream_input - }; - core.agg_calls = non_approx_percentile_agg_calls; + let (non_approx_percentile_col_mapping, approx_percentile_col_mapping, approx_percentile) = + self.prepare_approx_percentile(&mut core, stream_input.clone())?; - let approx_percentile = - self.build_approx_percentile_aggs(core.input.clone(), &approx_percentile_agg_calls)?; - - // ====== Handle normal aggs if core.agg_calls.is_empty() { if let Some(approx_percentile) = approx_percentile { return Ok(approx_percentile); }; bail!("expected at least one agg call"); } + + // ====== Handle normal aggs let total_agg_calls = core .agg_calls .iter() @@ -123,21 +102,12 @@ impl LogicalAgg { new_stream_simple_agg(Agg::new(total_agg_calls, IndexSet::empty(), exchange)); // ====== Merge approx percentile and normal aggs - if let Some(approx_percentile) = approx_percentile { - if needs_row_merge { - let row_merge = StreamRowMerge::new( - approx_percentile, - global_agg.into(), - approx_percentile_col_mapping, - non_approx_percentile_col_mapping, - )?; - Ok(row_merge.into()) - } else { - Ok(approx_percentile) - } - } else { - Ok(global_agg.into()) - } + Self::add_row_merge_if_needed( + approx_percentile, + global_agg.into(), + approx_percentile_col_mapping, + non_approx_percentile_col_mapping, + ) } /// Generate plan for stateless/stateful 2-phase streaming agg. 
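The `LogicalAgg` refactor in the following hunks moves the approx_percentile handling into `prepare_approx_percentile` / `add_row_merge_if_needed` so the two-phase path can reuse it. A minimal restatement of when a `StreamRowMerge` is introduced, with the separated agg-call vectors reduced to their lengths:

```rust
// Same condition as `needs_row_merge` in `prepare_approx_percentile` below,
// expressed over counts instead of the actual agg-call vectors.
fn needs_row_merge(normal_aggs: usize, approx_percentile_aggs: usize) -> bool {
    (normal_aggs > 0 && approx_percentile_aggs > 0) || approx_percentile_aggs >= 2
}

fn main() {
    assert!(needs_row_merge(1, 1));  // sum(v1) + approx_percentile(...): merge
    assert!(needs_row_merge(0, 2));  // two approx_percentile calls: merge
    assert!(!needs_row_merge(2, 0)); // ordinary aggs only: plain global agg
    assert!(!needs_row_merge(0, 1)); // a single approx_percentile: no merge
}
```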
@@ -148,10 +118,21 @@ impl LogicalAgg { stream_input: PlanRef, dist_key: &[usize], ) -> Result { - let input_col_num = stream_input.schema().len(); + let mut core = self.core.clone(); + + let (non_approx_percentile_col_mapping, approx_percentile_col_mapping, approx_percentile) = + self.prepare_approx_percentile(&mut core, stream_input.clone())?; + + if core.agg_calls.is_empty() { + if let Some(approx_percentile) = approx_percentile { + return Ok(approx_percentile); + }; + bail!("expected at least one agg call"); + } // Generate vnode via project // TODO(kwannoel): We should apply Project optimization rules here. + let input_col_num = stream_input.schema().len(); // get schema len before moving `stream_input`. let project = StreamProject::new(generic::Project::with_vnode_col(stream_input, dist_key)); let vnode_col_idx = project.base.schema().len() - 1; @@ -160,7 +141,7 @@ impl LogicalAgg { local_group_key.insert(vnode_col_idx); let n_local_group_key = local_group_key.len(); let local_agg = new_stream_hash_agg( - Agg::new(self.agg_calls().to_vec(), local_group_key, project.into()), + Agg::new(core.agg_calls.to_vec(), local_group_key, project.into()), Some(vnode_col_idx), ); // Global group key excludes vnode. @@ -173,11 +154,11 @@ impl LogicalAgg { .expect("some input group key could not be mapped"); // Generate global agg step - if self.group_key().is_empty() { + let global_agg = if self.group_key().is_empty() { let exchange = RequiredDist::single().enforce_if_not_satisfies(local_agg.into(), &Order::any())?; let global_agg = new_stream_simple_agg(Agg::new( - self.agg_calls() + core.agg_calls .iter() .enumerate() .map(|(partial_output_idx, agg_call)| { @@ -187,7 +168,7 @@ impl LogicalAgg { global_group_key.into_iter().collect(), exchange, )); - Ok(global_agg.into()) + global_agg.into() } else { let exchange = RequiredDist::shard_by_key(input_col_num, &global_group_key) .enforce_if_not_satisfies(local_agg.into(), &Order::any())?; @@ -195,7 +176,7 @@ impl LogicalAgg { // we can just follow it. let global_agg = new_stream_hash_agg( Agg::new( - self.agg_calls() + core.agg_calls .iter() .enumerate() .map(|(partial_output_idx, agg_call)| { @@ -208,8 +189,14 @@ impl LogicalAgg { ), None, ); - Ok(global_agg.into()) - } + global_agg.into() + }; + Self::add_row_merge_if_needed( + approx_percentile, + global_agg, + approx_percentile_col_mapping, + non_approx_percentile_col_mapping, + ) } fn gen_single_plan(&self, stream_input: PlanRef) -> Result { @@ -304,6 +291,71 @@ impl LogicalAgg { } } + /// Prepares metadata and the `approx_percentile` plan, if there's one present. + /// It may modify `core.agg_calls` to separate normal agg and approx percentile agg, + /// and `core.input` to share the input via `StreamShare`, + /// to both approx percentile agg and normal agg. 
+ fn prepare_approx_percentile( + &self, + core: &mut Agg, + stream_input: PlanRef, + ) -> Result<(ColIndexMapping, ColIndexMapping, Option)> { + let SeparatedAggInfo { normal, approx } = self.separate_normal_and_special_agg(); + + let AggInfo { + calls: non_approx_percentile_agg_calls, + col_mapping: non_approx_percentile_col_mapping, + } = normal; + let AggInfo { + calls: approx_percentile_agg_calls, + col_mapping: approx_percentile_col_mapping, + } = approx; + if !self.group_key().is_empty() && !approx_percentile_agg_calls.is_empty() { + bail_not_implemented!("two-phase approx percentile agg with group key, please use single phase agg for approx_percentile with group key"); + } + + // Either we have approx percentile aggs and non_approx percentile aggs, + // or we have at least 2 approx percentile aggs. + let needs_row_merge = (!non_approx_percentile_agg_calls.is_empty() + && !approx_percentile_agg_calls.is_empty()) + || approx_percentile_agg_calls.len() >= 2; + core.input = if needs_row_merge { + // If there's row merge, we need to share the input. + StreamShare::new_from_input(stream_input.clone()).into() + } else { + stream_input + }; + core.agg_calls = non_approx_percentile_agg_calls; + + let approx_percentile = + self.build_approx_percentile_aggs(core.input.clone(), &approx_percentile_agg_calls)?; + Ok(( + non_approx_percentile_col_mapping, + approx_percentile_col_mapping, + approx_percentile, + )) + } + + /// Add `RowMerge` if needed + fn add_row_merge_if_needed( + approx_percentile: Option, + global_agg: PlanRef, + approx_percentile_col_mapping: ColIndexMapping, + non_approx_percentile_col_mapping: ColIndexMapping, + ) -> Result { + if let Some(approx_percentile) = approx_percentile { + let row_merge = StreamRowMerge::new( + approx_percentile, + global_agg, + approx_percentile_col_mapping, + non_approx_percentile_col_mapping, + )?; + Ok(row_merge.into()) + } else { + Ok(global_agg) + } + } + fn separate_normal_and_special_agg(&self) -> SeparatedAggInfo { let estimated_len = self.agg_calls().len() - 1; let mut approx_percentile_agg_calls = Vec::with_capacity(estimated_len); diff --git a/src/frontend/src/optimizer/rule/index_selection_rule.rs b/src/frontend/src/optimizer/rule/index_selection_rule.rs index e65b249379750..548fda7b92af4 100644 --- a/src/frontend/src/optimizer/rule/index_selection_rule.rs +++ b/src/frontend/src/optimizer/rule/index_selection_rule.rs @@ -746,7 +746,7 @@ impl<'a> TableScanIoEstimator<'a> { .sum::() } - pub fn estimate_data_type_size(data_type: &DataType) -> usize { + fn estimate_data_type_size(data_type: &DataType) -> usize { use std::mem::size_of; match data_type { @@ -769,6 +769,7 @@ impl<'a> TableScanIoEstimator<'a> { DataType::Jsonb => 20, DataType::Struct { .. } => 20, DataType::List { .. 
} => 20, + DataType::Map(_) => 20, } } diff --git a/src/frontend/src/scheduler/distributed/stage.rs b/src/frontend/src/scheduler/distributed/stage.rs index 543d0c0a3ae6f..e30dfa0dad377 100644 --- a/src/frontend/src/scheduler/distributed/stage.rs +++ b/src/frontend/src/scheduler/distributed/stage.rs @@ -196,7 +196,7 @@ impl StageExecution { match cur_state { Pending { msg_sender } => { let runner = StageRunner { - epoch: self.epoch.clone(), + epoch: self.epoch, stage: self.stage.clone(), worker_node_manager: self.worker_node_manager.clone(), tasks: self.tasks.clone(), @@ -380,30 +380,57 @@ impl StageRunner { )); } } else if let Some(source_info) = self.stage.source_info.as_ref() { - let chunk_size = (source_info.split_info().unwrap().len() as f32 + // If there is no file in source, the `chunk_size` is set to 1. + let chunk_size = ((source_info.split_info().unwrap().len() as f32 / self.stage.parallelism.unwrap() as f32) - .ceil() as usize; - for (id, split) in source_info - .split_info() - .unwrap() - .chunks(chunk_size) - .enumerate() - { + .ceil() as usize) + .max(1); + if source_info.split_info().unwrap().is_empty() { + // No file in source, schedule an empty task. + const EMPTY_TASK_ID: u64 = 0; let task_id = PbTaskId { query_id: self.stage.query_id.id.clone(), stage_id: self.stage.id, - task_id: id as u64, + task_id: EMPTY_TASK_ID, }; - let plan_fragment = self - .create_plan_fragment(id as u64, Some(PartitionInfo::Source(split.to_vec()))); - let worker = - self.choose_worker(&plan_fragment, id as u32, self.stage.dml_table_id)?; + let plan_fragment = + self.create_plan_fragment(EMPTY_TASK_ID, Some(PartitionInfo::Source(vec![]))); + let worker = self.choose_worker( + &plan_fragment, + EMPTY_TASK_ID as u32, + self.stage.dml_table_id, + )?; futures.push(self.schedule_task( task_id, plan_fragment, worker, expr_context.clone(), )); + } else { + for (id, split) in source_info + .split_info() + .unwrap() + .chunks(chunk_size) + .enumerate() + { + let task_id = PbTaskId { + query_id: self.stage.query_id.id.clone(), + stage_id: self.stage.id, + task_id: id as u64, + }; + let plan_fragment = self.create_plan_fragment( + id as u64, + Some(PartitionInfo::Source(split.to_vec())), + ); + let worker = + self.choose_worker(&plan_fragment, id as u32, self.stage.dml_table_id)?; + futures.push(self.schedule_task( + task_id, + plan_fragment, + worker, + expr_context.clone(), + )); + } } } else if let Some(file_scan_info) = self.stage.file_scan_info.as_ref() { let chunk_size = (file_scan_info.file_location.len() as f32 @@ -622,7 +649,7 @@ impl StageRunner { &plan_node, &task_id, self.ctx.to_batch_task_context(), - self.epoch.clone(), + self.epoch, shutdown_rx.clone(), ); @@ -908,7 +935,7 @@ impl StageRunner { let t_id = task_id.task_id; let stream_status: Fuse> = compute_client - .create_task(task_id, plan_fragment, self.epoch.clone(), expr_context) + .create_task(task_id, plan_fragment, self.epoch, expr_context) .await .inspect_err(|_| self.mask_failed_serving_worker(&worker)) .map_err(|e| anyhow!(e))? diff --git a/src/frontend/src/scheduler/plan_fragmenter.rs b/src/frontend/src/scheduler/plan_fragmenter.rs index 7643e5c5e7ba2..65bfbe09c54b0 100644 --- a/src/frontend/src/scheduler/plan_fragmenter.rs +++ b/src/frontend/src/scheduler/plan_fragmenter.rs @@ -732,13 +732,19 @@ impl StageGraph { // For batch reading file source, the number of files involved is typically large. // In order to avoid generating a task for each file, the parallelism of tasks is limited here. 
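Taken together, this hunk and the `plan_fragmenter.rs` hunk that follows give `chunk_size` a floor of 1, schedule a single empty task when the source has no files, and cap `task_parallelism` at `min(file count, batch_parallelism / 2)` with a floor of 1. A standalone restatement with illustrative numbers:

```rust
// Same sizing rules as the scheduler changes above/below, written standalone.
fn task_parallelism(num_files: usize, batch_parallelism: usize) -> u32 {
    (num_files.min(batch_parallelism / 2) as u32).max(1)
}

fn chunk_size(num_files: usize, parallelism: usize) -> usize {
    ((num_files as f32 / parallelism as f32).ceil() as usize).max(1)
}

fn main() {
    assert_eq!(task_parallelism(0, 8), 1);   // no files: still one (empty) task
    assert_eq!(task_parallelism(3, 8), 3);   // fewer files than slots: one per file
    assert_eq!(task_parallelism(100, 8), 4); // capped at batch_parallelism / 2
    assert_eq!(chunk_size(0, 1), 1);         // chunk size never drops to 0
}
```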
- // todo(wcy-fdu): Currently it will be divided into half of schedule_unit_count groups, and this will be changed to configurable later. + // The minimum `task_parallelism` is 1. Additionally, `task_parallelism` + // must be greater than the number of files to read. Therefore, we first take the + // minimum of the number of files and (self.batch_parallelism / 2). If the number of + // files is 0, we set task_parallelism to 1. + let task_parallelism = match &stage.source_info { Some(SourceScanInfo::Incomplete(source_fetch_info)) => { match source_fetch_info.connector { - ConnectorProperties::Gcs(_) | ConnectorProperties::OpendalS3(_) => { - (self.batch_parallelism / 2) as u32 - } + ConnectorProperties::Gcs(_) | ConnectorProperties::OpendalS3(_) => (min( + complete_source_info.split_info().unwrap().len() as u32, + (self.batch_parallelism / 2) as u32, + )) + .max(1), _ => complete_source_info.split_info().unwrap().len() as u32, } } diff --git a/src/frontend/src/scheduler/snapshot.rs b/src/frontend/src/scheduler/snapshot.rs index 9d13573e03a7b..73a3ade5799b5 100644 --- a/src/frontend/src/scheduler/snapshot.rs +++ b/src/frontend/src/scheduler/snapshot.rs @@ -121,7 +121,7 @@ impl PinnedSnapshot { impl Drop for PinnedSnapshot { fn drop(&mut self) { - let _ = self.unpin_sender.send(Operation::Unpin(self.value.clone())); + let _ = self.unpin_sender.send(Operation::Unpin(self.value)); } } @@ -202,9 +202,7 @@ impl HummockSnapshotManager { false } else { // First tell the worker that a new snapshot is going to be pinned. - self.worker_sender - .send(Operation::Pin(snapshot.clone())) - .unwrap(); + self.worker_sender.send(Operation::Pin(snapshot)).unwrap(); // Then set the latest snapshot. *old_snapshot = Arc::new(PinnedSnapshot { value: snapshot, diff --git a/src/frontend/src/utils/mod.rs b/src/frontend/src/utils/mod.rs index 697b626fb3398..ca2a90e4e38e7 100644 --- a/src/frontend/src/utils/mod.rs +++ b/src/frontend/src/utils/mod.rs @@ -186,3 +186,17 @@ impl Hash for dyn DynHash { self.dyn_hash(state); } } + +pub fn ordinal(i: usize) -> String { + let s = i.to_string(); + let suffix = if s.ends_with('1') && !s.ends_with("11") { + "st" + } else if s.ends_with('2') && !s.ends_with("12") { + "nd" + } else if s.ends_with('3') && !s.ends_with("13") { + "rd" + } else { + "th" + }; + s + suffix +} diff --git a/src/meta/Cargo.toml b/src/meta/Cargo.toml index b35ce9e73d96b..4511e9f61d894 100644 --- a/src/meta/Cargo.toml +++ b/src/meta/Cargo.toml @@ -36,7 +36,7 @@ flate2 = "1" function_name = "0.3.0" futures = { version = "0.3", default-features = false, features = ["alloc"] } hex = "0.4" -hyper = "0.14" # required by tonic +http = "1" itertools = { workspace = true } jsonbb = { workspace = true } maplit = "1.0.2" @@ -81,7 +81,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "signal", ] } tokio-retry = "0.3" -tokio-stream = { version = "0.1", features = ["net"] } +tokio-stream = { workspace = true } tonic = { workspace = true } tower = { version = "0.4", features = ["util", "load-shed"] } tracing = "0.1" diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 65e730166aed8..825d40b5172d9 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -25,6 +25,7 @@ use educe::Educe; pub use error::{MetaError, MetaResult}; use redact::Secret; use risingwave_common::config::OverrideConfig; +use risingwave_common::license::LicenseKey; use risingwave_common::util::meta_addr::MetaAddressStrategy; use risingwave_common::util::resource_util; use 
risingwave_common::util::tokio_util::sync::CancellationToken; @@ -187,8 +188,13 @@ pub struct MetaNodeOpts { #[clap(long, hide = true, env = "RW_CONNECTOR_RPC_ENDPOINT")] pub connector_rpc_endpoint: Option, + /// The license key to activate enterprise features. + #[clap(long, hide = true, env = "RW_LICENSE_KEY")] + #[override_opts(path = system.license_key)] + pub license_key: Option, + /// 128-bit AES key for secret store in HEX format. - #[educe(Debug(ignore))] + #[educe(Debug(ignore))] // TODO: use newtype to redact debug impl #[clap(long, hide = true, env = "RW_SECRET_STORE_PRIVATE_KEY_HEX")] pub secret_store_private_key_hex: Option, diff --git a/src/meta/node/src/server.rs b/src/meta/node/src/server.rs index 5c72b39bcd156..5d1b3570e1ce3 100644 --- a/src/meta/node/src/server.rs +++ b/src/meta/node/src/server.rs @@ -772,7 +772,7 @@ pub async fn start_service_as_election_leader( risingwave_pb::meta::event_log::Event::MetaNodeStart(event), ]); - let server = tonic::transport::Server::builder() + let server_builder = tonic::transport::Server::builder() .layer(MetricsMiddlewareLayer::new(meta_metrics)) .layer(TracingExtractLayer::new()) .add_service(HeartbeatServiceServer::new(heartbeat_srv)) @@ -794,17 +794,19 @@ pub async fn start_service_as_election_leader( .add_service(ServingServiceServer::new(serving_srv)) .add_service(CloudServiceServer::new(cloud_srv)) .add_service(SinkCoordinationServiceServer::new(sink_coordination_srv)) - .add_service(EventLogServiceServer::new(event_log_srv)) - .add_service(TraceServiceServer::new(trace_srv)) - .monitored_serve_with_shutdown( - address_info.listen_addr, - "grpc-meta-leader-service", - TcpConfig { - tcp_nodelay: true, - keepalive_duration: None, - }, - shutdown.clone().cancelled_owned(), - ); + .add_service(EventLogServiceServer::new(event_log_srv)); + #[cfg(not(madsim))] // `otlp-embedded` does not use madsim-patched tonic + let server_builder = server_builder.add_service(TraceServiceServer::new(trace_srv)); + + let server = server_builder.monitored_serve_with_shutdown( + address_info.listen_addr, + "grpc-meta-leader-service", + TcpConfig { + tcp_nodelay: true, + keepalive_duration: None, + }, + shutdown.clone().cancelled_owned(), + ); started::set(); let _server_handle = tokio::spawn(server); diff --git a/src/meta/service/Cargo.toml b/src/meta/service/Cargo.toml index 1e3330a2b53a0..69986f8570234 100644 --- a/src/meta/service/Cargo.toml +++ b/src/meta/service/Cargo.toml @@ -40,7 +40,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "time", "signal", ] } -tokio-stream = { version = "0.1", features = ["net"] } +tokio-stream = { workspace = true } tonic = { workspace = true } tracing = "0.1" diff --git a/src/meta/service/src/ddl_service.rs b/src/meta/service/src/ddl_service.rs index 2594ccc123ff3..a3ee394e8f18f 100644 --- a/src/meta/service/src/ddl_service.rs +++ b/src/meta/service/src/ddl_service.rs @@ -907,7 +907,7 @@ impl DdlService for DdlServiceImpl { let req = request.into_inner(); let table_id = req.get_table_id(); - let parallelism = req.get_parallelism()?.clone(); + let parallelism = *req.get_parallelism()?; let deferred = req.get_deferred(); self.ddl_controller diff --git a/src/meta/service/src/hummock_service.rs b/src/meta/service/src/hummock_service.rs index f2a933bb57295..21e203d8440bd 100644 --- a/src/meta/service/src/hummock_service.rs +++ b/src/meta/service/src/hummock_service.rs @@ -88,10 +88,13 @@ impl HummockManagerService for HummockServiceImpl { &self, _request: Request, ) -> Result, Status> { - let 
current_version = self.hummock_manager.get_current_version().await; + let current_version = self + .hummock_manager + .on_current_version(|version| version.into()) + .await; Ok(Response::new(GetCurrentVersionResponse { status: None, - current_version: Some(current_version.into()), + current_version: Some(current_version), })) } diff --git a/src/meta/service/src/notification_service.rs b/src/meta/service/src/notification_service.rs index 96ab254211d1c..aada6c6876afe 100644 --- a/src/meta/service/src/notification_service.rs +++ b/src/meta/service/src/notification_service.rs @@ -346,13 +346,16 @@ impl NotificationServiceImpl { async fn hummock_subscribe(&self) -> MetaResult { let (tables, catalog_version) = self.get_tables_and_creating_tables_snapshot().await?; - let hummock_version = self.hummock_manager.get_current_version().await; + let hummock_version = self + .hummock_manager + .on_current_version(|version| version.into()) + .await; let hummock_write_limits = self.hummock_manager.write_limits().await; let meta_backup_manifest_id = self.backup_manager.manifest().manifest_id; Ok(MetaSnapshot { tables, - hummock_version: Some(hummock_version.into()), + hummock_version: Some(hummock_version), version: Some(SnapshotVersion { catalog_version, ..Default::default() diff --git a/src/meta/service/src/stream_service.rs b/src/meta/service/src/stream_service.rs index d50a088972eeb..cfbdda2e96509 100644 --- a/src/meta/service/src/stream_service.rs +++ b/src/meta/service/src/stream_service.rs @@ -118,6 +118,11 @@ impl StreamManagerService for StreamServiceImpl { .update_mv_rate_limit_by_table_id(TableId::from(request.id), request.rate) .await? } + ThrottleTarget::CdcTable => { + self.metadata_manager + .update_mv_rate_limit_by_table_id(TableId::from(request.id), request.rate) + .await? 
+ } ThrottleTarget::Unspecified => { return Err(Status::invalid_argument("unspecified throttle target")) } diff --git a/src/meta/src/backup_restore/backup_manager.rs b/src/meta/src/backup_restore/backup_manager.rs index 1b3dd61152278..7b3fcd4871eba 100644 --- a/src/meta/src/backup_restore/backup_manager.rs +++ b/src/meta/src/backup_restore/backup_manager.rs @@ -346,8 +346,11 @@ impl BackupWorker { let backup_manager_clone = self.backup_manager.clone(); let job = async move { let hummock_manager = backup_manager_clone.hummock_manager.clone(); - let hummock_version_builder = - async move { hummock_manager.get_current_version().await }; + let hummock_version_builder = async move { + hummock_manager + .on_current_version(|version| version.clone()) + .await + }; match backup_manager_clone.env.meta_store() { MetaStoreImpl::Kv(kv) => { let mut snapshot_builder = diff --git a/src/meta/src/barrier/mod.rs b/src/meta/src/barrier/mod.rs index b8b829ab9cddb..48e8fcbbc05c1 100644 --- a/src/meta/src/barrier/mod.rs +++ b/src/meta/src/barrier/mod.rs @@ -688,7 +688,7 @@ impl GlobalBarrierManager { r#type: node.r#type, host: node.host.clone(), parallelism: node.parallelism, - property: node.property.clone(), + property: node.property, resource: node.resource.clone(), ..Default::default() }, diff --git a/src/meta/src/barrier/recovery.rs b/src/meta/src/barrier/recovery.rs index e5adf887b254c..bb45d5ba7f4ce 100644 --- a/src/meta/src/barrier/recovery.rs +++ b/src/meta/src/barrier/recovery.rs @@ -21,6 +21,7 @@ use itertools::Itertools; use risingwave_common::catalog::TableId; use risingwave_common::config::DefaultParallelism; use risingwave_common::hash::WorkerSlotId; +use risingwave_common::util::epoch::Epoch; use risingwave_meta_model_v2::StreamingParallelism; use risingwave_pb::common::ActorInfo; use risingwave_pb::meta::subscribe_response::{Info, Operation}; @@ -31,7 +32,7 @@ use risingwave_pb::stream_plan::AddMutation; use thiserror_ext::AsReport; use tokio::time::Instant; use tokio_retry::strategy::{jitter, ExponentialBackoff}; -use tracing::{debug, warn, Instrument}; +use tracing::{debug, info, warn, Instrument}; use super::{CheckpointControl, TracedEpoch}; use crate::barrier::command::CommandContext; @@ -228,14 +229,6 @@ impl GlobalBarrierManager { /// /// Returns the new state of the barrier manager after recovery. pub async fn recovery(&mut self, paused_reason: Option, err: Option) { - let prev_epoch = TracedEpoch::new( - self.context - .hummock_manager - .latest_snapshot() - .committed_epoch - .into(), - ); - // Mark blocked and abort buffered schedules, they might be dirty already. 
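The backup and recovery changes in this area switch from cloning the full version (`get_current_version`) or reading `latest_snapshot()` to the closure-based `on_current_version` accessor, extracting only what each call site needs. A fragment showing the calling pattern; `hummock_manager` stands in for whichever handle the call site holds:

```rust
// Calling pattern used throughout these hunks: read fields inside the closure
// instead of cloning the whole `HummockVersion`.
let (max_committed_epoch, version_id) = hummock_manager
    .on_current_version(|version| (version.visible_table_committed_epoch(), version.id))
    .await;
```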
self.scheduled_barriers .abort_and_mark_blocked("cluster is under recovering"); @@ -334,15 +327,36 @@ impl GlobalBarrierManager { .await .context("purge state table from hummock")?; + let (prev_epoch, version_id) = self + .context + .hummock_manager + .on_current_version(|version| { + let max_committed_epoch = version.visible_table_committed_epoch(); + for (table_id, info) in version.state_table_info.info() { + assert_eq!( + info.committed_epoch, max_committed_epoch, + "table {} with invisible epoch is not purged", + table_id + ); + } + ( + TracedEpoch::new(Epoch::from(max_committed_epoch)), + version.id, + ) + }) + .await; + let mut control_stream_manager = ControlStreamManager::new(self.context.clone()); + let reset_start_time = Instant::now(); control_stream_manager - .reset(prev_epoch.value().0, active_streaming_nodes.current()) + .reset(version_id, active_streaming_nodes.current()) .await .inspect_err(|err| { warn!(error = %err.as_report(), "reset compute nodes failed"); })?; + info!(elapsed=?reset_start_time.elapsed(), "control stream reset"); self.context.sink_manager.reset().await; @@ -399,6 +413,7 @@ impl GlobalBarrierManager { let mut node_to_collect = control_stream_manager.inject_barrier(&command_ctx, &info, Some(&info))?; + debug!(?node_to_collect, "inject initial barrier"); while !node_to_collect.is_empty() { let (worker_id, result) = control_stream_manager .next_complete_barrier_response() @@ -407,6 +422,7 @@ impl GlobalBarrierManager { assert_eq!(resp.epoch, command_ctx.prev_epoch.value().0); assert!(node_to_collect.remove(&worker_id)); } + debug!("collected initial barrier"); ( BarrierManagerState::new( diff --git a/src/meta/src/barrier/rpc.rs b/src/meta/src/barrier/rpc.rs index fc125d6e583a4..03dd0b187ba92 100644 --- a/src/meta/src/barrier/rpc.rs +++ b/src/meta/src/barrier/rpc.rs @@ -25,6 +25,7 @@ use futures::{pin_mut, FutureExt, StreamExt}; use itertools::Itertools; use risingwave_common::hash::ActorId; use risingwave_common::util::tracing::TracingContext; +use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::common::{ActorInfo, WorkerNode}; use risingwave_pb::stream_plan::{Barrier, BarrierMutation}; use risingwave_pb::stream_service::{ @@ -104,11 +105,11 @@ impl ControlStreamManager { warn!(id = node.id, host = ?node.host, "node already exists"); return; } - let prev_epoch = self + let version_id = self .context .hummock_manager - .latest_snapshot() - .committed_epoch; + .on_current_version(|version| version.id) + .await; let node_id = node.id; let node_host = node.host.clone().unwrap(); let mut backoff = ExponentialBackoff::from_millis(100) @@ -118,7 +119,7 @@ impl ControlStreamManager { for i in 1..=MAX_RETRY { match self .context - .new_control_stream_node(node.clone(), prev_epoch) + .new_control_stream_node(node.clone(), version_id) .await { Ok((stream_node, response_stream)) => { @@ -142,13 +143,13 @@ impl ControlStreamManager { pub(super) async fn reset( &mut self, - prev_epoch: u64, + version_id: HummockVersionId, nodes: &HashMap, ) -> MetaResult<()> { let nodes = try_join_all(nodes.iter().map(|(worker_id, node)| async { let node = self .context - .new_control_stream_node(node.clone(), prev_epoch) + .new_control_stream_node(node.clone(), version_id) .await?; Result::<_, MetaError>::Ok((*worker_id, node)) })) @@ -353,7 +354,7 @@ impl GlobalBarrierManagerContext { async fn new_control_stream_node( &self, node: WorkerNode, - prev_epoch: u64, + initial_version_id: HummockVersionId, ) -> MetaResult<( ControlStreamNode, BoxStream<'static, 
risingwave_rpc_client::error::Result>, @@ -363,7 +364,7 @@ impl GlobalBarrierManagerContext { .stream_client_pool() .get(&node) .await? - .start_streaming_control(prev_epoch) + .start_streaming_control(initial_version_id) .await?; Ok(( ControlStreamNode { diff --git a/src/meta/src/controller/cluster.rs b/src/meta/src/controller/cluster.rs index 18643bc8c43c1..6a7ca826f160a 100644 --- a/src/meta/src/controller/cluster.rs +++ b/src/meta/src/controller/cluster.rs @@ -935,7 +935,7 @@ mod tests { .add_worker( PbWorkerType::ComputeNode, host.clone(), - property.clone(), + property, PbResource::default(), ) .await?, @@ -967,7 +967,7 @@ mod tests { ); // re-register existing worker node with larger parallelism and change its serving mode. - let mut new_property = property.clone(); + let mut new_property = property; new_property.worker_node_parallelism = (parallelism_num * 2) as _; new_property.is_serving = false; cluster_ctl @@ -1021,7 +1021,7 @@ mod tests { .add_worker( PbWorkerType::ComputeNode, host.clone(), - property.clone(), + property, PbResource::default(), ) .await?; diff --git a/src/meta/src/controller/rename.rs b/src/meta/src/controller/rename.rs index 3947413ba8689..86465e286d958 100644 --- a/src/meta/src/controller/rename.rs +++ b/src/meta/src/controller/rename.rs @@ -18,8 +18,8 @@ use risingwave_pb::expr::expr_node::RexNode; use risingwave_pb::expr::{ExprNode, FunctionCall, UserDefinedFunction}; use risingwave_sqlparser::ast::{ Array, CreateSink, CreateSinkStatement, CreateSourceStatement, CreateSubscriptionStatement, - Distinct, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, Query, SelectItem, - SetExpr, Statement, TableAlias, TableFactor, TableWithJoins, + Distinct, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgList, Ident, ObjectName, + Query, SelectItem, SetExpr, Statement, TableAlias, TableFactor, TableWithJoins, }; use risingwave_sqlparser::parser::Parser; @@ -195,7 +195,7 @@ impl QueryRewriter<'_> { TableFactor::Derived { subquery, .. } => self.visit_query(subquery), TableFactor::TableFunction { args, .. } => { for arg in args { - self.visit_function_args(arg); + self.visit_function_arg(arg); } } TableFactor::NestedJoin(table_with_joins) => { @@ -247,8 +247,8 @@ impl QueryRewriter<'_> { } /// Visit function arguments and update all references. - fn visit_function_args(&self, function_args: &mut FunctionArg) { - match function_args { + fn visit_function_arg(&self, function_arg: &mut FunctionArg) { + match function_arg { FunctionArg::Unnamed(arg) | FunctionArg::Named { arg, .. } => match arg { FunctionArgExpr::Expr(expr) | FunctionArgExpr::ExprQualifiedWildcard(expr, _) => { self.visit_expr(expr) @@ -264,10 +264,25 @@ impl QueryRewriter<'_> { } } + fn visit_function_arg_list(&self, arg_list: &mut FunctionArgList) { + for arg in &mut arg_list.args { + self.visit_function_arg(arg); + } + for expr in &mut arg_list.order_by { + self.visit_expr(&mut expr.expr) + } + } + /// Visit function and update all references. 
fn visit_function(&self, function: &mut Function) { - for arg in &mut function.args { - self.visit_function_args(arg); + self.visit_function_arg_list(&mut function.arg_list); + if let Some(over) = &mut function.over { + for expr in &mut over.partition_by { + self.visit_expr(expr); + } + for expr in &mut over.order_by { + self.visit_expr(&mut expr.expr); + } } } @@ -482,4 +497,28 @@ mod tests { let actual = alter_relation_rename_refs(definition, from, to); assert_eq!(expected, actual); } + + #[test] + fn test_rename_with_complex_funcs() { + let definition = "CREATE MATERIALIZED VIEW mv1 AS SELECT \ + agg1(\ + foo.v1, func2(foo.v2) \ + ORDER BY \ + (SELECT foo.v3 FROM foo), \ + (SELECT first_value(foo.v4) OVER (PARTITION BY (SELECT foo.v5 FROM foo) ORDER BY (SELECT foo.v6 FROM foo)) FROM foo)\ + ) \ + FROM foo"; + let from = "foo"; + let to = "bar"; + let expected = "CREATE MATERIALIZED VIEW mv1 AS SELECT \ + agg1(\ + foo.v1, func2(foo.v2) \ + ORDER BY \ + (SELECT foo.v3 FROM bar AS foo), \ + (SELECT first_value(foo.v4) OVER (PARTITION BY (SELECT foo.v5 FROM bar AS foo) ORDER BY (SELECT foo.v6 FROM bar AS foo)) FROM bar AS foo)\ + ) \ + FROM bar AS foo"; + let actual = alter_relation_rename_refs(definition, from, to); + assert_eq!(expected, actual); + } } diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index f5d97eca1c564..4756a9ded4588 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -1354,6 +1354,10 @@ impl CatalogController { || (*fragment_type_mask & PbFragmentTypeFlag::Source as i32 != 0) { visit_stream_node(stream_node, |node| match node { + PbNodeBody::StreamCdcScan(node) => { + node.rate_limit = rate_limit; + found = true; + } PbNodeBody::StreamScan(node) => { node.rate_limit = rate_limit; found = true; diff --git a/src/meta/src/controller/system_param.rs b/src/meta/src/controller/system_param.rs index a4b5ff239c074..e9d3c25be89fb 100644 --- a/src/meta/src/controller/system_param.rs +++ b/src/meta/src/controller/system_param.rs @@ -31,7 +31,6 @@ use sea_orm::{ActiveModelTrait, DatabaseConnection, EntityTrait, TransactionTrai use tokio::sync::oneshot::Sender; use tokio::sync::RwLock; use tokio::task::JoinHandle; -use tracing::info; use crate::controller::SqlMetaStore; use crate::manager::{LocalNotification, NotificationManagerRef}; @@ -143,7 +142,7 @@ impl SystemParamsController { let params = SystemParameter::find().all(&db).await?; let params = merge_params(system_params_from_db(params)?, init_params); - info!(initial_params = ?SystemParamsReader::new(¶ms), "initialize system parameters"); + tracing::info!(initial_params = ?SystemParamsReader::new(¶ms), "initialize system parameters"); check_missing_params(¶ms).map_err(|e| anyhow!(e))?; let ctl = Self { diff --git a/src/meta/src/hummock/manager/commit_epoch.rs b/src/meta/src/hummock/manager/commit_epoch.rs index 0f42c17751377..c7c2057fcc96e 100644 --- a/src/meta/src/hummock/manager/commit_epoch.rs +++ b/src/meta/src/hummock/manager/commit_epoch.rs @@ -377,7 +377,7 @@ impl HummockManager { committed_epoch: epoch, current_epoch: epoch, }; - let prev_snapshot = self.latest_snapshot.swap(snapshot.clone().into()); + let prev_snapshot = self.latest_snapshot.swap(snapshot.into()); assert!(prev_snapshot.committed_epoch < epoch); assert!(prev_snapshot.current_epoch < epoch); diff --git a/src/meta/src/hummock/manager/compaction.rs b/src/meta/src/hummock/manager/compaction.rs index b096298b7f401..8655df1367742 100644 --- 
a/src/meta/src/hummock/manager/compaction.rs +++ b/src/meta/src/hummock/manager/compaction.rs @@ -665,7 +665,7 @@ impl HummockManager { let _timer = start_measure_real_process_timer!(self, "get_compact_tasks_impl"); let start_time = Instant::now(); - let max_committed_epoch = versioning.current_version.max_committed_epoch; + let max_committed_epoch = versioning.current_version.visible_table_committed_epoch(); let watermark = self .context_info .read() @@ -1375,13 +1375,15 @@ impl HummockManager { _base_version_id: HummockVersionId, compaction_groups: Vec, ) -> Result<()> { - let old_version = self.get_current_version().await; - tracing::info!( - "Trigger compaction for version {}, epoch {}, groups {:?}", - old_version.id, - old_version.max_committed_epoch, - compaction_groups - ); + self.on_current_version(|old_version| { + tracing::info!( + "Trigger compaction for version {}, epoch {}, groups {:?}", + old_version.id, + old_version.visible_table_committed_epoch(), + compaction_groups + ); + }) + .await; if compaction_groups.is_empty() { return Ok(()); diff --git a/src/meta/src/hummock/manager/compaction_group_manager.rs b/src/meta/src/hummock/manager/compaction_group_manager.rs index d585c23e19ee1..a9ab7ea24e63b 100644 --- a/src/meta/src/hummock/manager/compaction_group_manager.rs +++ b/src/meta/src/hummock/manager/compaction_group_manager.rs @@ -221,7 +221,9 @@ impl HummockManager { &self.metrics, ); let mut new_version_delta = version.new_delta(); - let epoch = new_version_delta.latest_version().max_committed_epoch; + let epoch = new_version_delta + .latest_version() + .visible_table_committed_epoch(); for (table_id, raw_group_id) in pairs { let mut group_id = *raw_group_id; diff --git a/src/meta/src/hummock/manager/context.rs b/src/meta/src/hummock/manager/context.rs index b2e0b30771f98..805e6451678f6 100644 --- a/src/meta/src/hummock/manager/context.rs +++ b/src/meta/src/hummock/manager/context.rs @@ -196,7 +196,7 @@ impl HummockManager { pub async fn commit_epoch_sanity_check( &self, - epoch: HummockEpoch, + max_committed_epoch: HummockEpoch, sstables: &[LocalSstableInfo], sst_to_context: &HashMap, current_version: &HummockVersion, @@ -221,11 +221,12 @@ impl HummockManager { } } - if epoch <= current_version.max_committed_epoch { + // TODO: allow equal when supporting partial checkpoint + if max_committed_epoch <= current_version.visible_table_committed_epoch() { return Err(anyhow::anyhow!( "Epoch {} <= max_committed_epoch {}", - epoch, - current_version.max_committed_epoch + max_committed_epoch, + current_version.visible_table_committed_epoch() ) .into()); } @@ -252,7 +253,7 @@ impl HummockManager { .send_event(ResponseEvent::ValidationTask(ValidationTask { sst_infos: sst_infos.into_iter().map(|sst| sst.into()).collect_vec(), sst_id_to_worker_id: sst_to_context.clone(), - epoch, + epoch: max_committed_epoch, })) .is_err() { @@ -427,7 +428,7 @@ impl HummockManager { let _timer = start_measure_real_process_timer!(self, "unpin_snapshot_before"); // Use the max_committed_epoch in storage as the snapshot ts so only committed changes are // visible in the snapshot. - let max_committed_epoch = versioning.current_version.max_committed_epoch; + let max_committed_epoch = versioning.current_version.visible_table_committed_epoch(); // Ensure the unpin will not clean the latest one. 
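As a side note on the sanity check above: the commit path now compares the incoming epoch against `visible_table_committed_epoch()` instead of reading the removed `max_committed_epoch` field directly. A hedged, standalone sketch of that monotonicity check (the function name, error type, and import path are illustrative, not part of this patch):

use risingwave_hummock_sdk::version::HummockVersion; // path assumed

// A commit's max_committed_epoch must be strictly greater than the epoch
// currently visible in the version; equality is rejected until partial
// checkpoints are supported (see the TODO above).
fn check_commit_epoch(max_committed_epoch: u64, current_version: &HummockVersion) -> Result<(), String> {
    let visible = current_version.visible_table_committed_epoch();
    if max_committed_epoch <= visible {
        return Err(format!(
            "Epoch {max_committed_epoch} <= max_committed_epoch {visible}"
        ));
    }
    Ok(())
}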
let snapshot_committed_epoch = hummock_snapshot.committed_epoch; #[cfg(not(test))] diff --git a/src/meta/src/hummock/manager/mod.rs b/src/meta/src/hummock/manager/mod.rs index 3ab2e02f026df..5a3c1ca9ba26a 100644 --- a/src/meta/src/hummock/manager/mod.rs +++ b/src/meta/src/hummock/manager/mod.rs @@ -433,8 +433,8 @@ impl HummockManager { self.latest_snapshot.store( HummockSnapshot { - committed_epoch: redo_state.max_committed_epoch, - current_epoch: redo_state.max_committed_epoch, + committed_epoch: redo_state.visible_table_committed_epoch(), + current_epoch: redo_state.visible_table_committed_epoch(), } .into(), ); diff --git a/src/meta/src/hummock/manager/tests.rs b/src/meta/src/hummock/manager/tests.rs index a35dc5c4b077f..5fbec419cc7fe 100644 --- a/src/meta/src/hummock/manager/tests.rs +++ b/src/meta/src/hummock/manager/tests.rs @@ -318,7 +318,10 @@ async fn test_hummock_transaction() { .await; // Get tables before committing epoch1. No tables should be returned. let current_version = hummock_manager.get_current_version().await; - assert_eq!(current_version.max_committed_epoch, INVALID_EPOCH); + assert_eq!( + current_version.visible_table_committed_epoch(), + INVALID_EPOCH + ); assert!(get_sorted_committed_object_ids(¤t_version).is_empty()); // Commit epoch1 @@ -333,7 +336,7 @@ async fn test_hummock_transaction() { // Get tables after committing epoch1. All tables committed in epoch1 should be returned let current_version = hummock_manager.get_current_version().await; - assert_eq!(current_version.max_committed_epoch, epoch1); + assert_eq!(current_version.visible_table_committed_epoch(), epoch1); assert_eq!( get_sorted_object_ids(&committed_tables), get_sorted_committed_object_ids(¤t_version) @@ -356,7 +359,7 @@ async fn test_hummock_transaction() { // Get tables before committing epoch2. tables_in_epoch1 should be returned and // tables_in_epoch2 should be invisible. let current_version = hummock_manager.get_current_version().await; - assert_eq!(current_version.max_committed_epoch, epoch1); + assert_eq!(current_version.visible_table_committed_epoch(), epoch1); assert_eq!( get_sorted_object_ids(&committed_tables), get_sorted_committed_object_ids(¤t_version) @@ -375,7 +378,7 @@ async fn test_hummock_transaction() { // Get tables after committing epoch2. 
tables_in_epoch1 and tables_in_epoch2 should be // returned let current_version = hummock_manager.get_current_version().await; - assert_eq!(current_version.max_committed_epoch, epoch2); + assert_eq!(current_version.visible_table_committed_epoch(), epoch2); assert_eq!( get_sorted_object_ids(&committed_tables), get_sorted_committed_object_ids(¤t_version) @@ -1148,7 +1151,7 @@ async fn test_extend_objects_to_delete() { ); let objects_to_delete = hummock_manager.get_objects_to_delete(); assert_eq!(objects_to_delete.len(), orphan_sst_num as usize); - let new_epoch = pinned_version2.max_committed_epoch.next_epoch(); + let new_epoch = pinned_version2.visible_table_committed_epoch().next_epoch(); hummock_manager .commit_epoch_for_test( new_epoch, @@ -1158,7 +1161,7 @@ async fn test_extend_objects_to_delete() { .await .unwrap(); let pinned_version3: HummockVersion = hummock_manager.pin_version(context_id).await.unwrap(); - assert_eq!(new_epoch, pinned_version3.max_committed_epoch); + assert_eq!(new_epoch, pinned_version3.visible_table_committed_epoch()); hummock_manager .unpin_version_before(context_id, pinned_version3.id) .await diff --git a/src/meta/src/hummock/manager/time_travel.rs b/src/meta/src/hummock/manager/time_travel.rs index 70035e70aa435..61bcdfe7e8de5 100644 --- a/src/meta/src/hummock/manager/time_travel.rs +++ b/src/meta/src/hummock/manager/time_travel.rs @@ -388,7 +388,7 @@ impl HummockManager { Ok(count) } - let epoch = delta.max_committed_epoch; + let epoch = delta.visible_table_committed_epoch(); let version_id: u64 = delta.id.to_u64(); let m = hummock_epoch_to_version::ActiveModel { epoch: Set(epoch.try_into().unwrap()), @@ -483,14 +483,14 @@ fn replay_archive( deltas: impl Iterator, ) -> HummockVersion { let mut last_version = HummockVersion::from_persisted_protobuf(&version); - let mut mce = last_version.max_committed_epoch; + let mut mce = last_version.visible_table_committed_epoch(); for d in deltas { let d = HummockVersionDelta::from_persisted_protobuf(&d); assert!( - d.max_committed_epoch > mce, + d.visible_table_committed_epoch() > mce, "time travel expects delta from commit_epoch only" ); - mce = d.max_committed_epoch; + mce = d.visible_table_committed_epoch(); // Need to work around the assertion in `apply_version_delta`. // Because compaction deltas are not included in time travel archive. 
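A note on `replay_archive` above: it relies on archived deltas being produced by `commit_epoch` only, so the visible committed epoch must strictly increase from one delta to the next (version ids, by contrast, may skip ahead, which is what the fast-forward workaround that follows handles). A small illustrative check of that epoch invariant in isolation (names and import path assumed):

use risingwave_hummock_sdk::version::HummockVersionDelta; // path assumed

// Mirror of the assertion inside `replay_archive`: every archived delta must
// advance the visible committed epoch past the previous one.
fn assert_monotonic_commit_epochs<'a>(
    mut last_epoch: u64,
    deltas: impl Iterator<Item = &'a HummockVersionDelta>,
) {
    for delta in deltas {
        let epoch = delta.visible_table_committed_epoch();
        assert!(
            epoch > last_epoch,
            "time travel expects delta from commit_epoch only"
        );
        last_epoch = epoch;
    }
}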
while last_version.id < d.prev_id { diff --git a/src/meta/src/hummock/manager/transaction.rs b/src/meta/src/hummock/manager/transaction.rs index f14aa070d586e..32a6c7b9c1a04 100644 --- a/src/meta/src/hummock/manager/transaction.rs +++ b/src/meta/src/hummock/manager/transaction.rs @@ -43,7 +43,7 @@ fn trigger_delta_log_stats(metrics: &MetaMetrics, total_number: usize) { fn trigger_version_stat(metrics: &MetaMetrics, current_version: &HummockVersion) { metrics .max_committed_epoch - .set(current_version.max_committed_epoch as i64); + .set(current_version.visible_table_committed_epoch() as i64); metrics .version_size .set(current_version.estimated_encode_len() as i64); @@ -129,7 +129,7 @@ impl<'a> HummockVersionTransaction<'a> { )>, ) -> HummockVersionDelta { let mut new_version_delta = self.new_delta(); - new_version_delta.max_committed_epoch = max_committed_epoch; + new_version_delta.set_max_committed_epoch(max_committed_epoch); new_version_delta.new_table_watermarks = new_table_watermarks; new_version_delta.change_log_delta = change_log_delta; diff --git a/src/meta/src/hummock/manager/versioning.rs b/src/meta/src/hummock/manager/versioning.rs index 698996c701ac8..30c8b09a69d0f 100644 --- a/src/meta/src/hummock/manager/versioning.rs +++ b/src/meta/src/hummock/manager/versioning.rs @@ -149,16 +149,13 @@ impl HummockManager { /// Should not be called inside [`HummockManager`], because it requests locks internally. /// /// Note: this method can hurt performance because it will clone a large object. + #[cfg(any(test, feature = "test"))] pub async fn get_current_version(&self) -> HummockVersion { - self.versioning.read().await.current_version.clone() + self.on_current_version(|version| version.clone()).await } - pub async fn get_current_max_committed_epoch(&self) -> HummockEpoch { - self.versioning - .read() - .await - .current_version - .max_committed_epoch + pub async fn on_current_version(&self, mut f: impl FnMut(&HummockVersion) -> T) -> T { + f(&self.versioning.read().await.current_version) } /// Gets the mapping from table id to compaction group id @@ -181,7 +178,7 @@ impl HummockManager { .hummock_version_deltas .range(start_id..) 
.map(|(_id, delta)| delta) - .filter(|delta| delta.max_committed_epoch <= committed_epoch_limit) + .filter(|delta| delta.visible_table_committed_epoch() <= committed_epoch_limit) .take(num_limit as _) .cloned() .collect(); diff --git a/src/meta/src/hummock/model/ext/hummock.rs b/src/meta/src/hummock/model/ext/hummock.rs index 37dae37218fad..562ea1016af1f 100644 --- a/src/meta/src/hummock/model/ext/hummock.rs +++ b/src/meta/src/hummock/model/ext/hummock.rs @@ -223,7 +223,7 @@ impl Transactional for HummockVersionDelta { let m = hummock_version_delta::ActiveModel { id: Set(self.id.to_u64().try_into().unwrap()), prev_id: Set(self.prev_id.to_u64().try_into().unwrap()), - max_committed_epoch: Set(self.max_committed_epoch.try_into().unwrap()), + max_committed_epoch: Set(self.visible_table_committed_epoch().try_into().unwrap()), safe_epoch: Set(self.visible_table_safe_epoch().try_into().unwrap()), trivial_move: Set(self.trivial_move), full_version_delta: Set(FullVersionDelta::from(&self.into())), diff --git a/src/meta/src/hummock/model/pinned_snapshot.rs b/src/meta/src/hummock/model/pinned_snapshot.rs index f485d9dab7211..fd009e22b789f 100644 --- a/src/meta/src/hummock/model/pinned_snapshot.rs +++ b/src/meta/src/hummock/model/pinned_snapshot.rs @@ -28,7 +28,7 @@ impl MetadataModel for HummockPinnedSnapshot { } fn to_protobuf(&self) -> Self::PbType { - self.clone() + *self } fn from_protobuf(prost: Self::PbType) -> Self { diff --git a/src/meta/src/hummock/model/pinned_version.rs b/src/meta/src/hummock/model/pinned_version.rs index e8f6b2e65e75e..b2e7d97501b2b 100644 --- a/src/meta/src/hummock/model/pinned_version.rs +++ b/src/meta/src/hummock/model/pinned_version.rs @@ -28,7 +28,7 @@ impl MetadataModel for HummockPinnedVersion { } fn to_protobuf(&self) -> Self::PbType { - self.clone() + *self } fn from_protobuf(prost: Self::PbType) -> Self { diff --git a/src/meta/src/manager/catalog/fragment.rs b/src/meta/src/manager/catalog/fragment.rs index a523bbfeb3e7e..b734cdb54602a 100644 --- a/src/meta/src/manager/catalog/fragment.rs +++ b/src/meta/src/manager/catalog/fragment.rs @@ -1079,6 +1079,11 @@ impl FragmentManager { for actor in &mut fragment.actors { if let Some(node) = actor.nodes.as_mut() { visit_stream_node(node, |node_body| match node_body { + // rate limit for cdc backfill + NodeBody::StreamCdcScan(ref mut node) => { + node.rate_limit = rate_limit; + actor_to_apply.push(actor.actor_id); + } NodeBody::StreamScan(ref mut node) => { node.rate_limit = rate_limit; actor_to_apply.push(actor.actor_id); diff --git a/src/meta/src/manager/catalog/user.rs b/src/meta/src/manager/catalog/user.rs index f6e2f9e03e835..81181b0fc1e17 100644 --- a/src/meta/src/manager/catalog/user.rs +++ b/src/meta/src/manager/catalog/user.rs @@ -234,7 +234,7 @@ mod tests { .grant_privilege( &[test_sub_user_id], &[make_privilege( - object.clone(), + object, &[Action::Select, Action::Update, Action::Delete], true, )], @@ -249,7 +249,7 @@ mod tests { .grant_privilege( &[test_user_id], &[make_privilege( - object.clone(), + object, &[Action::Select, Action::Insert], false, )], @@ -258,7 +258,7 @@ mod tests { .await?; let user = catalog_manager.get_user(test_user_id).await?; assert_eq!(user.grant_privileges.len(), 1); - assert_eq!(user.grant_privileges[0].object, Some(object.clone())); + assert_eq!(user.grant_privileges[0].object, Some(object)); assert_eq!(user.grant_privileges[0].action_with_opts.len(), 2); assert!(user.grant_privileges[0] .action_with_opts @@ -269,7 +269,7 @@ mod tests { .grant_privilege( &[test_sub_user_id], 
&[make_privilege( - object.clone(), + object, &[Action::Select, Action::Insert], true, )], @@ -284,7 +284,7 @@ mod tests { .grant_privilege( &[test_user_id], &[make_privilege( - object.clone(), + object, &[Action::Select, Action::Insert], true, )], @@ -293,7 +293,7 @@ mod tests { .await?; let user = catalog_manager.get_user(test_user_id).await?; assert_eq!(user.grant_privileges.len(), 1); - assert_eq!(user.grant_privileges[0].object, Some(object.clone())); + assert_eq!(user.grant_privileges[0].object, Some(object)); assert_eq!(user.grant_privileges[0].action_with_opts.len(), 2); assert!(user.grant_privileges[0] .action_with_opts @@ -304,7 +304,7 @@ mod tests { .grant_privilege( &[test_sub_user_id], &[make_privilege( - object.clone(), + object, &[Action::Select, Action::Insert], true, )], @@ -319,7 +319,7 @@ mod tests { .grant_privilege( &[test_user_id], &[make_privilege( - object.clone(), + object, &[Action::Select, Action::Update, Action::Delete], true, )], @@ -328,7 +328,7 @@ mod tests { .await?; let user = catalog_manager.get_user(test_user_id).await?; assert_eq!(user.grant_privileges.len(), 1); - assert_eq!(user.grant_privileges[0].object, Some(object.clone())); + assert_eq!(user.grant_privileges[0].object, Some(object)); assert_eq!(user.grant_privileges[0].action_with_opts.len(), 4); assert!(user.grant_privileges[0] .action_with_opts @@ -339,7 +339,7 @@ mod tests { let res = catalog_manager .revoke_privilege( &[test_user_id], - &[make_privilege(object.clone(), &[Action::Connect], false)], + &[make_privilege(object, &[Action::Connect], false)], 0, test_sub_user_id, true, @@ -355,11 +355,7 @@ mod tests { let res = catalog_manager .revoke_privilege( &[test_user_id], - &[make_privilege( - other_object.clone(), - &[Action::Connect], - false, - )], + &[make_privilege(other_object, &[Action::Connect], false)], 0, test_sub_user_id, true, @@ -376,7 +372,7 @@ mod tests { .revoke_privilege( &[test_user_id], &[make_privilege( - object.clone(), + object, &[ Action::Select, Action::Insert, @@ -401,7 +397,7 @@ mod tests { .revoke_privilege( &[test_user_id], &[make_privilege( - object.clone(), + object, &[ Action::Select, Action::Insert, @@ -429,7 +425,7 @@ mod tests { .revoke_privilege( &[test_user_id], &[make_privilege( - object.clone(), + object, &[Action::Select, Action::Insert, Action::Delete], false, )], diff --git a/src/meta/src/manager/diagnose.rs b/src/meta/src/manager/diagnose.rs index cb5f005829e80..190bc51f41a8e 100644 --- a/src/meta/src/manager/diagnose.rs +++ b/src/meta/src/manager/diagnose.rs @@ -414,10 +414,8 @@ impl DiagnoseCommand { #[cfg_attr(coverage, coverage(off))] async fn write_storage(&self, s: &mut String) { - let version = self.hummock_manger.get_current_version().await; let mut sst_num = 0; let mut sst_total_file_size = 0; - let compaction_group_num = version.levels.len(); let back_pressured_compaction_groups = self .hummock_manger .write_limits() @@ -470,32 +468,41 @@ impl DiagnoseCommand { let top_k = 10; let mut top_tombstone_delete_sst = BinaryHeap::with_capacity(top_k); - for compaction_group in version.levels.values() { - let mut visit_level = |level: &Level| { - sst_num += level.table_infos.len(); - sst_total_file_size += level.table_infos.iter().map(|t| t.file_size).sum::(); - for sst in &level.table_infos { - if sst.total_key_count == 0 { - continue; - } - let tombstone_delete_ratio = sst.stale_key_count * 10000 / sst.total_key_count; - let e = SstableSort { - compaction_group_id: compaction_group.group_id, - sst_id: sst.sst_id, - delete_ratio: 
tombstone_delete_ratio, + let compaction_group_num = self + .hummock_manger + .on_current_version(|version| { + for compaction_group in version.levels.values() { + let mut visit_level = |level: &Level| { + sst_num += level.table_infos.len(); + sst_total_file_size += + level.table_infos.iter().map(|t| t.file_size).sum::(); + for sst in &level.table_infos { + if sst.total_key_count == 0 { + continue; + } + let tombstone_delete_ratio = + sst.stale_key_count * 10000 / sst.total_key_count; + let e = SstableSort { + compaction_group_id: compaction_group.group_id, + sst_id: sst.sst_id, + delete_ratio: tombstone_delete_ratio, + }; + top_k_sstables(top_k, &mut top_tombstone_delete_sst, e); + } }; - top_k_sstables(top_k, &mut top_tombstone_delete_sst, e); + let l0 = &compaction_group.l0; + // FIXME: why chaining levels iter leads to segmentation fault? + for level in &l0.sub_levels { + visit_level(level); + } + for level in &compaction_group.levels { + visit_level(level); + } } - }; - let l0 = &compaction_group.l0; - // FIXME: why chaining levels iter leads to segmentation fault? - for level in &l0.sub_levels { - visit_level(level); - } - for level in &compaction_group.levels { - visit_level(level); - } - } + version.levels.len() + }) + .await; + let _ = writeln!(s, "number of SSTables: {sst_num}"); let _ = writeln!(s, "total size of SSTables (byte): {sst_total_file_size}"); let _ = writeln!(s, "number of compaction groups: {compaction_group_num}"); diff --git a/src/meta/src/manager/system_param/mod.rs b/src/meta/src/manager/system_param/mod.rs index d77be8e5d03d4..8776746c54718 100644 --- a/src/meta/src/manager/system_param/mod.rs +++ b/src/meta/src/manager/system_param/mod.rs @@ -30,7 +30,6 @@ use risingwave_pb::meta::SystemParams; use tokio::sync::oneshot::Sender; use tokio::sync::RwLock; use tokio::task::JoinHandle; -use tracing::info; use self::model::SystemParamsModel; use super::NotificationManagerRef; @@ -77,7 +76,7 @@ impl SystemParamsManager { return Err(require_sql_meta_store_err().into()); } - info!(initial_params = ?SystemParamsReader::new(¶ms), "initialize system parameters"); + tracing::info!(initial_params = ?SystemParamsReader::new(¶ms), "initialize system parameters"); check_missing_params(¶ms).map_err(|e| anyhow!(e))?; Ok(Self { diff --git a/src/meta/src/rpc/intercept.rs b/src/meta/src/rpc/intercept.rs index 8b5bb67f30943..87151e06b88a1 100644 --- a/src/meta/src/rpc/intercept.rs +++ b/src/meta/src/rpc/intercept.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use futures::Future; -use hyper::Body; +use tonic::body::BoxBody; use tower::{Layer, Service}; use crate::rpc::metrics::MetaMetrics; @@ -49,9 +49,9 @@ pub struct MetricsMiddleware { metrics: Arc, } -impl Service> for MetricsMiddleware +impl Service> for MetricsMiddleware where - S: Service> + Clone + Send + 'static, + S: Service> + Clone + Send + 'static, S::Future: Send + 'static, { type Error = S::Error; @@ -63,7 +63,7 @@ where self.inner.poll_ready(cx) } - fn call(&mut self, req: hyper::Request) -> Self::Future { + fn call(&mut self, req: http::Request) -> Self::Future { // This is necessary because tonic internally uses `tower::buffer::Buffer`. 
// See https://github.com/tower-rs/tower/issues/547#issuecomment-767629149 // for details on why this is necessary diff --git a/src/object_store/Cargo.toml b/src/object_store/Cargo.toml index e821c2fc85090..fa433a30abcb4 100644 --- a/src/object_store/Cargo.toml +++ b/src/object_store/Cargo.toml @@ -26,7 +26,7 @@ crc32fast = "1" either = "1" fail = "0.5" futures = { version = "0.3", default-features = false, features = ["alloc"] } -hyper = { version = "0.14", features = ["tcp", "client"] } # required by aws sdk +hyper = { version = "0.14", features = ["tcp", "client"] } # TODO(http-bump): required by aws sdk hyper-rustls = { version = "0.24.2", features = ["webpki-roots"] } hyper-tls = "0.5.0" itertools = { workspace = true } diff --git a/src/prost/build.rs b/src/prost/build.rs index bc4c6c413260f..e9974d6d87a09 100644 --- a/src/prost/build.rs +++ b/src/prost/build.rs @@ -131,7 +131,7 @@ fn main() -> Result<(), Box> { ) .type_attribute("plan_common.GeneratedColumnDesc", "#[derive(Eq, Hash)]") .type_attribute("plan_common.DefaultColumnDesc", "#[derive(Eq, Hash)]") - .type_attribute("plan_common.Cardinality", "#[derive(Eq, Hash, Copy)]") + .type_attribute("plan_common.Cardinality", "#[derive(Eq, Hash)]") .type_attribute("plan_common.ExternalTableDesc", "#[derive(Eq, Hash)]") .type_attribute("plan_common.ColumnDesc", "#[derive(Eq, Hash)]") .type_attribute("plan_common.AdditionalColumn", "#[derive(Eq, Hash)]") diff --git a/src/prost/src/lib.rs b/src/prost/src/lib.rs index 8ca7656a281fa..b8cab9006dea4 100644 --- a/src/prost/src/lib.rs +++ b/src/prost/src/lib.rs @@ -19,6 +19,7 @@ use std::str::FromStr; +pub use prost::Message; use risingwave_error::tonic::ToTonicStatus; use thiserror::Error; diff --git a/src/risedevtool/Cargo.toml b/src/risedevtool/Cargo.toml index b8a2ca9db14f9..71c2662024dbb 100644 --- a/src/risedevtool/Cargo.toml +++ b/src/risedevtool/Cargo.toml @@ -23,7 +23,7 @@ clap = { workspace = true } console = "0.15" fs-err = "2.11.0" glob = "0.3" -google-cloud-pubsub = "0.25" +google-cloud-pubsub = "0.28" indicatif = "0.17" itertools = { workspace = true } rdkafka = { workspace = true } diff --git a/src/rpc_client/Cargo.toml b/src/rpc_client/Cargo.toml index 37064df273ed0..49729c6d9e8ac 100644 --- a/src/rpc_client/Cargo.toml +++ b/src/rpc_client/Cargo.toml @@ -19,8 +19,8 @@ async-trait = "0.1" easy-ext = "1" either = "1.13.0" futures = { version = "0.3", default-features = false, features = ["alloc"] } -http = "0.2" -hyper = "0.14" # required by tonic +http = "1" +hyper = "1" itertools = { workspace = true } lru = { workspace = true } moka = { version = "0.12", features = ["future"] } @@ -43,7 +43,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ "signal", ] } tokio-retry = "0.3" -tokio-stream = "0.1" +tokio-stream = { workspace = true } tonic = { workspace = true } tower = "0.4" tracing = "0.1" diff --git a/src/rpc_client/src/meta_client.rs b/src/rpc_client/src/meta_client.rs index 748e65d1f3af5..5a45e0752c9d8 100644 --- a/src/rpc_client/src/meta_client.rs +++ b/src/rpc_client/src/meta_client.rs @@ -245,7 +245,7 @@ impl MetaClient { .add_worker_node(AddWorkerNodeRequest { worker_type: worker_type as i32, host: Some(addr.to_protobuf()), - property: Some(property.clone()), + property: Some(property), resource: Some(risingwave_pb::common::worker_node::Resource { rw_version: RW_VERSION.to_string(), total_memory_bytes: system_memory_available_bytes() as _, diff --git a/src/rpc_client/src/stream_client.rs b/src/rpc_client/src/stream_client.rs index 
988931cb207b6..c3f876549a36d 100644 --- a/src/rpc_client/src/stream_client.rs +++ b/src/rpc_client/src/stream_client.rs @@ -21,6 +21,7 @@ use futures::TryStreamExt; use risingwave_common::config::MAX_CONNECTION_WINDOW_SIZE; use risingwave_common::monitor::{EndpointExt, TcpConfig}; use risingwave_common::util::addr::HostAddr; +use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::stream_service::stream_service_client::StreamServiceClient; use risingwave_pb::stream_service::streaming_control_stream_request::InitRequest; use risingwave_pb::stream_service::streaming_control_stream_response::InitResponse; @@ -86,10 +87,15 @@ pub type StreamingControlHandle = UnboundedBidiStreamHandle; impl StreamClient { - pub async fn start_streaming_control(&self, prev_epoch: u64) -> Result { + pub async fn start_streaming_control( + &self, + version_id: HummockVersionId, + ) -> Result { let first_request = StreamingControlStreamRequest { request: Some(streaming_control_stream_request::Request::Init( - InitRequest { prev_epoch }, + InitRequest { + version_id: version_id.to_u64(), + }, )), }; let mut client = self.0.to_owned(); diff --git a/src/rpc_client/src/tracing.rs b/src/rpc_client/src/tracing.rs index 50c98007bb9fd..aab07d43225d4 100644 --- a/src/rpc_client/src/tracing.rs +++ b/src/rpc_client/src/tracing.rs @@ -16,46 +16,22 @@ use std::task::{Context, Poll}; use futures::Future; use risingwave_common::util::tracing::TracingContext; -use tower::{Layer, Service}; - -/// A layer that decorates the inner service with [`TracingInject`]. -#[derive(Clone, Default)] -pub struct TracingInjectLayer { - _private: (), -} - -impl TracingInjectLayer { - #[allow(dead_code)] - pub fn new() -> Self { - Self::default() - } -} - -impl Layer for TracingInjectLayer { - type Service = TracingInject; - - fn layer(&self, service: S) -> Self::Service { - TracingInject { inner: service } - } -} +use tonic::body::BoxBody; +use tower::Service; /// A service wrapper that injects the [`TracingContext`] obtained from the current tracing span /// into the HTTP headers of the request. /// /// See also `TracingExtract` in the `common_service` crate. #[derive(Clone, Debug)] -pub struct TracingInject { - inner: S, +pub struct TracingInjectChannel { + inner: tonic::transport::Channel, } -impl Service> for TracingInject -where - S: Service> + Clone + Send + 'static, - S::Future: Send + 'static, - B: hyper::body::HttpBody, // tonic `Channel` uses `BoxBody` instead of `hyper::Body` -{ - type Error = S::Error; - type Response = S::Response; +#[cfg(not(madsim))] +impl Service> for TracingInjectChannel { + type Error = tonic::transport::Error; + type Response = http::Response; type Future = impl Future>; @@ -63,7 +39,7 @@ where self.inner.poll_ready(cx) } - fn call(&mut self, mut req: hyper::Request) -> Self::Future { + fn call(&mut self, mut req: http::Request) -> Self::Future { // This is necessary because tonic internally uses `tower::buffer::Buffer`. // See https://github.com/tower-rs/tower/issues/547#issuecomment-767629149 // for details on why this is necessary @@ -81,21 +57,21 @@ where /// A wrapper around tonic's `Channel` that injects the [`TracingContext`] obtained from the current /// tracing span when making gRPC requests. #[cfg(not(madsim))] -pub type Channel = TracingInject; +pub type Channel = TracingInjectChannel; #[cfg(madsim)] pub type Channel = tonic::transport::Channel; -/// An extension trait for tonic's `Channel` that wraps it in a [`TracingInject`] service. 
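With the move to tonic 0.12 and hyper 1.x in this series, the tracing middleware is no longer generic over hyper bodies: it implements `Service<http::Request<BoxBody>>` directly over a concrete `tonic::transport::Channel`, and the generic `TracingInjectLayer` goes away. A minimal sketch of how a channel is expected to be wrapped (the endpoint address is a placeholder and the re-export path of the extension trait is assumed):

use risingwave_rpc_client::tracing::TracingInjectedChannelExt; // path assumed

async fn connect_traced() -> Result<(), tonic::transport::Error> {
    let channel = tonic::transport::Endpoint::from_static("http://127.0.0.1:5690")
        .connect()
        .await?
        // Inject the current span's TracingContext into outgoing gRPC headers.
        .tracing_injected();
    let _ = channel;
    Ok(())
}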
+/// An extension trait for tonic's `Channel` that wraps it into a [`TracingInjectChannel`]. #[easy_ext::ext(TracingInjectedChannelExt)] impl tonic::transport::Channel { - /// Wraps the channel in a [`TracingInject`] service, so that the [`TracingContext`] obtained + /// Wraps the channel into a [`TracingInjectChannel`], so that the [`TracingContext`] obtained /// from the current tracing span is injected into the HTTP headers of the request. /// /// The server can then extract the [`TracingContext`] from the HTTP headers with the /// `TracingExtract` middleware. pub fn tracing_injected(self) -> Channel { #[cfg(not(madsim))] - return TracingInject { inner: self }; + return TracingInjectChannel { inner: self }; #[cfg(madsim)] return self; } diff --git a/src/sqlparser/src/ast/data_type.rs b/src/sqlparser/src/ast/data_type.rs index 1e588955f093b..96c67659beb4f 100644 --- a/src/sqlparser/src/ast/data_type.rs +++ b/src/sqlparser/src/ast/data_type.rs @@ -70,6 +70,8 @@ pub enum DataType { Array(Box), /// Structs Struct(Vec), + /// Map(key_type, value_type) + Map(Box<(DataType, DataType)>), } impl fmt::Display for DataType { @@ -110,6 +112,9 @@ impl fmt::Display for DataType { DataType::Struct(defs) => { write!(f, "STRUCT<{}>", display_comma_separated(defs)) } + DataType::Map(kv) => { + write!(f, "MAP({},{})", kv.0, kv.1) + } } } } diff --git a/src/sqlparser/src/ast/ddl.rs b/src/sqlparser/src/ast/ddl.rs index 6ea385df950fc..89e8f24bf5922 100644 --- a/src/sqlparser/src/ast/ddl.rs +++ b/src/sqlparser/src/ast/ddl.rs @@ -106,6 +106,10 @@ pub enum AlterTableOperation { SetSourceRateLimit { rate_limit: i32, }, + /// SET BACKFILL_RATE_LIMIT TO + SetBackfillRateLimit { + rate_limit: i32, + }, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -293,6 +297,9 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::SetSourceRateLimit { rate_limit } => { write!(f, "SET SOURCE_RATE_LIMIT TO {}", rate_limit) } + AlterTableOperation::SetBackfillRateLimit { rate_limit } => { + write!(f, "SET BACKFILL_RATE_LIMIT TO {}", rate_limit) + } } } } diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index d5cca61b6a186..d5a75688a4c7b 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -2477,6 +2477,71 @@ impl fmt::Display for FunctionArg { } } +/// A list of function arguments, including additional modifiers like `DISTINCT` or `ORDER BY`. +/// This basically holds all the information between the `(` and `)` in a function call. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct FunctionArgList { + /// Aggregate function calls may have a `DISTINCT`, e.g. `count(DISTINCT x)`. + pub distinct: bool, + pub args: Vec, + /// Whether the last argument is variadic, e.g. `foo(a, b, VARIADIC c)`. + pub variadic: bool, + /// Aggregate function calls may have an `ORDER BY`, e.g. `array_agg(x ORDER BY y)`. 
+ pub order_by: Vec, +} + +impl fmt::Display for FunctionArgList { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "(")?; + if self.distinct { + write!(f, "DISTINCT ")?; + } + if self.variadic { + for arg in &self.args[0..self.args.len() - 1] { + write!(f, "{}, ", arg)?; + } + write!(f, "VARIADIC {}", self.args.last().unwrap())?; + } else { + write!(f, "{}", display_comma_separated(&self.args))?; + } + if !self.order_by.is_empty() { + write!(f, " ORDER BY {}", display_comma_separated(&self.order_by))?; + } + write!(f, ")")?; + Ok(()) + } +} + +impl FunctionArgList { + pub fn empty() -> Self { + Self { + distinct: false, + args: vec![], + variadic: false, + order_by: vec![], + } + } + + pub fn args_only(args: Vec) -> Self { + Self { + distinct: false, + args, + variadic: false, + order_by: vec![], + } + } + + pub fn for_agg(distinct: bool, args: Vec, order_by: Vec) -> Self { + Self { + distinct, + args, + variadic: false, + order_by, + } + } +} + /// A function call #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -2484,14 +2549,8 @@ pub struct Function { /// Whether the function is prefixed with `aggregate:` pub scalar_as_agg: bool, pub name: ObjectName, - pub args: Vec, - /// whether the last argument is variadic, e.g. `foo(a, b, variadic c)` - pub variadic: bool, + pub arg_list: FunctionArgList, pub over: Option, - // aggregate functions may specify eg `COUNT(DISTINCT x)` - pub distinct: bool, - // aggregate functions may contain order_by_clause - pub order_by: Vec, pub filter: Option>, pub within_group: Option>, } @@ -2501,11 +2560,8 @@ impl Function { Self { scalar_as_agg: false, name, - args: vec![], - variadic: false, + arg_list: FunctionArgList::empty(), over: None, - distinct: false, - order_by: vec![], filter: None, within_group: None, } @@ -2515,26 +2571,9 @@ impl Function { impl fmt::Display for Function { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if self.scalar_as_agg { - write!(f, "aggregate:")?; + write!(f, "AGGREGATE:")?; } - write!( - f, - "{}({}", - self.name, - if self.distinct { "DISTINCT " } else { "" }, - )?; - if self.variadic { - for arg in &self.args[0..self.args.len() - 1] { - write!(f, "{}, ", arg)?; - } - write!(f, "VARIADIC {}", self.args.last().unwrap())?; - } else { - write!(f, "{}", display_comma_separated(&self.args))?; - } - if !self.order_by.is_empty() { - write!(f, " ORDER BY {}", display_comma_separated(&self.order_by))?; - } - write!(f, ")")?; + write!(f, "{}{}", self.name, self.arg_list)?; if let Some(o) = &self.over { write!(f, " OVER ({})", o)?; } diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 996fd9ebe8490..3a0e1508f287e 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -822,9 +822,7 @@ impl Parser<'_> { false }; let name = self.parse_object_name()?; - self.expect_token(&Token::LParen)?; - let distinct = self.parse_all_or_distinct()?; - let (args, order_by, variadic) = self.parse_optional_args()?; + let arg_list = self.parse_argument_list()?; let over = if self.parse_keyword(Keyword::OVER) { // TODO: support window names (`OVER mywin`) in place of inline specification self.expect_token(&Token::LParen)?; @@ -879,11 +877,8 @@ impl Parser<'_> { Ok(Expr::Function(Function { scalar_as_agg, name, - args, - variadic, + arg_list, over, - distinct, - order_by, filter, within_group, })) @@ -3092,6 +3087,8 @@ impl Parser<'_> { } } else if let Some(rate_limit) = 
self.parse_alter_source_rate_limit(true)? { AlterTableOperation::SetSourceRateLimit { rate_limit } + } else if let Some(rate_limit) = self.parse_alter_backfill_rate_limit()? { + AlterTableOperation::SetBackfillRateLimit { rate_limit } } else { return self.expected("SCHEMA/PARALLELISM/SOURCE_RATE_LIMIT after SET"); } @@ -3632,8 +3629,7 @@ impl Parser<'_> { .parse_next(self) } - /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) and convert - /// into an array of that datatype if needed + /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> PResult { parser_v2::data_type(self) } @@ -4664,17 +4660,21 @@ impl Parser<'_> { } } else { let name = self.parse_object_name()?; - // Postgres,table-valued functions: - if self.consume_token(&Token::LParen) { - // ignore VARIADIC here - let (args, order_by, _variadic) = self.parse_optional_args()?; - // Table-valued functions do not support ORDER BY, should return error if it appears - if !order_by.is_empty() { - parser_err!("Table-valued functions do not support ORDER BY clauses"); + if self.peek_token() == Token::LParen { + // table-valued function + + let arg_list = self.parse_argument_list()?; + if arg_list.distinct { + parser_err!("DISTINCT is not supported in table-valued function calls"); + } + if !arg_list.order_by.is_empty() { + parser_err!("ORDER BY is not supported in table-valued function calls"); } - let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let args = arg_list.args; + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::TableFunction { name, alias, @@ -4957,17 +4957,19 @@ impl Parser<'_> { Ok((variadic, arg)) } - pub fn parse_optional_args(&mut self) -> PResult<(Vec, Vec, bool)> { + pub fn parse_argument_list(&mut self) -> PResult { + self.expect_token(&Token::LParen)?; if self.consume_token(&Token::RParen) { - Ok((vec![], vec![], false)) + Ok(FunctionArgList::empty()) } else { + let distinct = self.parse_all_or_distinct()?; let args = self.parse_comma_separated(Parser::parse_function_args)?; if args .iter() .take(args.len() - 1) .any(|(variadic, _)| *variadic) { - parser_err!("VARIADIC argument must be last"); + parser_err!("VARIADIC argument must be the last"); } let variadic = args.last().map(|(variadic, _)| *variadic).unwrap_or(false); let args = args.into_iter().map(|(_, arg)| arg).collect(); @@ -4977,8 +4979,16 @@ impl Parser<'_> { } else { vec![] }; + + let arg_list = FunctionArgList { + distinct, + args, + variadic, + order_by, + }; + self.expect_token(&Token::RParen)?; - Ok((args, order_by, variadic)) + Ok(arg_list) } } diff --git a/src/sqlparser/src/parser_v2/data_type.rs b/src/sqlparser/src/parser_v2/data_type.rs index 2a66344fb0357..2544e54d1d542 100644 --- a/src/sqlparser/src/parser_v2/data_type.rs +++ b/src/sqlparser/src/parser_v2/data_type.rs @@ -115,6 +115,11 @@ where /// Consume a data type definition. /// /// The parser is the main entry point for data type parsing. +/// +/// Note: in recursion, we should use `data_type_stateful` instead of `data_type`, +/// otherwise the type parameter will recurse like `Stateful>>`. +/// Also note that we cannot use `Parser<'_>` directly to avoid misuse, because we need +/// generics `` to parameterize over `Parser<'_>` and `Stateful>`. 
pub fn data_type(input: &mut S) -> PResult where S: TokenStream, @@ -166,6 +171,14 @@ fn data_type_stateful_inner(input: &mut StatefulStream) -> PResult(input: &mut StatefulStream) -> PResult { let with_time_zone = || { opt(alt(( (Keyword::WITH, Keyword::TIME, Keyword::ZONE).value(true), @@ -186,7 +199,7 @@ where }) }; - let keywords = dispatch! {keyword; + let mut ty = dispatch! {keyword; Keyword::BOOLEAN | Keyword::BOOL => empty.value(DataType::Boolean), Keyword::FLOAT => opt(precision_in_range(1..54)).map(DataType::Float), Keyword::REAL => empty.value(DataType::Real), @@ -211,26 +224,32 @@ where Keyword::NUMERIC | Keyword::DECIMAL | Keyword::DEC => cut_err(precision_and_scale()).map(|(precision, scale)| { DataType::Decimal(precision, scale) }), - _ => fail, + _ => fail }; - trace( - "data_type_inner", - alt(( - keywords, - trace( - "non_keyword_data_type", - object_name.map( - |name| match name.to_string().to_ascii_lowercase().as_str() { - // PostgreSQL built-in data types that are not keywords. - "jsonb" => DataType::Jsonb, - "regclass" => DataType::Regclass, - "regproc" => DataType::Regproc, - _ => DataType::Custom(name), - }, - ), - ), - )), + ty.parse_next(input) +} + +fn non_keyword_datatype(input: &mut StatefulStream) -> PResult { + let type_name = object_name.parse_next(input)?; + match type_name.to_string().to_ascii_lowercase().as_str() { + // PostgreSQL built-in data types that are not keywords. + "jsonb" => Ok(DataType::Jsonb), + "regclass" => Ok(DataType::Regclass), + "regproc" => Ok(DataType::Regproc), + "map" => cut_err(map_type_arguments).parse_next(input), + _ => Ok(DataType::Custom(type_name)), + } +} + +fn map_type_arguments(input: &mut StatefulStream) -> PResult { + delimited( + Token::LParen, + // key is string or integral type. value is arbitrary type. 
+ // We don't validate here, but in binder bind_data_type + seq!(keyword_datatype, _:Token::Comma, data_type_stateful), + Token::RParen, ) + .map(|(k, v)| DataType::Map(Box::new((k, v)))) .parse_next(input) } diff --git a/src/sqlparser/tests/sqlparser_common.rs b/src/sqlparser/tests/sqlparser_common.rs index 049cf79482032..46486419c629a 100644 --- a/src/sqlparser/tests/sqlparser_common.rs +++ b/src/sqlparser/tests/sqlparser_common.rs @@ -347,11 +347,10 @@ fn parse_select_count_wildcard() { &Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked("COUNT")]), - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard(None))], - variadic: false, + arg_list: FunctionArgList::args_only(vec![FunctionArg::Unnamed( + FunctionArgExpr::Wildcard(None) + )]), over: None, - distinct: false, - order_by: vec![], filter: None, within_group: None, }), @@ -367,14 +366,15 @@ fn parse_select_count_distinct() { &Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked("COUNT")]), - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { - op: UnaryOperator::Plus, - expr: Box::new(Expr::Identifier(Ident::new_unchecked("x"))), - }))], - variadic: false, + arg_list: FunctionArgList::for_agg( + true, + vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::UnaryOp { + op: UnaryOperator::Plus, + expr: Box::new(Expr::Identifier(Ident::new_unchecked("x"))), + }))], + vec![] + ), over: None, - distinct: true, - order_by: vec![], filter: None, within_group: None, }), @@ -1166,11 +1166,10 @@ fn parse_select_having() { left: Box::new(Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked("COUNT")]), - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Wildcard(None))], - variadic: false, + arg_list: FunctionArgList::args_only(vec![FunctionArg::Unnamed( + FunctionArgExpr::Wildcard(None) + )]), over: None, - distinct: false, - order_by: vec![], filter: None, within_group: None, })), @@ -1908,7 +1907,7 @@ fn parse_named_argument_function() { &Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked("FUN")]), - args: vec![ + arg_list: FunctionArgList::args_only(vec![ FunctionArg::Named { name: Ident::new_unchecked("a"), arg: FunctionArgExpr::Expr(Expr::Value(Value::SingleQuotedString( @@ -1921,11 +1920,8 @@ fn parse_named_argument_function() { "2".to_owned() ))), }, - ], - variadic: false, + ]), over: None, - distinct: false, - order_by: vec![], filter: None, within_group: None, }), @@ -1951,8 +1947,7 @@ fn parse_window_functions() { &Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked("row_number")]), - args: vec![], - variadic: false, + arg_list: FunctionArgList::empty(), over: Some(WindowSpec { partition_by: vec![], order_by: vec![OrderByExpr { @@ -1962,8 +1957,6 @@ fn parse_window_functions() { }], window_frame: None, }), - distinct: false, - order_by: vec![], filter: None, within_group: None, }), @@ -1986,29 +1979,30 @@ fn parse_aggregate_with_order_by() { &Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked("STRING_AGG")]), - args: vec![ - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( - Ident::new_unchecked("a") - ))), - FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( - Ident::new_unchecked("b") - ))), - ], - variadic: false, + arg_list: FunctionArgList::for_agg( + false, + vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + 
Ident::new_unchecked("a") + ))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + Ident::new_unchecked("b") + ))), + ], + vec![ + OrderByExpr { + expr: Expr::Identifier(Ident::new_unchecked("b")), + asc: Some(true), + nulls_first: None, + }, + OrderByExpr { + expr: Expr::Identifier(Ident::new_unchecked("a")), + asc: Some(false), + nulls_first: None, + } + ] + ), over: None, - distinct: false, - order_by: vec![ - OrderByExpr { - expr: Expr::Identifier(Ident::new_unchecked("b")), - asc: Some(true), - nulls_first: None, - }, - OrderByExpr { - expr: Expr::Identifier(Ident::new_unchecked("a")), - asc: Some(false), - nulls_first: None, - } - ], filter: None, within_group: None, }), @@ -2024,13 +2018,10 @@ fn parse_aggregate_with_filter() { &Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked("sum")]), - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr( - Expr::Identifier(Ident::new_unchecked("a")) - )),], - variadic: false, + arg_list: FunctionArgList::args_only(vec![FunctionArg::Unnamed( + FunctionArgExpr::Expr(Expr::Identifier(Ident::new_unchecked("a"))) + )]), over: None, - distinct: false, - order_by: vec![], filter: Some(Box::new(Expr::BinaryOp { left: Box::new(Expr::Nested(Box::new(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new_unchecked("a"))), @@ -2282,11 +2273,8 @@ fn parse_delimited_identifiers() { &Expr::Function(Function { scalar_as_agg: false, name: ObjectName(vec![Ident::with_quote_unchecked('"', "myfun")]), - args: vec![], - variadic: false, + arg_list: FunctionArgList::empty(), over: None, - distinct: false, - order_by: vec![], filter: None, within_group: None, }), diff --git a/src/sqlparser/tests/sqlparser_postgres.rs b/src/sqlparser/tests/sqlparser_postgres.rs index f3088f47750c4..311b2ba213c45 100644 --- a/src/sqlparser/tests/sqlparser_postgres.rs +++ b/src/sqlparser/tests/sqlparser_postgres.rs @@ -1291,5 +1291,5 @@ fn parse_variadic_argument() { assert!(parse_sql_statements(sql) .unwrap_err() .to_string() - .contains("VARIADIC argument must be last"),); + .contains("VARIADIC argument must be the last"),); } diff --git a/src/sqlparser/tests/testdata/create.yaml b/src/sqlparser/tests/testdata/create.yaml index 4add7cde71a07..317130b79d5e6 100644 --- a/src/sqlparser/tests/testdata/create.yaml +++ b/src/sqlparser/tests/testdata/create.yaml @@ -36,12 +36,12 @@ sql parser error: expected TABLE, found: EOF LINE 1: CREATE TABLE sbtest10 (id INT PRIMARY KEY, k INT, c CHARACTER VARYING, pad CHARACTER VARYING) FROM sbtest ^ -- input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.location = 'file://') - formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.location = 'file://') - formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, 
row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "location", quote_style: None }]), value: SingleQuotedString("file://") }], key_encode: None }), source_watermarks: [], include_column_options: [] } }' -- input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') - formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.servers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') - formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "servers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "registry", quote_style: None }]), value: SingleQuotedString("http://") }], key_encode: None }), source_watermarks: [], include_column_options: [] } }' +- input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.location = 'file://') + formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.location = 'file://') + formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: true, columns: [], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "brokers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "location", quote_style: None }]), value: SingleQuotedString("file://") }], key_encode: None }), source_watermarks: [], include_column_options: [] } }' +- input: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') + formatted_sql: CREATE SOURCE IF NOT EXISTS src WITH (kafka.topic = 'abc', kafka.brokers = 'localhost:1001') FORMAT PLAIN ENCODE PROTOBUF (message = 'Foo', schema.registry = 'http://') + formatted_ast: 'CreateSource { stmt: CreateSourceStatement 
{ if_not_exists: true, columns: [], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "src", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "topic", quote_style: None }]), value: SingleQuotedString("abc") }, SqlOption { name: ObjectName([Ident { value: "kafka", quote_style: None }, Ident { value: "brokers", quote_style: None }]), value: SingleQuotedString("localhost:1001") }]), source_schema: V2(ConnectorSchema { format: Plain, row_encode: Protobuf, row_options: [SqlOption { name: ObjectName([Ident { value: "message", quote_style: None }]), value: SingleQuotedString("Foo") }, SqlOption { name: ObjectName([Ident { value: "schema", quote_style: None }, Ident { value: "registry", quote_style: None }]), value: SingleQuotedString("http://") }], key_encode: None }), source_watermarks: [], include_column_options: [] } }' - input: CREATE SOURCE bid (auction INTEGER, bidder INTEGER, price INTEGER, WATERMARK FOR auction AS auction - 1, "date_time" TIMESTAMP) with (connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') formatted_sql: CREATE SOURCE bid (auction INT, bidder INT, price INT, "date_time" TIMESTAMP, WATERMARK FOR auction AS auction - 1) WITH (connector = 'nexmark', nexmark.table.type = 'Bid', nexmark.split.num = '12', nexmark.min.event.gap.in.ns = '0') FORMAT NATIVE ENCODE NATIVE formatted_ast: 'CreateSource { stmt: CreateSourceStatement { if_not_exists: false, columns: [ColumnDef { name: Ident { value: "auction", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "bidder", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "price", quote_style: None }, data_type: Some(Int), collation: None, options: [] }, ColumnDef { name: Ident { value: "date_time", quote_style: Some(''"'') }, data_type: Some(Timestamp(false)), collation: None, options: [] }], wildcard_idx: None, constraints: [], source_name: ObjectName([Ident { value: "bid", quote_style: None }]), with_properties: WithProperties([SqlOption { name: ObjectName([Ident { value: "connector", quote_style: None }]), value: SingleQuotedString("nexmark") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "table", quote_style: None }, Ident { value: "type", quote_style: None }]), value: SingleQuotedString("Bid") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "split", quote_style: None }, Ident { value: "num", quote_style: None }]), value: SingleQuotedString("12") }, SqlOption { name: ObjectName([Ident { value: "nexmark", quote_style: None }, Ident { value: "min", quote_style: None }, Ident { value: "event", quote_style: None }, Ident { value: "gap", quote_style: None }, Ident { value: "in", quote_style: None }, Ident { value: "ns", quote_style: None }]), value: SingleQuotedString("0") }]), source_schema: V2(ConnectorSchema { format: Native, row_encode: Native, row_options: [], key_encode: None }), source_watermarks: [SourceWatermark { column: Ident { value: "auction", quote_style: None }, expr: BinaryOp { left: Identifier(Ident { value: "auction", quote_style: None }), op: Minus, right: Value(Number("1")) } }], include_column_options: [] } }' diff --git a/src/sqlparser/tests/testdata/lambda.yaml b/src/sqlparser/tests/testdata/lambda.yaml index 
ae3f650d73d44..04d94baf5060c 100644 --- a/src/sqlparser/tests/testdata/lambda.yaml +++ b/src/sqlparser/tests/testdata/lambda.yaml @@ -1,10 +1,10 @@ # This file is automatically generated by `src/sqlparser/tests/parser_test.rs`. - input: select array_transform(array[1,2,3], |x| x * 2) formatted_sql: SELECT array_transform(ARRAY[1, 2, 3], |x| x * 2) - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "array_transform", quote_style: None }]), args: [Unnamed(Expr(Array(Array { elem: [Value(Number("1")), Value(Number("2")), Value(Number("3"))], named: true }))), Unnamed(Expr(LambdaFunction { args: [Ident { value: "x", quote_style: None }], body: BinaryOp { left: Identifier(Ident { value: "x", quote_style: None }), op: Multiply, right: Value(Number("2")) } }))], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "array_transform", quote_style: None }]), arg_list: FunctionArgList { distinct: false, args: [Unnamed(Expr(Array(Array { elem: [Value(Number("1")), Value(Number("2")), Value(Number("3"))], named: true }))), Unnamed(Expr(LambdaFunction { args: [Ident { value: "x", quote_style: None }], body: BinaryOp { left: Identifier(Ident { value: "x", quote_style: None }), op: Multiply, right: Value(Number("2")) } }))], variadic: false, order_by: [] }, over: None, filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: select array_transform(array[], |s| case when s ilike 'apple%' then 'apple' when s ilike 'google%' then 'google' else 'unknown' end) formatted_sql: SELECT array_transform(ARRAY[], |s| CASE WHEN s ILIKE 'apple%' THEN 'apple' WHEN s ILIKE 'google%' THEN 'google' ELSE 'unknown' END) - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "array_transform", quote_style: None }]), args: [Unnamed(Expr(Array(Array { elem: [], named: true }))), Unnamed(Expr(LambdaFunction { args: [Ident { value: "s", quote_style: None }], body: Case { operand: None, conditions: [ILike { negated: false, expr: Identifier(Ident { value: "s", quote_style: None }), pattern: Value(SingleQuotedString("apple%")), escape_char: None }, ILike { negated: false, expr: Identifier(Ident { value: "s", quote_style: None }), pattern: Value(SingleQuotedString("google%")), escape_char: None }], results: [Value(SingleQuotedString("apple")), Value(SingleQuotedString("google"))], else_result: Some(Value(SingleQuotedString("unknown"))) } }))], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "array_transform", quote_style: None }]), arg_list: 
FunctionArgList { distinct: false, args: [Unnamed(Expr(Array(Array { elem: [], named: true }))), Unnamed(Expr(LambdaFunction { args: [Ident { value: "s", quote_style: None }], body: Case { operand: None, conditions: [ILike { negated: false, expr: Identifier(Ident { value: "s", quote_style: None }), pattern: Value(SingleQuotedString("apple%")), escape_char: None }, ILike { negated: false, expr: Identifier(Ident { value: "s", quote_style: None }), pattern: Value(SingleQuotedString("google%")), escape_char: None }], results: [Value(SingleQuotedString("apple")), Value(SingleQuotedString("google"))], else_result: Some(Value(SingleQuotedString("unknown"))) } }))], variadic: false, order_by: [] }, over: None, filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: select array_transform(array[], |x, y| x + y * 2) formatted_sql: SELECT array_transform(ARRAY[], |x, y| x + y * 2) - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "array_transform", quote_style: None }]), args: [Unnamed(Expr(Array(Array { elem: [], named: true }))), Unnamed(Expr(LambdaFunction { args: [Ident { value: "x", quote_style: None }, Ident { value: "y", quote_style: None }], body: BinaryOp { left: Identifier(Ident { value: "x", quote_style: None }), op: Plus, right: BinaryOp { left: Identifier(Ident { value: "y", quote_style: None }), op: Multiply, right: Value(Number("2")) } } }))], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "array_transform", quote_style: None }]), arg_list: FunctionArgList { distinct: false, args: [Unnamed(Expr(Array(Array { elem: [], named: true }))), Unnamed(Expr(LambdaFunction { args: [Ident { value: "x", quote_style: None }, Ident { value: "y", quote_style: None }], body: BinaryOp { left: Identifier(Ident { value: "x", quote_style: None }), op: Plus, right: BinaryOp { left: Identifier(Ident { value: "y", quote_style: None }), op: Multiply, right: Value(Number("2")) } } }))], variadic: false, order_by: [] }, over: None, filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' diff --git a/src/sqlparser/tests/testdata/qualified_operator.yaml b/src/sqlparser/tests/testdata/qualified_operator.yaml index 83f8e885989c7..6d856d181250c 100644 --- a/src/sqlparser/tests/testdata/qualified_operator.yaml +++ b/src/sqlparser/tests/testdata/qualified_operator.yaml @@ -19,10 +19,10 @@ formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Identifier(Ident { value: "operator", quote_style: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: select "operator"(foo.bar); formatted_sql: SELECT "operator"(foo.bar) - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { 
scalar_as_agg: false, name: ObjectName([Ident { value: "operator", quote_style: Some(''"'') }]), args: [Unnamed(Expr(CompoundIdentifier([Ident { value: "foo", quote_style: None }, Ident { value: "bar", quote_style: None }])))], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "operator", quote_style: Some(''"'') }]), arg_list: FunctionArgList { distinct: false, args: [Unnamed(Expr(CompoundIdentifier([Ident { value: "foo", quote_style: None }, Ident { value: "bar", quote_style: None }])))], variadic: false, order_by: [] }, over: None, filter: None, within_group: None }))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: select operator operator(+) operator(+) "operator"(9) operator from operator; formatted_sql: SELECT operator OPERATOR(+) OPERATOR(+) "operator"(9) AS operator FROM operator - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [ExprWithAlias { expr: BinaryOp { left: Identifier(Ident { value: "operator", quote_style: None }), op: PGQualified(QualifiedOperator { schema: None, name: "+" }), right: UnaryOp { op: PGQualified(QualifiedOperator { schema: None, name: "+" }), expr: Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "operator", quote_style: Some(''"'') }]), args: [Unnamed(Expr(Value(Number("9"))))], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: None }) } }, alias: Ident { value: "operator", quote_style: None } }], from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: "operator", quote_style: None }]), alias: None, as_of: None }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [ExprWithAlias { expr: BinaryOp { left: Identifier(Ident { value: "operator", quote_style: None }), op: PGQualified(QualifiedOperator { schema: None, name: "+" }), right: UnaryOp { op: PGQualified(QualifiedOperator { schema: None, name: "+" }), expr: Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "operator", quote_style: Some(''"'') }]), arg_list: FunctionArgList { distinct: false, args: [Unnamed(Expr(Value(Number("9"))))], variadic: false, order_by: [] }, over: None, filter: None, within_group: None }) } }, alias: Ident { value: "operator", quote_style: None } }], from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: "operator", quote_style: None }]), alias: None, as_of: None }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: select 3 operator(-) 2 - 1; formatted_sql: SELECT 3 OPERATOR(-) 2 - 1 formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(BinaryOp { left: Value(Number("3")), op: PGQualified(QualifiedOperator { schema: None, name: "-" }), right: BinaryOp { left: Value(Number("2")), op: Minus, right: Value(Number("1")) } })], from: [], 
lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' diff --git a/src/sqlparser/tests/testdata/select.yaml b/src/sqlparser/tests/testdata/select.yaml index 06c12a3e7d554..e333ba3caf8ec 100644 --- a/src/sqlparser/tests/testdata/select.yaml +++ b/src/sqlparser/tests/testdata/select.yaml @@ -1,7 +1,7 @@ # This file is automatically generated by `src/sqlparser/tests/parser_test.rs`. - input: SELECT sqrt(id) FROM foo formatted_sql: SELECT sqrt(id) FROM foo - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "sqrt", quote_style: None }]), args: [Unnamed(Expr(Identifier(Ident { value: "id", quote_style: None })))], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: None }))], from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: "foo", quote_style: None }]), alias: None, as_of: None }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "sqrt", quote_style: None }]), arg_list: FunctionArgList { distinct: false, args: [Unnamed(Expr(Identifier(Ident { value: "id", quote_style: None })))], variadic: false, order_by: [] }, over: None, filter: None, within_group: None }))], from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: "foo", quote_style: None }]), alias: None, as_of: None }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: SELECT INT '1' formatted_sql: SELECT INT '1' - input: SELECT (foo).v1.v2 FROM foo @@ -99,7 +99,7 @@ formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(AtTimeZone { timestamp: TypedString { data_type: Timestamp(true), value: "2022-10-01 12:00:00Z" }, time_zone: Identifier(Ident { value: "zone", quote_style: None }) })], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: SELECT now() + INTERVAL '14 days' AT TIME ZONE 'UTC'; -- https://www.postgresql.org/message-id/CADT4RqBPdbsZW7HS1jJP319TMRHs1hzUiP=iRJYR6UqgHCrgNQ@mail.gmail.com formatted_sql: SELECT now() + INTERVAL '14 days' AT TIME ZONE 'UTC' - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(BinaryOp { left: Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "now", quote_style: None }]), args: [], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: None }), op: Plus, right: AtTimeZone { timestamp: Value(Interval { value: "14 days", leading_field: None, leading_precision: None, last_field: None, fractional_seconds_precision: None }), time_zone: Value(SingleQuotedString("UTC")) } })], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(BinaryOp { left: Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "now", quote_style: None 
}]), arg_list: FunctionArgList { distinct: false, args: [], variadic: false, order_by: [] }, over: None, filter: None, within_group: None }), op: Plus, right: AtTimeZone { timestamp: Value(Interval { value: "14 days", leading_field: None, leading_precision: None, last_field: None, fractional_seconds_precision: None }), time_zone: Value(SingleQuotedString("UTC")) } })], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: SELECT c FROM t WHERE c >= '2019-03-27T22:00:00.000Z'::timestamp AT TIME ZONE 'Europe/Brussels'; -- https://github.com/sqlparser-rs/sqlparser-rs/issues/1266 formatted_sql: SELECT c FROM t WHERE c >= CAST('2019-03-27T22:00:00.000Z' AS TIMESTAMP) AT TIME ZONE 'Europe/Brussels' formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Identifier(Ident { value: "c", quote_style: None }))], from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: "t", quote_style: None }]), alias: None, as_of: None }, joins: [] }], lateral_views: [], selection: Some(BinaryOp { left: Identifier(Ident { value: "c", quote_style: None }), op: GtEq, right: AtTimeZone { timestamp: Cast { expr: Value(SingleQuotedString("2019-03-27T22:00:00.000Z")), data_type: Timestamp(false) }, time_zone: Value(SingleQuotedString("Europe/Brussels")) } }), group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' @@ -173,7 +173,7 @@ formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Identifier(Ident { value: "id1", quote_style: None })), UnnamedExpr(Identifier(Ident { value: "a1", quote_style: None })), UnnamedExpr(Identifier(Ident { value: "id2", quote_style: None })), UnnamedExpr(Identifier(Ident { value: "a2", quote_style: None }))], from: [TableWithJoins { relation: Table { name: ObjectName([Ident { value: "stream", quote_style: None }]), alias: Some(TableAlias { name: Ident { value: "S", quote_style: None }, columns: [] }), as_of: None }, joins: [Join { relation: Table { name: ObjectName([Ident { value: "version", quote_style: None }]), alias: Some(TableAlias { name: Ident { value: "V", quote_style: None }, columns: [] }), as_of: Some(ProcessTime) }, join_operator: Inner(On(BinaryOp { left: Identifier(Ident { value: "id1", quote_style: None }), op: Eq, right: Identifier(Ident { value: "id2", quote_style: None }) })) }] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: select percentile_cont(0.3) within group (order by x desc) from unnest(array[1,2,4,5,10]) as x formatted_sql: SELECT percentile_cont(0.3) FROM unnest(ARRAY[1, 2, 4, 5, 10]) AS x - formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "percentile_cont", quote_style: None }]), args: [Unnamed(Expr(Value(Number("0.3"))))], variadic: false, over: None, distinct: false, order_by: [], filter: None, within_group: Some(OrderByExpr { expr: Identifier(Ident { value: "x", quote_style: None }), asc: Some(false), nulls_first: None }) }))], from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "unnest", quote_style: None }]), alias: Some(TableAlias { name: Ident { value: "x", quote_style: None }, columns: [] }), args: [Unnamed(Expr(Array(Array { elem: [Value(Number("1")), Value(Number("2")), 
Value(Number("4")), Value(Number("5")), Value(Number("10"))], named: true })))], with_ordinality: false }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' + formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Function(Function { scalar_as_agg: false, name: ObjectName([Ident { value: "percentile_cont", quote_style: None }]), arg_list: FunctionArgList { distinct: false, args: [Unnamed(Expr(Value(Number("0.3"))))], variadic: false, order_by: [] }, over: None, filter: None, within_group: Some(OrderByExpr { expr: Identifier(Ident { value: "x", quote_style: None }), asc: Some(false), nulls_first: None }) }))], from: [TableWithJoins { relation: TableFunction { name: ObjectName([Ident { value: "unnest", quote_style: None }]), alias: Some(TableAlias { name: Ident { value: "x", quote_style: None }, columns: [] }), args: [Unnamed(Expr(Array(Array { elem: [Value(Number("1")), Value(Number("2")), Value(Number("4")), Value(Number("5")), Value(Number("10"))], named: true })))], with_ordinality: false }, joins: [] }], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })' - input: select percentile_cont(0.3) within group (order by x, y desc) from t error_msg: |- sql parser error: expected ), found: , diff --git a/src/storage/Cargo.toml b/src/storage/Cargo.toml index b49a625111d37..2886c4e4e23f7 100644 --- a/src/storage/Cargo.toml +++ b/src/storage/Cargo.toml @@ -106,7 +106,7 @@ fiemap = "0.1.1" [features] # rocksdb-local = ["rocksdb"] # tikv = ["tikv-client"] -test = [] +test = ["risingwave_hummock_sdk/test"] failpoints = ["fail/failpoints"] bpf = [] hm-trace = [] diff --git a/src/storage/backup/src/lib.rs b/src/storage/backup/src/lib.rs index 91d3f6c77ca58..8dfba1b62a181 100644 --- a/src/storage/backup/src/lib.rs +++ b/src/storage/backup/src/lib.rs @@ -75,7 +75,7 @@ impl MetaSnapshotMetadata { id, hummock_version_id: v.id, ssts: v.get_object_ids(), - max_committed_epoch: v.max_committed_epoch, + max_committed_epoch: v.visible_table_committed_epoch(), safe_epoch: v.visible_table_safe_epoch(), format_version, remarks, @@ -122,7 +122,7 @@ impl From<&MetaSnapshotMetadata> for PbMetaSnapshotMetadata { state_table_info: m .state_table_info .iter() - .map(|(t, i)| (t.table_id, i.clone())) + .map(|(t, i)| (t.table_id, *i)) .collect(), rw_version: m.rw_version.clone(), } diff --git a/src/storage/backup/src/meta_snapshot_v2.rs b/src/storage/backup/src/meta_snapshot_v2.rs index bec07a80cf19d..e7dbc92eae23a 100644 --- a/src/storage/backup/src/meta_snapshot_v2.rs +++ b/src/storage/backup/src/meta_snapshot_v2.rs @@ -128,7 +128,8 @@ impl Display for MetadataV2 { writeln!( f, "Hummock version: id {}, max_committed_epoch: {}", - self.hummock_version.id, self.hummock_version.max_committed_epoch + self.hummock_version.id, + self.hummock_version.visible_table_committed_epoch() )?; // optionally dump other metadata Ok(()) diff --git a/src/storage/benches/bench_table_watermarks.rs b/src/storage/benches/bench_table_watermarks.rs index 96fda84629656..4a9e1c5edda0b 100644 --- a/src/storage/benches/bench_table_watermarks.rs +++ b/src/storage/benches/bench_table_watermarks.rs @@ -28,8 +28,8 @@ use risingwave_hummock_sdk::table_watermark::{ TableWatermarks, TableWatermarksIndex, VnodeWatermark, WatermarkDirection, }; use risingwave_hummock_sdk::version::{HummockVersion, HummockVersionStateTableInfo}; -use 
risingwave_hummock_sdk::{HummockEpoch, HummockVersionId}; -use risingwave_pb::hummock::StateTableInfoDelta; +use risingwave_hummock_sdk::HummockEpoch; +use risingwave_pb::hummock::{PbHummockVersion, StateTableInfoDelta}; use risingwave_storage::hummock::local_version::pinned_version::PinnedVersion; use spin::Mutex; use tokio::sync::mpsc::unbounded_channel; @@ -115,10 +115,12 @@ fn gen_version( new_epoch_idx, vnode_part_count, )); - let mut version = HummockVersion::default(); let committed_epoch = test_epoch(new_epoch_idx as _); - version.id = HummockVersionId::new(new_epoch_idx as _); - version.max_committed_epoch = committed_epoch; + let mut version = HummockVersion::from_persisted_protobuf(&PbHummockVersion { + id: new_epoch_idx as _, + max_committed_epoch: committed_epoch, + ..Default::default() + }); version.table_watermarks = (0..table_count) .map(|table_id| (TableId::new(table_id as _), table_watermarks.clone())) .collect(); diff --git a/src/storage/hummock_sdk/Cargo.toml b/src/storage/hummock_sdk/Cargo.toml index fa080b90b7469..79d596bb3121b 100644 --- a/src/storage/hummock_sdk/Cargo.toml +++ b/src/storage/hummock_sdk/Cargo.toml @@ -30,5 +30,8 @@ tracing = "0.1" [target.'cfg(not(madsim))'.dependencies] workspace-hack = { path = "../../workspace-hack" } +[features] +test = [] + [lints] workspace = true diff --git a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs index 3720afc12ff29..1ee4fe0443783 100644 --- a/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs +++ b/src/storage/hummock_sdk/src/compaction_group/hummock_version_ext.rs @@ -85,7 +85,7 @@ pub fn summarize_group_deltas(group_deltas: &GroupDeltas) -> GroupDeltasSummary } GroupDelta::GroupDestroy(destroy_delta) => { assert!(group_destroy.is_none()); - group_destroy = Some(destroy_delta.clone()); + group_destroy = Some(*destroy_delta); } GroupDelta::GroupMetaChange(meta_delta) => { group_meta_changes.push(meta_delta.clone()); @@ -556,7 +556,7 @@ impl HummockVersion { pub fn apply_version_delta(&mut self, version_delta: &HummockVersionDelta) { assert_eq!(self.id, version_delta.prev_id); - let changed_table_info = self.state_table_info.apply_delta( + let (changed_table_info, is_commit_epoch) = self.state_table_info.apply_delta( &version_delta.state_table_info_delta, &version_delta.removed_table_ids, ); @@ -630,6 +630,7 @@ impl HummockVersion { .append(&mut moving_tables); } let has_destroy = summary.group_destroy.is_some(); + let visible_table_committed_epoch = self.visible_table_committed_epoch(); let levels = self .levels .get_mut(compaction_group_id) @@ -647,12 +648,12 @@ impl HummockVersion { } assert!( - self.max_committed_epoch <= version_delta.max_committed_epoch, + visible_table_committed_epoch <= version_delta.visible_table_committed_epoch(), "new max commit epoch {} is older than the current max commit epoch {}", - version_delta.max_committed_epoch, - self.max_committed_epoch + version_delta.visible_table_committed_epoch(), + visible_table_committed_epoch ); - if self.max_committed_epoch < version_delta.max_committed_epoch { + if is_commit_epoch { // `max_committed_epoch` increases. 
It must be a `commit_epoch` let GroupDeltasSummary { delete_sst_levels, @@ -700,7 +701,7 @@ impl HummockVersion { } } self.id = version_delta.id; - self.max_committed_epoch = version_delta.max_committed_epoch; + self.set_max_committed_epoch(version_delta.visible_table_committed_epoch()); self.set_safe_epoch(version_delta.visible_table_safe_epoch()); // apply to table watermark @@ -790,7 +791,7 @@ impl HummockVersion { if !contains { warn!( ?table_id, - max_committed_epoch = version_delta.max_committed_epoch, + max_committed_epoch = version_delta.visible_table_committed_epoch(), "table change log dropped due to no further change log at newly committed epoch", ); } @@ -1223,11 +1224,11 @@ pub fn validate_version(version: &HummockVersion) -> Vec { let mut res = Vec::new(); // Ensure safe_epoch <= max_committed_epoch - if version.visible_table_safe_epoch() > version.max_committed_epoch { + if version.visible_table_safe_epoch() > version.visible_table_committed_epoch() { res.push(format!( "VERSION: safe_epoch {} > max_committed_epoch {}", version.visible_table_safe_epoch(), - version.max_committed_epoch + version.visible_table_committed_epoch() )); } diff --git a/src/storage/hummock_sdk/src/table_watermark.rs b/src/storage/hummock_sdk/src/table_watermark.rs index 2de3fd1b69427..250e9014a1d36 100644 --- a/src/storage/hummock_sdk/src/table_watermark.rs +++ b/src/storage/hummock_sdk/src/table_watermark.rs @@ -1155,7 +1155,7 @@ mod tests { ); let mut version = HummockVersion::default(); - version.max_committed_epoch = EPOCH1; + version.set_max_committed_epoch(EPOCH1); let test_table_id = TableId::from(233); version.table_watermarks.insert( test_table_id, diff --git a/src/storage/hummock_sdk/src/time_travel.rs b/src/storage/hummock_sdk/src/time_travel.rs index 1756353de74af..5894ed3e4a6e9 100644 --- a/src/storage/hummock_sdk/src/time_travel.rs +++ b/src/storage/hummock_sdk/src/time_travel.rs @@ -36,7 +36,7 @@ use crate::{CompactionGroupId, HummockSstableId, HummockVersionId}; pub struct IncompleteHummockVersion { pub id: HummockVersionId, pub levels: HashMap, - pub max_committed_epoch: u64, + max_committed_epoch: u64, safe_epoch: u64, pub table_watermarks: HashMap>, pub table_change_log: HashMap, @@ -211,7 +211,7 @@ impl From<(&HummockVersion, &HashSet)> for IncompleteHummockV } }) .collect(), - max_committed_epoch: version.max_committed_epoch, + max_committed_epoch: version.visible_table_committed_epoch(), safe_epoch: version.visible_table_safe_epoch(), table_watermarks: version.table_watermarks.clone(), // TODO: optimization: strip table change log @@ -294,7 +294,7 @@ impl From<(&HummockVersionDelta, &HashSet)> for IncompleteHum } }) .collect(), - max_committed_epoch: delta.max_committed_epoch, + max_committed_epoch: delta.visible_table_committed_epoch(), safe_epoch: delta.visible_table_safe_epoch(), trivial_move: delta.trivial_move, new_table_watermarks: delta.new_table_watermarks.clone(), @@ -338,7 +338,7 @@ impl IncompleteHummockVersionDelta { state_table_info_delta: self .state_table_info_delta .iter() - .map(|(table_id, delta)| (table_id.table_id, delta.clone())) + .map(|(table_id, delta)| (table_id.table_id, *delta)) .collect(), } } diff --git a/src/storage/hummock_sdk/src/version.rs b/src/storage/hummock_sdk/src/version.rs index 5e6ea2f3fe4a7..e418250f0b6bf 100644 --- a/src/storage/hummock_sdk/src/version.rs +++ b/src/storage/hummock_sdk/src/version.rs @@ -77,7 +77,7 @@ impl HummockVersionStateTableInfo { pub fn from_protobuf(state_table_info: &HashMap) -> Self { let state_table_info = 
state_table_info .iter() - .map(|(table_id, info)| (TableId::new(*table_id), info.clone())) + .map(|(table_id, info)| (TableId::new(*table_id), *info)) .collect(); let compaction_group_member_tables = Self::build_compaction_group_member_tables(&state_table_info); @@ -90,7 +90,7 @@ impl HummockVersionStateTableInfo { pub fn to_protobuf(&self) -> HashMap { self.state_table_info .iter() - .map(|(table_id, info)| (table_id.table_id, info.clone())) + .map(|(table_id, info)| (table_id.table_id, *info)) .collect() } @@ -98,8 +98,9 @@ impl HummockVersionStateTableInfo { &mut self, delta: &HashMap, removed_table_id: &HashSet, - ) -> HashMap> { + ) -> (HashMap>, bool) { let mut changed_table = HashMap::new(); + let mut has_bumped_committed_epoch = false; fn remove_table_from_compaction_group( compaction_group_member_tables: &mut HashMap>, compaction_group_id: CompactionGroupId, @@ -150,6 +151,9 @@ impl HummockVersionStateTableInfo { prev_info, new_info ); + if new_info.committed_epoch > prev_info.committed_epoch { + has_bumped_committed_epoch = true; + } if prev_info.compaction_group_id != new_info.compaction_group_id { // table moved to another compaction group remove_table_from_compaction_group( @@ -172,6 +176,7 @@ impl HummockVersionStateTableInfo { .entry(new_info.compaction_group_id) .or_default() .insert(*table_id)); + has_bumped_committed_epoch = true; entry.insert(new_info); changed_table.insert(*table_id, None); } @@ -181,7 +186,7 @@ impl HummockVersionStateTableInfo { self.compaction_group_member_tables, Self::build_compaction_group_member_tables(&self.state_table_info) ); - changed_table + (changed_table, has_bumped_committed_epoch) } pub fn info(&self) -> &HashMap { @@ -207,7 +212,7 @@ impl HummockVersionStateTableInfo { pub struct HummockVersion { pub id: HummockVersionId, pub levels: HashMap, - pub max_committed_epoch: u64, + max_committed_epoch: u64, safe_epoch: u64, pub table_watermarks: HashMap>, pub table_change_log: HashMap, @@ -396,6 +401,19 @@ impl HummockVersion { self.safe_epoch } + pub(crate) fn set_max_committed_epoch(&mut self, max_committed_epoch: u64) { + self.max_committed_epoch = max_committed_epoch; + } + + #[cfg(any(test, feature = "test"))] + pub fn max_committed_epoch(&self) -> u64 { + self.max_committed_epoch + } + + pub fn visible_table_committed_epoch(&self) -> u64 { + self.max_committed_epoch + } + pub fn create_init_version(default_compaction_config: Arc) -> HummockVersion { let mut init_version = HummockVersion { id: FIRST_VERSION_ID, @@ -439,7 +457,7 @@ pub struct HummockVersionDelta { pub id: HummockVersionId, pub prev_id: HummockVersionId, pub group_deltas: HashMap, - pub max_committed_epoch: u64, + max_committed_epoch: u64, safe_epoch: u64, pub trivial_move: bool, pub new_table_watermarks: HashMap, @@ -570,6 +588,14 @@ impl HummockVersionDelta { pub fn set_safe_epoch(&mut self, safe_epoch: u64) { self.safe_epoch = safe_epoch; } + + pub fn visible_table_committed_epoch(&self) -> u64 { + self.max_committed_epoch + } + + pub fn set_max_committed_epoch(&mut self, max_committed_epoch: u64) { + self.max_committed_epoch = max_committed_epoch; + } } impl From<&PbHummockVersionDelta> for HummockVersionDelta { @@ -616,7 +642,7 @@ impl From<&PbHummockVersionDelta> for HummockVersionDelta { state_table_info_delta: pb_version_delta .state_table_info_delta .iter() - .map(|(table_id, delta)| (TableId::new(*table_id), delta.clone())) + .map(|(table_id, delta)| (TableId::new(*table_id), *delta)) .collect(), } } @@ -653,7 +679,7 @@ impl From<&HummockVersionDelta> for 
PbHummockVersionDelta { state_table_info_delta: version_delta .state_table_info_delta .iter() - .map(|(table_id, delta)| (table_id.table_id, delta.clone())) + .map(|(table_id, delta)| (table_id.table_id, *delta)) .collect(), } } @@ -690,7 +716,7 @@ impl From for PbHummockVersionDelta { state_table_info_delta: version_delta .state_table_info_delta .into_iter() - .map(|(table_id, delta)| (table_id.table_id, delta.clone())) + .map(|(table_id, delta)| (table_id.table_id, delta)) .collect(), } } @@ -735,7 +761,7 @@ impl From for HummockVersionDelta { state_table_info_delta: pb_version_delta .state_table_info_delta .iter() - .map(|(table_id, delta)| (TableId::new(*table_id), delta.clone())) + .map(|(table_id, delta)| (TableId::new(*table_id), *delta)) .collect(), } } @@ -912,7 +938,7 @@ impl From<&GroupDelta> for PbGroupDelta { delta_type: Some(PbDeltaType::GroupConstruct(pb_group_construct.clone())), }, GroupDelta::GroupDestroy(pb_group_destroy) => PbGroupDelta { - delta_type: Some(PbDeltaType::GroupDestroy(pb_group_destroy.clone())), + delta_type: Some(PbDeltaType::GroupDestroy(*pb_group_destroy)), }, GroupDelta::GroupMetaChange(pb_group_meta_change) => PbGroupDelta { delta_type: Some(PbDeltaType::GroupMetaChange(pb_group_meta_change.clone())), @@ -934,7 +960,7 @@ impl From<&PbGroupDelta> for GroupDelta { GroupDelta::GroupConstruct(pb_group_construct.clone()) } Some(PbDeltaType::GroupDestroy(pb_group_destroy)) => { - GroupDelta::GroupDestroy(pb_group_destroy.clone()) + GroupDelta::GroupDestroy(*pb_group_destroy) } Some(PbDeltaType::GroupMetaChange(pb_group_meta_change)) => { GroupDelta::GroupMetaChange(pb_group_meta_change.clone()) diff --git a/src/storage/hummock_test/src/bin/replay/replay_impl.rs b/src/storage/hummock_test/src/bin/replay/replay_impl.rs index 7d879392d2876..0634ae1f30d10 100644 --- a/src/storage/hummock_test/src/bin/replay/replay_impl.rs +++ b/src/storage/hummock_test/src/bin/replay/replay_impl.rs @@ -188,10 +188,6 @@ impl ReplayStateStore for GlobalReplayImpl { .map_err(|_| TraceError::ValidateReadEpochFailed)?; Ok(()) } - - async fn clear_shared_buffer(&self, prev_epoch: u64) { - self.store.clear_shared_buffer(prev_epoch).await - } } pub(crate) struct LocalReplayImpl(LocalHummockStorage); diff --git a/src/storage/hummock_test/src/compactor_tests.rs b/src/storage/hummock_test/src/compactor_tests.rs index 38ef095969a1f..79b00d0f9b8f2 100644 --- a/src/storage/hummock_test/src/compactor_tests.rs +++ b/src/storage/hummock_test/src/compactor_tests.rs @@ -977,7 +977,7 @@ pub(crate) mod tests { compact_task.current_epoch_time = hummock_manager_ref .get_current_version() .await - .max_committed_epoch; + .max_committed_epoch(); // assert compact_task assert_eq!( @@ -1179,7 +1179,7 @@ pub(crate) mod tests { compact_task.current_epoch_time = hummock_manager_ref .get_current_version() .await - .max_committed_epoch; + .max_committed_epoch(); // 3. 
compact let (_tx, rx) = tokio::sync::oneshot::channel(); diff --git a/src/storage/hummock_test/src/hummock_storage_tests.rs b/src/storage/hummock_test/src/hummock_storage_tests.rs index b3e304305660c..caae996d62e4f 100644 --- a/src/storage/hummock_test/src/hummock_storage_tests.rs +++ b/src/storage/hummock_test/src/hummock_storage_tests.rs @@ -2496,7 +2496,7 @@ async fn test_commit_multi_epoch() { .manager .get_current_version() .await - .max_committed_epoch; + .max_committed_epoch(); let epoch1 = initial_epoch.next_epoch(); let sst1_epoch1 = SstableInfo { diff --git a/src/storage/hummock_test/src/state_store_tests.rs b/src/storage/hummock_test/src/state_store_tests.rs index 8df4170722072..1df58074e22d0 100644 --- a/src/storage/hummock_test/src/state_store_tests.rs +++ b/src/storage/hummock_test/src/state_store_tests.rs @@ -1341,7 +1341,9 @@ async fn test_gc_watermark_and_clear_shared_buffer() { drop(local_hummock_storage); - hummock_storage.clear_shared_buffer(epoch1).await; + hummock_storage + .clear_shared_buffer(hummock_storage.get_pinned_version().id()) + .await; assert_eq!( hummock_storage diff --git a/src/storage/hummock_trace/src/collector.rs b/src/storage/hummock_trace/src/collector.rs index c6eb360ad1116..b9e800a8bbea8 100644 --- a/src/storage/hummock_trace/src/collector.rs +++ b/src/storage/hummock_trace/src/collector.rs @@ -216,13 +216,6 @@ impl TraceSpan { Self::new_global_op(Operation::SealCurrentEpoch { epoch, opts }, storage_type) } - pub fn new_clear_shared_buffer_span(prev_epoch: u64) -> MayTraceSpan { - Self::new_global_op( - Operation::ClearSharedBuffer(prev_epoch), - StorageType::Global, - ) - } - pub fn new_validate_read_epoch_span(epoch: HummockReadEpoch) -> MayTraceSpan { Self::new_global_op( Operation::ValidateReadEpoch(epoch.into()), diff --git a/src/storage/hummock_trace/src/record.rs b/src/storage/hummock_trace/src/record.rs index 4aced4e023d38..fc2b0bb1c5c22 100644 --- a/src/storage/hummock_trace/src/record.rs +++ b/src/storage/hummock_trace/src/record.rs @@ -169,9 +169,6 @@ pub enum Operation { /// Try wait epoch TryWaitEpoch(TracedHummockReadEpoch), - /// clear shared buffer - ClearSharedBuffer(u64), - /// Seal current epoch SealCurrentEpoch { epoch: u64, diff --git a/src/storage/hummock_trace/src/replay/mod.rs b/src/storage/hummock_trace/src/replay/mod.rs index 9e8b586640b5f..9996d6212a01f 100644 --- a/src/storage/hummock_trace/src/replay/mod.rs +++ b/src/storage/hummock_trace/src/replay/mod.rs @@ -120,7 +120,6 @@ pub trait ReplayStateStore { async fn notify_hummock(&self, info: Info, op: RespOperation, version: u64) -> Result; async fn new_local(&self, opts: TracedNewLocalOptions) -> Box; async fn try_wait_epoch(&self, epoch: HummockReadEpoch) -> Result<()>; - async fn clear_shared_buffer(&self, prev_epoch: u64); fn validate_read_epoch(&self, epoch: HummockReadEpoch) -> Result<()>; } @@ -152,7 +151,6 @@ mock! 
{ ) -> Result; async fn new_local(&self, opts: TracedNewLocalOptions) -> Box; async fn try_wait_epoch(&self, epoch: HummockReadEpoch) -> Result<()>; - async fn clear_shared_buffer(&self, prev_epoch: u64); fn validate_read_epoch(&self, epoch: HummockReadEpoch) -> Result<()>; } impl GlobalReplay for GlobalReplayInterface{} diff --git a/src/storage/hummock_trace/src/replay/worker.rs b/src/storage/hummock_trace/src/replay/worker.rs index 622d7cb833190..d566dbbe18410 100644 --- a/src/storage/hummock_trace/src/replay/worker.rs +++ b/src/storage/hummock_trace/src/replay/worker.rs @@ -327,10 +327,6 @@ impl ReplayWorker { ); } } - Operation::ClearSharedBuffer(prev_epoch) => { - assert_eq!(storage_type, StorageType::Global); - replay.clear_shared_buffer(prev_epoch).await; - } Operation::SealCurrentEpoch { epoch, opts } => { assert_ne!(storage_type, StorageType::Global); let local_storage = local_storages.get_mut(&storage_type).unwrap(); diff --git a/src/storage/src/hummock/event_handler/hummock_event_handler.rs b/src/storage/src/hummock/event_handler/hummock_event_handler.rs index 5b254556575a0..ee87177923e9b 100644 --- a/src/storage/src/hummock/event_handler/hummock_event_handler.rs +++ b/src/storage/src/hummock/event_handler/hummock_event_handler.rs @@ -28,7 +28,7 @@ use prometheus::core::{AtomicU64, GenericGauge}; use prometheus::{Histogram, IntGauge}; use risingwave_common::catalog::TableId; use risingwave_hummock_sdk::compaction_group::hummock_version_ext::SstDeltaInfo; -use risingwave_hummock_sdk::{HummockEpoch, SyncResult}; +use risingwave_hummock_sdk::{HummockEpoch, HummockVersionId, SyncResult}; use thiserror_ext::AsReport; use tokio::spawn; use tokio::sync::mpsc::error::SendError; @@ -317,7 +317,7 @@ impl HummockEventHandler { let (hummock_event_tx, hummock_event_rx) = event_channel(state_store_metrics.event_handler_pending_event.clone()); let (version_update_notifier_tx, _) = - tokio::sync::watch::channel(pinned_version.max_committed_epoch()); + tokio::sync::watch::channel(pinned_version.visible_table_committed_epoch()); let version_update_notifier_tx = Arc::new(version_update_notifier_tx); let read_version_mapping = Arc::new(RwLock::new(HashMap::default())); let buffer_tracker = BufferTracker::from_storage_opts( @@ -484,10 +484,10 @@ impl HummockEventHandler { .start_sync_epoch(new_sync_epoch, sync_result_sender, table_ids); } - async fn handle_clear(&mut self, notifier: oneshot::Sender<()>, prev_epoch: u64) { + async fn handle_clear(&mut self, notifier: oneshot::Sender<()>, version_id: HummockVersionId) { info!( - prev_epoch, - max_committed_epoch = self.uploader.max_committed_epoch(), + ?version_id, + current_version_id = ?self.uploader.hummock_version().id(), "handle clear event" ); @@ -495,7 +495,7 @@ impl HummockEventHandler { let current_version = self.uploader.hummock_version(); - if current_version.max_committed_epoch() < prev_epoch { + if current_version.version().id < version_id { let mut latest_version = if let Some(CacheRefillerEvent { pinned_version, new_pinned_version, @@ -510,9 +510,9 @@ impl HummockEventHandler { ); info!( - prev_epoch, - current_mce = current_version.max_committed_epoch(), - refiller_mce = new_pinned_version.max_committed_epoch(), + ?version_id, + current_mce = current_version.visible_table_committed_epoch(), + refiller_mce = new_pinned_version.visible_table_committed_epoch(), "refiller is clear in recovery" ); @@ -522,18 +522,25 @@ impl HummockEventHandler { }; while let latest_version_ref = latest_version.as_ref().unwrap_or(current_version) - && 
latest_version_ref.max_committed_epoch() < prev_epoch + && latest_version_ref.version().id < version_id { let version_update = self .version_update_rx .recv() .await .expect("should not be empty"); - latest_version = Some(Self::resolve_version_update_info( + let prev_version_id = latest_version_ref.id(); + let new_version = Self::resolve_version_update_info( latest_version_ref.clone(), version_update, None, - )); + ); + info!( + ?prev_version_id, + new_version_id = ?new_version.id(), + "recv new version" + ); + latest_version = Some(new_version); } self.apply_version_update( @@ -542,14 +549,6 @@ impl HummockEventHandler { ); } - assert!(self.uploader.max_committed_epoch() >= prev_epoch); - if self.uploader.max_committed_epoch() > prev_epoch { - warn!( - mce = self.uploader.max_committed_epoch(), - prev_epoch, "mce higher than clear prev_epoch" - ); - } - assert!( self.local_read_version_mapping.is_empty(), "read version mapping not empty when clear. remaining tables: {:?}", @@ -569,7 +568,7 @@ impl HummockEventHandler { error!("failed to notify completion of clear event: {:?}", e); }); - info!(prev_epoch, "clear finished"); + info!(?version_id, "clear finished"); } fn handle_version_update(&mut self, version_payload: HummockVersionUpdate) { @@ -644,8 +643,8 @@ impl HummockEventHandler { ); } - let prev_max_committed_epoch = pinned_version.max_committed_epoch(); - let max_committed_epoch = new_pinned_version.max_committed_epoch(); + let prev_max_committed_epoch = pinned_version.visible_table_committed_epoch(); + let max_committed_epoch = new_pinned_version.visible_table_committed_epoch(); // only notify local_version_manager when MCE change self.version_update_notifier_tx.send_if_modified(|state| { @@ -662,16 +661,17 @@ impl HummockEventHandler { conflict_detector.set_watermark(max_committed_epoch); } + // TODO: should we change the logic when supporting partial ckpt? 
if let Some(sstable_object_id_manager) = &self.sstable_object_id_manager { sstable_object_id_manager.remove_watermark_object_id(TrackerId::Epoch( - self.pinned_version.load().max_committed_epoch(), + self.pinned_version.load().visible_table_committed_epoch(), )); } debug!( "update to hummock version: {}, epoch: {}", new_pinned_version.id(), - new_pinned_version.max_committed_epoch() + new_pinned_version.visible_table_committed_epoch() ); self.uploader.update_pinned_version(new_pinned_version); @@ -692,8 +692,8 @@ impl HummockEventHandler { event = pin!(self.hummock_event_rx.recv()) => { let Some(event) = event else { break }; match event { - HummockEvent::Clear(notifier, prev_epoch) => { - self.handle_clear(notifier, prev_epoch).await + HummockEvent::Clear(notifier, version_id) => { + self.handle_clear(notifier, version_id).await }, HummockEvent::Shutdown => { info!("event handler shutdown"); @@ -954,7 +954,7 @@ mod tests { #[tokio::test] async fn test_clear_shared_buffer() { - let epoch0 = 233; + let epoch0 = test_epoch(233); let mut next_version_id = 1; let mut make_new_version = |max_committed_epoch| { let id = next_version_id; @@ -996,16 +996,16 @@ mod tests { let latest_version = event_handler.pinned_version.clone(); let latest_version_update_tx = event_handler.version_update_notifier_tx.clone(); - let send_clear = |epoch| { + let send_clear = |version_id| { let (tx, rx) = oneshot::channel(); - event_tx.send(HummockEvent::Clear(tx, epoch)).unwrap(); + event_tx.send(HummockEvent::Clear(tx, version_id)).unwrap(); rx }; let _join_handle = spawn(event_handler.start_hummock_event_handler_worker()); // test normal recovery - send_clear(epoch0).await.unwrap(); + send_clear(initial_version.id()).await.unwrap(); // test normal refill finish let epoch1 = epoch0 + 1; @@ -1053,7 +1053,7 @@ mod tests { assert_eq!(new_version3.version(), &version3); assert_eq!(latest_version.load().version(), &version1); - let rx = send_clear(epoch3); + let rx = send_clear(version3.id); rx.await.unwrap(); assert_eq!(latest_version.load().version(), &version3); } @@ -1068,7 +1068,7 @@ mod tests { let epoch5 = epoch4 + 1; let version5 = make_new_version(epoch5); { - let mut rx = send_clear(epoch5); + let mut rx = send_clear(version5.id); assert_pending(&mut rx).await; version_update_tx .send(HummockVersionUpdate::PinnedVersion(Box::new( diff --git a/src/storage/src/hummock/event_handler/mod.rs b/src/storage/src/hummock/event_handler/mod.rs index 39e9d3ecc920a..60f2e0c02d07e 100644 --- a/src/storage/src/hummock/event_handler/mod.rs +++ b/src/storage/src/hummock/event_handler/mod.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use parking_lot::{RwLock, RwLockReadGuard}; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::TableId; -use risingwave_hummock_sdk::HummockEpoch; +use risingwave_hummock_sdk::{HummockEpoch, HummockVersionId}; use thiserror_ext::AsReport; use tokio::sync::oneshot; @@ -65,7 +65,7 @@ pub enum HummockEvent { }, /// Clear shared buffer and reset all states - Clear(oneshot::Sender<()>, u64), + Clear(oneshot::Sender<()>, HummockVersionId), Shutdown, @@ -118,7 +118,7 @@ impl HummockEvent { table_ids, } => format!("AwaitSyncEpoch epoch {} {:?}", new_sync_epoch, table_ids), - HummockEvent::Clear(_, prev_epoch) => format!("Clear {:?}", prev_epoch), + HummockEvent::Clear(_, version_id) => format!("Clear {}", version_id), HummockEvent::Shutdown => "Shutdown".to_string(), diff --git a/src/storage/src/hummock/event_handler/uploader/mod.rs b/src/storage/src/hummock/event_handler/uploader/mod.rs 
index 8210a998974c4..88351c34b6210 100644 --- a/src/storage/src/hummock/event_handler/uploader/mod.rs +++ b/src/storage/src/hummock/event_handler/uploader/mod.rs @@ -27,7 +27,7 @@ use std::task::{ready, Context, Poll}; use futures::FutureExt; use itertools::Itertools; -use more_asserts::{assert_ge, assert_gt}; +use more_asserts::assert_gt; use prometheus::core::{AtomicU64, GenericGauge}; use prometheus::{HistogramTimer, IntGauge}; use risingwave_common::bitmap::BitmapBuilder; @@ -1138,6 +1138,7 @@ impl HummockUploader { &self.context.buffer_tracker } + #[cfg(test)] pub(super) fn max_committed_epoch(&self) -> HummockEpoch { self.context.pinned_version.max_committed_epoch() } @@ -1246,10 +1247,6 @@ impl HummockUploader { } pub(crate) fn update_pinned_version(&mut self, pinned_version: PinnedVersion) { - assert_ge!( - pinned_version.max_committed_epoch(), - self.context.pinned_version.max_committed_epoch() - ); if let UploaderState::Working(data) = &mut self.state { // TODO: may only `ack_committed` on table whose `committed_epoch` is changed. for (table_id, info) in pinned_version.version().state_table_info.info() { diff --git a/src/storage/src/hummock/event_handler/uploader/test_utils.rs b/src/storage/src/hummock/event_handler/uploader/test_utils.rs index 07a306a2a8df8..ca3a38db2b941 100644 --- a/src/storage/src/hummock/event_handler/uploader/test_utils.rs +++ b/src/storage/src/hummock/event_handler/uploader/test_utils.rs @@ -34,8 +34,8 @@ use risingwave_hummock_sdk::key::{FullKey, TableKey}; use risingwave_hummock_sdk::key_range::KeyRange; use risingwave_hummock_sdk::sstable_info::SstableInfo; use risingwave_hummock_sdk::version::HummockVersion; -use risingwave_hummock_sdk::{HummockEpoch, HummockVersionId, LocalSstableInfo}; -use risingwave_pb::hummock::StateTableInfoDelta; +use risingwave_hummock_sdk::{HummockEpoch, LocalSstableInfo}; +use risingwave_pb::hummock::{PbHummockVersion, StateTableInfoDelta}; use spin::Mutex; use tokio::spawn; use tokio::sync::mpsc::unbounded_channel; @@ -89,9 +89,11 @@ impl HummockUploader { } pub(super) fn test_hummock_version(epoch: HummockEpoch) -> HummockVersion { - let mut version = HummockVersion::default(); - version.id = HummockVersionId::new(epoch); - version.max_committed_epoch = epoch; + let mut version = HummockVersion::from_persisted_protobuf(&PbHummockVersion { + id: epoch, + max_committed_epoch: epoch, + ..Default::default() + }); version.state_table_info.apply_delta( &HashMap::from_iter([( TEST_TABLE_ID, diff --git a/src/storage/src/hummock/local_version/pinned_version.rs b/src/storage/src/hummock/local_version/pinned_version.rs index 8fc6475d0ed2b..2a552ffbbf31a 100644 --- a/src/storage/src/hummock/local_version/pinned_version.rs +++ b/src/storage/src/hummock/local_version/pinned_version.rs @@ -149,8 +149,13 @@ impl PinnedVersion { } } + #[cfg(any(test, feature = "test"))] pub fn max_committed_epoch(&self) -> u64 { - self.version.max_committed_epoch + self.version.max_committed_epoch() + } + + pub fn visible_table_committed_epoch(&self) -> u64 { + self.version.visible_table_committed_epoch() } /// ret value can't be used as `HummockVersion`. 
it must be modified with delta diff --git a/src/storage/src/hummock/store/hummock_storage.rs b/src/storage/src/hummock/store/hummock_storage.rs index 9b534dee967f6..6753131c402f5 100644 --- a/src/storage/src/hummock/store/hummock_storage.rs +++ b/src/storage/src/hummock/store/hummock_storage.rs @@ -132,8 +132,17 @@ pub fn get_committed_read_version_tuple( epoch: HummockEpoch, ) -> (TableKeyRange, ReadVersionTuple) { if let Some(table_watermarks) = version.version().table_watermarks.get(&table_id) { - TableWatermarksIndex::new_committed(table_watermarks.clone(), version.max_committed_epoch()) - .rewrite_range_with_table_watermark(epoch, &mut key_range) + TableWatermarksIndex::new_committed( + table_watermarks.clone(), + version + .version() + .state_table_info + .info() + .get(&table_id) + .expect("should exist when having table watermark") + .committed_epoch, + ) + .rewrite_range_with_table_watermark(epoch, &mut key_range) } (key_range, (vec![], vec![], version)) } @@ -202,8 +211,12 @@ impl HummockStorage { await_tree_reg.clone(), ); - let seal_epoch = Arc::new(AtomicU64::new(pinned_version.max_committed_epoch())); - let min_current_epoch = Arc::new(AtomicU64::new(pinned_version.max_committed_epoch())); + let seal_epoch = Arc::new(AtomicU64::new( + pinned_version.visible_table_committed_epoch(), + )); + let min_current_epoch = Arc::new(AtomicU64::new( + pinned_version.visible_table_committed_epoch(), + )); let hummock_event_handler = HummockEventHandler::new( version_update_rx, pinned_version, @@ -388,9 +401,17 @@ impl HummockStorage { ) -> StorageResult<(TableKeyRange, ReadVersionTuple)> { let pinned_version = self.pinned_version.load(); validate_safe_epoch(pinned_version.version(), table_id, epoch)?; + let table_committed_epoch = pinned_version + .version() + .state_table_info + .info() + .get(&table_id) + .map(|info| info.committed_epoch); // check epoch if lower mce - let ret = if epoch <= pinned_version.max_committed_epoch() { + let ret = if let Some(table_committed_epoch) = table_committed_epoch + && epoch <= table_committed_epoch + { // read committed_version directly without build snapshot get_committed_read_version_tuple((**pinned_version).clone(), table_id, key_range, epoch) } else { @@ -427,20 +448,20 @@ impl HummockStorage { if read_version_vec.is_empty() { if matched_replicated_read_version_cnt > 0 { tracing::warn!( - "Read(table_id={} vnode={} epoch={}) is not allowed on replicated read version ({} found). Fall back to committed version (epoch={})", + "Read(table_id={} vnode={} epoch={}) is not allowed on replicated read version ({} found). Fall back to committed version (epoch={:?})", table_id, vnode.to_index(), epoch, matched_replicated_read_version_cnt, - pinned_version.max_committed_epoch() + table_committed_epoch, ); } else { tracing::debug!( - "No read version found for read(table_id={} vnode={} epoch={}). Fall back to committed version (epoch={})", + "No read version found for read(table_id={} vnode={} epoch={}). 
Fall back to committed version (epoch={:?})", table_id, vnode.to_index(), epoch, - pinned_version.max_committed_epoch() + table_committed_epoch ); } get_committed_read_version_tuple( @@ -498,6 +519,19 @@ impl HummockStorage { ) } + pub async fn clear_shared_buffer(&self, version_id: HummockVersionId) { + let (tx, rx) = oneshot::channel(); + self.hummock_event_sender + .send(HummockEvent::Clear(tx, version_id)) + .expect("should send success"); + rx.await.expect("should wait success"); + + let epoch = self.pinned_version.load().visible_table_committed_epoch(); + self.min_current_epoch + .store(HummockEpoch::MAX, MemOrdering::SeqCst); + self.seal_epoch.store(epoch, MemOrdering::SeqCst); + } + /// Declare the start of an epoch. This information is provided for spill so that the spill task won't /// include data of two or more syncs. // TODO: remove this method when we support spill task that can include data of more two or more syncs @@ -647,19 +681,6 @@ impl StateStore for HummockStorage { StoreLocalStatistic::flush_all(); } - async fn clear_shared_buffer(&self, prev_epoch: u64) { - let (tx, rx) = oneshot::channel(); - self.hummock_event_sender - .send(HummockEvent::Clear(tx, prev_epoch)) - .expect("should send success"); - rx.await.expect("should wait success"); - - let epoch = self.pinned_version.load().max_committed_epoch(); - self.min_current_epoch - .store(HummockEpoch::MAX, MemOrdering::SeqCst); - self.seal_epoch.store(epoch, MemOrdering::SeqCst); - } - fn new_local(&self, option: NewLocalOptions) -> impl Future + Send + '_ { self.new_local_inner(option) } diff --git a/src/storage/src/memory.rs b/src/storage/src/memory.rs index 7a71a2309c3ef..e8da757523ed5 100644 --- a/src/storage/src/memory.rs +++ b/src/storage/src/memory.rs @@ -752,14 +752,6 @@ impl StateStore for RangeKvStateStore { fn seal_epoch(&self, _epoch: u64, _is_checkpoint: bool) {} - #[allow(clippy::unused_async)] - async fn clear_shared_buffer(&self, prev_epoch: u64) { - for (key, _) in self.inner.range((Unbounded, Unbounded), None).unwrap() { - assert!(key.epoch_with_gap.pure_epoch() <= prev_epoch); - } - } - - #[allow(clippy::unused_async)] async fn new_local(&self, option: NewLocalOptions) -> Self::Local { MemtableLocalStateStore::new(self.clone(), option) } diff --git a/src/storage/src/monitor/monitored_store.rs b/src/storage/src/monitor/monitored_store.rs index 8c00435541d2c..30350d9b9c648 100644 --- a/src/storage/src/monitor/monitored_store.rs +++ b/src/storage/src/monitor/monitored_store.rs @@ -335,12 +335,6 @@ impl StateStore for MonitoredStateStore { panic!("the state store is already monitored") } - fn clear_shared_buffer(&self, prev_epoch: u64) -> impl Future + Send + '_ { - self.inner - .clear_shared_buffer(prev_epoch) - .verbose_instrument_await("store_clear_shared_buffer") - } - async fn new_local(&self, option: NewLocalOptions) -> Self::Local { MonitoredStateStore::new_from_local( self.inner diff --git a/src/storage/src/monitor/traced_store.rs b/src/storage/src/monitor/traced_store.rs index b31c8fd0d73e8..246313f71a498 100644 --- a/src/storage/src/monitor/traced_store.rs +++ b/src/storage/src/monitor/traced_store.rs @@ -270,11 +270,6 @@ impl StateStore for TracedStateStore { self.inner.seal_epoch(epoch, is_checkpoint); } - async fn clear_shared_buffer(&self, prev_epoch: u64) { - let _span = TraceSpan::new_clear_shared_buffer_span(prev_epoch); - self.inner.clear_shared_buffer(prev_epoch).await; - } - async fn new_local(&self, options: NewLocalOptions) -> Self::Local { 
TracedStateStore::new_local(self.inner.new_local(options.clone()).await, options) } diff --git a/src/storage/src/panic_store.rs b/src/storage/src/panic_store.rs index a9e10c6553c54..42737d914f536 100644 --- a/src/storage/src/panic_store.rs +++ b/src/storage/src/panic_store.rs @@ -186,11 +186,6 @@ impl StateStore for PanicStateStore { panic!("should not update current epoch from the panic state store!"); } - #[allow(clippy::unused_async)] - async fn clear_shared_buffer(&self, _prev_epoch: u64) { - panic!("should not clear shared buffer from the panic state store!"); - } - #[allow(clippy::unused_async)] async fn new_local(&self, _option: NewLocalOptions) -> Self::Local { panic!("should not call new local from the panic state store"); diff --git a/src/storage/src/store.rs b/src/storage/src/store.rs index 4b837ce6d098e..f382bf5fc2d5d 100644 --- a/src/storage/src/store.rs +++ b/src/storage/src/store.rs @@ -364,10 +364,6 @@ pub trait StateStore: StateStoreRead + StaticSendSync + Clone { MonitoredStateStore::new(self, storage_metrics) } - /// Clears contents in shared buffer. - /// This method should only be called when dropping all actors in the local compute node. - fn clear_shared_buffer(&self, prev_epoch: u64) -> impl Future + Send + '_; - fn new_local(&self, option: NewLocalOptions) -> impl Future + Send + '_; /// Validates whether store can serve `epoch` at the moment. diff --git a/src/storage/src/store_impl.rs b/src/storage/src/store_impl.rs index 3ee9e849dda4c..2115db1aa56d1 100644 --- a/src/storage/src/store_impl.rs +++ b/src/storage/src/store_impl.rs @@ -592,10 +592,6 @@ pub mod verify { self.actual.seal_epoch(epoch, is_checkpoint) } - fn clear_shared_buffer(&self, prev_epoch: u64) -> impl Future + Send + '_ { - self.actual.clear_shared_buffer(prev_epoch) - } - async fn new_local(&self, option: NewLocalOptions) -> Self::Local { let expected = if let Some(expected) = &self.expected { Some(expected.new_local(option.clone()).await) @@ -1174,8 +1170,6 @@ pub mod boxed_state_store { fn seal_epoch(&self, epoch: u64, is_checkpoint: bool); - async fn clear_shared_buffer(&self, prev_epoch: u64); - async fn new_local(&self, option: NewLocalOptions) -> BoxDynamicDispatchedLocalStateStore; fn validate_read_epoch(&self, epoch: HummockReadEpoch) -> StorageResult<()>; @@ -1199,10 +1193,6 @@ pub mod boxed_state_store { self.seal_epoch(epoch, is_checkpoint); } - async fn clear_shared_buffer(&self, prev_epoch: u64) { - self.clear_shared_buffer(prev_epoch).await - } - async fn new_local(&self, option: NewLocalOptions) -> BoxDynamicDispatchedLocalStateStore { Box::new(self.new_local(option).await) } @@ -1293,10 +1283,6 @@ pub mod boxed_state_store { self.deref().sync(epoch, table_ids) } - fn clear_shared_buffer(&self, prev_epoch: u64) -> impl Future + Send + '_ { - self.deref().clear_shared_buffer(prev_epoch) - } - fn seal_epoch(&self, epoch: u64, is_checkpoint: bool) { self.deref().seal_epoch(epoch, is_checkpoint) } diff --git a/src/stream/Cargo.toml b/src/stream/Cargo.toml index 3c85092a4d677..de25cf8439be1 100644 --- a/src/stream/Cargo.toml +++ b/src/stream/Cargo.toml @@ -79,7 +79,7 @@ tokio = { version = "0.2", package = "madsim-tokio", features = [ ] } tokio-metrics = "0.3.0" tokio-retry = "0.3" -tokio-stream = "0.1" +tokio-stream = { workspace = true } tonic = { workspace = true } tracing = "0.1" diff --git a/src/stream/src/common/log_store_impl/kv_log_store/mod.rs b/src/stream/src/common/log_store_impl/kv_log_store/mod.rs index b50b90b0ebef1..63ee6762cfb30 100644 --- 
a/src/stream/src/common/log_store_impl/kv_log_store/mod.rs +++ b/src/stream/src/common/log_store_impl/kv_log_store/mod.rs @@ -507,7 +507,7 @@ mod tests { .storage .get_pinned_version() .version() - .max_committed_epoch + .max_committed_epoch() .next_epoch(); test_env .storage @@ -614,7 +614,7 @@ mod tests { .storage .get_pinned_version() .version() - .max_committed_epoch + .max_committed_epoch() .next_epoch(); test_env .storage @@ -691,7 +691,10 @@ mod tests { drop(writer); // Recovery - test_env.storage.clear_shared_buffer(epoch2).await; + test_env + .storage + .clear_shared_buffer(test_env.manager.get_current_version().await.id) + .await; // Rebuild log reader and writer in recovery let factory = KvLogStoreFactory::new( @@ -803,7 +806,7 @@ mod tests { .storage .get_pinned_version() .version() - .max_committed_epoch + .max_committed_epoch() .next_epoch(); test_env .storage @@ -904,7 +907,10 @@ mod tests { drop(writer); // Recovery - test_env.storage.clear_shared_buffer(epoch2).await; + test_env + .storage + .clear_shared_buffer(test_env.manager.get_current_version().await.id) + .await; // Rebuild log reader and writer in recovery let factory = KvLogStoreFactory::new( @@ -1029,7 +1035,7 @@ mod tests { .storage .get_pinned_version() .version() - .max_committed_epoch + .max_committed_epoch() .next_epoch(); test_env .storage @@ -1139,7 +1145,10 @@ mod tests { drop(writer2); // Recovery - test_env.storage.clear_shared_buffer(epoch2).await; + test_env + .storage + .clear_shared_buffer(test_env.manager.get_current_version().await.id) + .await; let vnodes = build_bitmap(0..VirtualNode::COUNT); let factory = KvLogStoreFactory::new( @@ -1222,7 +1231,7 @@ mod tests { .storage .get_pinned_version() .version() - .max_committed_epoch + .max_committed_epoch() .next_epoch(); test_env .storage @@ -1362,7 +1371,7 @@ mod tests { .storage .get_pinned_version() .version() - .max_committed_epoch + .max_committed_epoch() .next_epoch(); test_env .storage @@ -1692,7 +1701,7 @@ mod tests { .storage .get_pinned_version() .version() - .max_committed_epoch + .max_committed_epoch() .next_epoch(); test_env .storage @@ -1751,7 +1760,10 @@ mod tests { drop(writer); // Recovery - test_env.storage.clear_shared_buffer(epoch2).await; + test_env + .storage + .clear_shared_buffer(test_env.manager.get_current_version().await.id) + .await; // Rebuild log reader and writer in recovery let factory = KvLogStoreFactory::new( @@ -1815,7 +1827,10 @@ mod tests { drop(writer); // Recovery - test_env.storage.clear_shared_buffer(epoch3).await; + test_env + .storage + .clear_shared_buffer(test_env.manager.get_current_version().await.id) + .await; // Rebuild log reader and writer in recovery let factory = KvLogStoreFactory::new( diff --git a/src/stream/src/executor/approx_percentile/global.rs b/src/stream/src/executor/approx_percentile/global.rs index 534857237bffb..9434ccf05d5a8 100644 --- a/src/stream/src/executor/approx_percentile/global.rs +++ b/src/stream/src/executor/approx_percentile/global.rs @@ -84,13 +84,10 @@ impl GlobalApproxPercentileExecutor { ) .await?; - let mut received_input = false; - #[for_await] for message in input_stream { match message? { Message::Chunk(chunk) => { - received_input = true; for (_, row) in chunk.rows() { // Decoding let sign_datum = row.datum_at(0); @@ -125,15 +122,6 @@ impl GlobalApproxPercentileExecutor { } } Message::Barrier(barrier) => { - // If we haven't received any input, we don't need to update the state. 
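Aside on the hummock_storage.rs hunk earlier in this diff: reads are now gated on the committed epoch of the specific table (looked up in state_table_info) instead of the version-wide max_committed_epoch. The standalone sketch below illustrates only that decision, using plain std types (u32 table ids, u64 epochs, a HashMap standing in for state_table_info); it is illustrative, not the RisingWave API.

use std::collections::HashMap;

/// Decide whether a read at `read_epoch` can be served from the committed version.
/// `table_committed_epochs` stands in for the per-table committed epochs kept in
/// `state_table_info`; real code uses TableId/HummockEpoch rather than u32/u64.
fn use_committed_version(
    table_committed_epochs: &HashMap<u32, u64>,
    table_id: u32,
    read_epoch: u64,
) -> bool {
    match table_committed_epochs.get(&table_id) {
        // The table's own committed epoch covers the read: no snapshot needed.
        Some(&committed_epoch) => read_epoch <= committed_epoch,
        // No committed state recorded for this table: fall back to building a snapshot.
        None => false,
    }
}

fn main() {
    let epochs = HashMap::from([(1u32, 100u64), (2, 80)]);
    assert!(use_committed_version(&epochs, 1, 100)); // not newer than table 1's committed epoch
    assert!(!use_committed_version(&epochs, 2, 90)); // newer than table 2's committed epoch
    assert!(!use_committed_version(&epochs, 3, 10)); // unknown table
}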
- // This is unless row count state is empty, then we need to persist the state, - // and yield a NULL downstream. - if !received_input && row_count_state.is_some() { - count_state_table.commit(barrier.epoch).await?; - bucket_state_table.commit(barrier.epoch).await?; - yield Message::Barrier(barrier); - continue; - } // We maintain an invariant, iff row_count_state is none, // we haven't pushed any data to downstream. // Naturally, if row_count_state is some, diff --git a/src/stream/src/executor/backfill/cdc/cdc_backfill.rs b/src/stream/src/executor/backfill/cdc/cdc_backfill.rs index 943059355f054..bfffa066fc265 100644 --- a/src/stream/src/executor/backfill/cdc/cdc_backfill.rs +++ b/src/stream/src/executor/backfill/cdc/cdc_backfill.rs @@ -159,6 +159,7 @@ impl CdcBackfillExecutor { let first_barrier = expect_first_barrier(&mut upstream).await?; let mut is_snapshot_paused = first_barrier.is_pause_on_startup(); + let mut rate_limit_to_zero = self.rate_limit_rps.is_some_and(|val| val == 0); // Check whether this parallelism has been assigned splits, // if not, we should bypass the backfill directly. @@ -343,7 +344,23 @@ impl CdcBackfillExecutor { && *new_rate_limit != self.rate_limit_rps { self.rate_limit_rps = *new_rate_limit; - // rebuild the new reader stream with new rate limit + rate_limit_to_zero = self + .rate_limit_rps + .is_some_and(|val| val == 0); + + // update and persist current backfill progress without draining the buffered upstream chunks + state_impl + .mutate_state( + current_pk_pos.clone(), + last_binlog_offset.clone(), + total_snapshot_row_count, + false, + ) + .await?; + state_impl.commit_state(barrier.epoch).await?; + yield Message::Barrier(barrier); + + // rebuild the snapshot stream with new rate limit continue 'backfill_loop; } } @@ -497,7 +514,9 @@ impl CdcBackfillExecutor { // It maybe a cancellation bug of the mysql driver. 
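The cdc_backfill.rs change around this point treats a rate limit of zero like a pause: the snapshot stream is not polled at all, and a rate-limit update first persists the current backfill progress before the stream is rebuilt. A minimal, self-contained sketch of the gating condition, with Option<u32> standing in for the executor's rate_limit_rps field:

/// Returns true only when the snapshot stream should actually be polled.
fn should_consume_snapshot(is_snapshot_paused: bool, rate_limit_rps: Option<u32>) -> bool {
    // A rate limit of Some(0) means "emit nothing from the snapshot", so treat it
    // like a pause instead of busy-polling a stream that may never yield.
    let rate_limit_to_zero = rate_limit_rps.is_some_and(|rps| rps == 0);
    !is_snapshot_paused && !rate_limit_to_zero
}

fn main() {
    assert!(should_consume_snapshot(false, None));
    assert!(should_consume_snapshot(false, Some(1000)));
    assert!(!should_consume_snapshot(false, Some(0)));
    assert!(!should_consume_snapshot(true, Some(1000)));
}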
let (_, mut snapshot_stream) = backfill_stream.into_inner(); + // skip consume the snapshot stream if it is paused or rate limit to 0 if !is_snapshot_paused + && !rate_limit_to_zero && let Some(msg) = snapshot_stream .next() .instrument_await("consume_snapshot_stream_once") diff --git a/src/stream/src/task/barrier_manager.rs b/src/stream/src/task/barrier_manager.rs index 654980db1c17e..d2f9c24e16a29 100644 --- a/src/stream/src/task/barrier_manager.rs +++ b/src/stream/src/task/barrier_manager.rs @@ -53,7 +53,7 @@ use risingwave_common::catalog::TableId; use risingwave_common::util::epoch::EpochPair; use risingwave_common::util::runtime::BackgroundShutdownRuntime; use risingwave_hummock_sdk::table_stats::to_prost_table_stats_map; -use risingwave_hummock_sdk::{LocalSstableInfo, SyncResult}; +use risingwave_hummock_sdk::{HummockVersionId, LocalSstableInfo, SyncResult}; use risingwave_pb::common::ActorInfo; use risingwave_pb::stream_plan::barrier::BarrierKind; use risingwave_pb::stream_service::streaming_control_stream_request::{InitRequest, Request}; @@ -481,7 +481,7 @@ impl LocalBarrierWorker { match actor_op { LocalActorOperation::NewControlStream { handle, init_request } => { self.control_stream_handle.reset_stream_with_err(Status::internal("control stream has been reset to a new one")); - self.reset(init_request.prev_epoch).await; + self.reset(HummockVersionId::new(init_request.version_id)).await; self.control_stream_handle = handle; self.control_stream_handle.send_response(StreamingControlStreamResponse { response: Some(streaming_control_stream_response::Response::Init(InitResponse {})) @@ -1178,7 +1178,7 @@ pub(crate) mod barrier_test_utils { response_tx, UnboundedReceiverStream::new(request_rx).boxed(), ), - init_request: InitRequest { prev_epoch: 0 }, + init_request: InitRequest { version_id: 0 }, }); assert_matches!( diff --git a/src/stream/src/task/barrier_manager/managed_state.rs b/src/stream/src/task/barrier_manager/managed_state.rs index 3651dcc44d5e9..fe58c4b3d911b 100644 --- a/src/stream/src/task/barrier_manager/managed_state.rs +++ b/src/stream/src/task/barrier_manager/managed_state.rs @@ -598,7 +598,7 @@ impl PartialGraphManagedBarrierState { "ignore sealing data for the first barrier" ); if let Some(hummock) = self.state_store.as_hummock() { - let mce = hummock.get_pinned_version().max_committed_epoch(); + let mce = hummock.get_pinned_version().visible_table_committed_epoch(); assert_eq!( mce, prev_epoch, "first epoch should match with the current version", diff --git a/src/stream/src/task/stream_manager.rs b/src/stream/src/task/stream_manager.rs index a8c1c625a5c37..5a5b2d48d57c4 100644 --- a/src/stream/src/task/stream_manager.rs +++ b/src/stream/src/task/stream_manager.rs @@ -22,6 +22,7 @@ use std::time::Instant; use anyhow::anyhow; use async_recursion::async_recursion; +use await_tree::InstrumentAwait; use futures::stream::BoxStream; use futures::FutureExt; use itertools::Itertools; @@ -29,6 +30,7 @@ use risingwave_common::bail; use risingwave_common::bitmap::Bitmap; use risingwave_common::catalog::{Field, Schema, TableId}; use risingwave_common::config::MetricLevel; +use risingwave_hummock_sdk::HummockVersionId; use risingwave_pb::common::ActorInfo; use risingwave_pb::stream_plan; use risingwave_pb::stream_plan::stream_node::NodeBody; @@ -296,7 +298,7 @@ impl LocalBarrierWorker { } /// Force stop all actors on this worker, and then drop their resources. 
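The barrier-manager and stream-manager hunks here switch the worker reset from a bare prev_epoch: u64 to a typed HummockVersionId (built via HummockVersionId::new(init_request.version_id)). The sketch below uses a local newtype to illustrate why the typed id helps; VersionId and reset_worker are stand-ins for illustration, not the real SDK types.

/// `VersionId` is a local stand-in for `risingwave_hummock_sdk::HummockVersionId`,
/// used only to illustrate the signature change.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct VersionId(u64);

impl VersionId {
    fn new(id: u64) -> Self {
        Self(id)
    }
}

/// Mirrors the new `reset` shape: callers must hand over a version id, so an epoch
/// can no longer be passed by accident where a version id is expected.
fn reset_worker(version_id: VersionId) {
    println!("resetting local barrier worker at Hummock version {:?}", version_id);
}

fn main() {
    // In the hunk above the id comes from `init_request.version_id`; here it is a literal.
    reset_worker(VersionId::new(42));
    // reset_worker(42); // would not compile: a plain u64 is not a VersionId
}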
- pub(super) async fn reset(&mut self, prev_epoch: u64) { + pub(super) async fn reset(&mut self, version_id: HummockVersionId) { let actor_handles = self.actor_manager_state.drain_actor_handles(); for (actor_id, handle) in &actor_handles { tracing::debug!("force stopping actor {}", actor_id); @@ -320,9 +322,13 @@ impl LocalBarrierWorker { if let Some(m) = self.actor_manager.await_tree_reg.as_ref() { m.clear(); } - dispatch_state_store!(&self.actor_manager.env.state_store(), store, { - store.clear_shared_buffer(prev_epoch).await; - }); + + if let Some(hummock) = self.actor_manager.env.state_store().as_hummock() { + hummock + .clear_shared_buffer(version_id) + .verbose_instrument_await("store_clear_shared_buffer") + .await + } self.reset_state(); self.actor_manager.env.dml_manager_ref().clear(); } diff --git a/src/tests/compaction_test/src/compaction_test_runner.rs b/src/tests/compaction_test/src/compaction_test_runner.rs index 6e1ca43be811f..0aa7d1d83c8d5 100644 --- a/src/tests/compaction_test/src/compaction_test_runner.rs +++ b/src/tests/compaction_test/src/compaction_test_runner.rs @@ -375,7 +375,7 @@ async fn start_replay( for delta in version_delta_logs { let (current_version, compaction_groups) = meta_client.replay_version_delta(delta).await?; let (version_id, max_committed_epoch) = - (current_version.id, current_version.max_committed_epoch); + (current_version.id, current_version.max_committed_epoch()); tracing::info!( "Replayed version delta version_id: {}, max_committed_epoch: {}, compaction_groups: {:?}", version_id, @@ -464,7 +464,7 @@ async fn start_replay( ); let (new_version_id, new_committed_epoch) = - (new_version.id, new_version.max_committed_epoch); + (new_version.id, new_version.max_committed_epoch()); assert!( new_version_id >= version_id, "new_version_id: {}, epoch: {}", diff --git a/src/tests/simulation/Cargo.toml b/src/tests/simulation/Cargo.toml index 6d881f203f670..143d0f0c01c75 100644 --- a/src/tests/simulation/Cargo.toml +++ b/src/tests/simulation/Cargo.toml @@ -55,7 +55,7 @@ tempfile = "3" tikv-jemallocator = { workspace = true } tokio = { version = "0.2", package = "madsim-tokio" } tokio-postgres = "0.7" -tokio-stream = "0.1" +tokio-stream = { workspace = true } tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/tests/simulation/src/slt.rs b/src/tests/simulation/src/slt.rs index 943d9bffcf4ca..799602a00aa3f 100644 --- a/src/tests/simulation/src/slt.rs +++ b/src/tests/simulation/src/slt.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::min; use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -27,7 +28,7 @@ use crate::cluster::{Cluster, KillOpts}; use crate::utils::TimedExt; // retry a maximum times until it succeed -const MAX_RETRY: usize = 5; +const MAX_RETRY: usize = 10; fn is_create_table_as(sql: &str) -> bool { let parts: Vec = sql.split_whitespace().map(|s| s.to_lowercase()).collect(); @@ -305,10 +306,17 @@ pub async fn run_slt_task( let err_string = err.to_string(); // cluster could be still under recovering if killed before, retry if // meets `no reader for dml in table with id {}`. 
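The slt.rs hunk that continues just below widens the set of transient errors that are retried and caps the exponential backoff. As a self-contained sketch of that retry policy, reusing the same error substrings and the min(1 << i, 10) cap from the diff:

use std::cmp::min;
use std::time::Duration;

const MAX_RETRY: usize = 10;

/// Retry only while under the attempt limit and the error message matches one of
/// the substrings known to be transient while the cluster is recovering.
fn should_retry(err_string: &str, attempt: usize) -> bool {
    let allowed_errs = [
        "no reader for dml in table",
        "error reading a body from connection: broken pipe",
        "failed to inject barrier",
        "get error from control stream",
        "cluster is under recovering",
    ];
    attempt < MAX_RETRY
        && allowed_errs
            .iter()
            .any(|allowed_err| err_string.contains(allowed_err))
}

/// Exponential backoff: 1s, 2s, 4s, 8s, then capped at 10s per attempt.
fn backoff_delay(attempt: usize) -> Duration {
    Duration::from_secs(min(1 << attempt, 10))
}

fn main() {
    assert!(should_retry("cluster is under recovering", 0));
    assert!(!should_retry("syntax error", 0));
    assert_eq!(backoff_delay(5), Duration::from_secs(10));
}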
- let should_retry = (err_string.contains("no reader for dml in table") - || err_string - .contains("error reading a body from connection: broken pipe")) - || err_string.contains("failed to inject barrier") && i < MAX_RETRY; + let allowed_errs = [ + "no reader for dml in table", + "error reading a body from connection: broken pipe", + "failed to inject barrier", + "get error from control stream", + "cluster is under recovering", + ]; + let should_retry = i < MAX_RETRY + && allowed_errs + .iter() + .any(|allowed_err| err_string.contains(allowed_err)); if !should_retry { panic!("{}", err); } @@ -338,7 +346,7 @@ pub async fn run_slt_task( for i in 0usize.. { tracing::debug!(iteration = i, "retry count"); - let delay = Duration::from_secs(1 << i); + let delay = Duration::from_secs(min(1 << i, 10)); if i > 0 { tokio::time::sleep(delay).await; } diff --git a/src/tests/sqlsmith/src/sql_gen/agg.rs b/src/tests/sqlsmith/src/sql_gen/agg.rs index 4953235d4cba4..177603ddb333a 100644 --- a/src/tests/sqlsmith/src/sql_gen/agg.rs +++ b/src/tests/sqlsmith/src/sql_gen/agg.rs @@ -18,7 +18,7 @@ use risingwave_common::types::DataType; use risingwave_expr::aggregate::PbAggKind; use risingwave_expr::sig::SigDataType; use risingwave_sqlparser::ast::{ - Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, OrderByExpr, + Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgList, Ident, ObjectName, OrderByExpr, }; use crate::sql_gen::types::AGG_FUNC_TABLE; @@ -142,11 +142,8 @@ fn make_agg_func( Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked(func_name)]), - args, - variadic: false, + arg_list: FunctionArgList::for_agg(distinct, args, order_by), over: None, - distinct, - order_by, filter, within_group: None, } diff --git a/src/tests/sqlsmith/src/sql_gen/functions.rs b/src/tests/sqlsmith/src/sql_gen/functions.rs index cee18a18081ca..8cd1645ec1f5b 100644 --- a/src/tests/sqlsmith/src/sql_gen/functions.rs +++ b/src/tests/sqlsmith/src/sql_gen/functions.rs @@ -18,8 +18,8 @@ use rand::Rng; use risingwave_common::types::DataType; use risingwave_frontend::expr::ExprType; use risingwave_sqlparser::ast::{ - BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, - TrimWhereField, UnaryOperator, Value, + BinaryOperator, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgList, Ident, + ObjectName, TrimWhereField, UnaryOperator, Value, }; use crate::sql_gen::types::{FUNC_TABLE, IMPLICIT_CAST_TABLE, INVARIANT_FUNC_SET}; @@ -258,11 +258,8 @@ pub fn make_simple_func(func_name: &str, exprs: &[Expr]) -> Function { Function { scalar_as_agg: false, name: ObjectName(vec![Ident::new_unchecked(func_name)]), - args, - variadic: false, + arg_list: FunctionArgList::args_only(args), over: None, - distinct: false, - order_by: vec![], filter: None, within_group: None, } diff --git a/src/tests/sqlsmith/src/sql_gen/types.rs b/src/tests/sqlsmith/src/sql_gen/types.rs index 2f12f96790f3d..1da66ca710e0c 100644 --- a/src/tests/sqlsmith/src/sql_gen/types.rs +++ b/src/tests/sqlsmith/src/sql_gen/types.rs @@ -53,6 +53,7 @@ pub(super) fn data_type_to_ast_data_type(data_type: &DataType) -> AstDataType { .collect(), ), DataType::List(ref typ) => AstDataType::Array(Box::new(data_type_to_ast_data_type(typ))), + DataType::Map(_) => todo!(), } } diff --git a/src/tests/state_cleaning_test/Cargo.toml b/src/tests/state_cleaning_test/Cargo.toml index a105360a68f6b..6c12898343951 100644 --- a/src/tests/state_cleaning_test/Cargo.toml +++ b/src/tests/state_cleaning_test/Cargo.toml @@ -24,7 +24,7 @@ 
 serde = { version = "1", features = ["derive"] }
 serde_with = "3"
 tokio = { version = "0.2", package = "madsim-tokio" }
 tokio-postgres = "0.7"
-tokio-stream = { version = "0.1", features = ["fs"] }
+tokio-stream = { workspace = true }
 toml = "0.8"
 tracing = "0.1"
diff --git a/src/utils/runtime/Cargo.toml b/src/utils/runtime/Cargo.toml
index 0559caee265d9..ff2902e7a4b4c 100644
--- a/src/utils/runtime/Cargo.toml
+++ b/src/utils/runtime/Cargo.toml
@@ -17,7 +17,7 @@ normal = ["workspace-hack"]
 [dependencies]
 await-tree = { workspace = true }
 console = "0.15"
-console-subscriber = "0.3.0"
+console-subscriber = "0.4"
 either = "1"
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
 hostname = "0.4"
@@ -26,6 +26,8 @@ pprof = { version = "0.13", features = ["flamegraph"] }
 risingwave_common = { workspace = true }
 risingwave_variables = { workspace = true }
 rlimit = "0.10"
+# Explicitly specify the tokio version used in RisingWave runtime
+rw-tokio = { version = "=1.38.0", package = "tokio" }
 thiserror-ext = { workspace = true }
 time = { version = "0.3", features = ["formatting", "local-offset"] }
 tokio = { version = "0.2", package = "madsim-tokio", features = [